diff --git a/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat b/.ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat similarity index 66% rename from .ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat rename to .ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat index cece0aeb2..94ad31942 100755 --- a/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat +++ b/.ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat @@ -1,2 +1,2 @@ -.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory +.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-dynamic-vram pause diff --git a/.ci/windows_intel_base_files/run_intel_gpu.bat b/.ci/windows_intel_base_files/run_intel_gpu.bat new file mode 100755 index 000000000..274d7c948 --- /dev/null +++ b/.ci/windows_intel_base_files/run_intel_gpu.bat @@ -0,0 +1,2 @@ +.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build +pause diff --git a/.github/workflows/openapi-lint.yml b/.github/workflows/openapi-lint.yml new file mode 100644 index 000000000..be949de2a --- /dev/null +++ b/.github/workflows/openapi-lint.yml @@ -0,0 +1,31 @@ +name: OpenAPI Lint + +on: + pull_request: + paths: + - 'openapi.yaml' + - '.spectral.yaml' + - '.github/workflows/openapi-lint.yml' + +permissions: + contents: read + +jobs: + spectral: + name: Run Spectral + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Spectral + run: npm install -g @stoplight/spectral-cli@6 + + - name: Lint openapi.yaml + run: spectral lint openapi.yaml --ruleset .spectral.yaml --fail-severity=error diff --git a/.github/workflows/release-stable-all.yml b/.github/workflows/release-stable-all.yml index 8f07a7b1c..d7cf69fe2 100644 --- a/.github/workflows/release-stable-all.yml +++ 
b/.github/workflows/release-stable-all.yml @@ -20,29 +20,12 @@ jobs: git_tag: ${{ inputs.git_tag }} cache_tag: "cu130" python_minor: "13" - python_patch: "11" + python_patch: "12" rel_name: "nvidia" rel_extra_name: "" test_release: true secrets: inherit - release_nvidia_cu128: - permissions: - contents: "write" - packages: "write" - pull-requests: "read" - name: "Release NVIDIA cu128" - uses: ./.github/workflows/stable-release.yml - with: - git_tag: ${{ inputs.git_tag }} - cache_tag: "cu128" - python_minor: "12" - python_patch: "10" - rel_name: "nvidia" - rel_extra_name: "_cu128" - test_release: true - secrets: inherit - release_nvidia_cu126: permissions: contents: "write" @@ -76,3 +59,20 @@ jobs: rel_extra_name: "" test_release: false secrets: inherit + + release_xpu: + permissions: + contents: "write" + packages: "write" + pull-requests: "read" + name: "Release Intel XPU" + uses: ./.github/workflows/stable-release.yml + with: + git_tag: ${{ inputs.git_tag }} + cache_tag: "xpu" + python_minor: "13" + python_patch: "12" + rel_name: "intel" + rel_extra_name: "" + test_release: true + secrets: inherit diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml index f501b7b31..bc64ed74d 100644 --- a/.github/workflows/stable-release.yml +++ b/.github/workflows/stable-release.yml @@ -145,6 +145,8 @@ jobs: cp -r ComfyUI/.ci/windows_${{ inputs.rel_name }}_base_files/* ./ cp ../update_comfyui_and_python_dependencies.bat ./update/ + echo 'local-portable' > ComfyUI/.comfy_environment + cd .. 
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=768m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable diff --git a/.github/workflows/tag-dispatch-cloud.yml b/.github/workflows/tag-dispatch-cloud.yml new file mode 100644 index 000000000..53a0e91d6 --- /dev/null +++ b/.github/workflows/tag-dispatch-cloud.yml @@ -0,0 +1,45 @@ +name: Tag Dispatch to Cloud + +on: + push: + tags: + - 'v*' + +jobs: + dispatch-cloud: + runs-on: ubuntu-latest + steps: + - name: Send repository dispatch to cloud + env: + DISPATCH_TOKEN: ${{ secrets.CLOUD_REPO_DISPATCH_TOKEN }} + RELEASE_TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + + if [ -z "${DISPATCH_TOKEN:-}" ]; then + echo "::error::CLOUD_REPO_DISPATCH_TOKEN is required but not set." + exit 1 + fi + + RELEASE_URL="https://github.com/${{ github.repository }}/releases/tag/${RELEASE_TAG}" + + PAYLOAD="$(jq -n \ + --arg release_tag "$RELEASE_TAG" \ + --arg release_url "$RELEASE_URL" \ + '{ + event_type: "comfyui_tag_pushed", + client_payload: { + release_tag: $release_tag, + release_url: $release_url + } + }')" + + curl -fsSL \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${DISPATCH_TOKEN}" \ + https://api.github.com/repos/Comfy-Org/cloud/dispatches \ + -d "$PAYLOAD" + + echo "✅ Dispatched ComfyUI tag ${RELEASE_TAG} to Comfy-Org/cloud" diff --git a/.gitignore b/.gitignore index 2700ad5c2..fc426eda4 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,6 @@ venv*/ *.log web_custom_versions/ .DS_Store -openapi.yaml filtered-openapi.yaml uv.lock +.comfy_environment diff --git a/.spectral.yaml b/.spectral.yaml new file mode 100644 index 000000000..a4b137628 --- /dev/null +++ b/.spectral.yaml @@ -0,0 +1,100 @@ +extends: + - spectral:oas + +# Severity levels: error, warn, info, hint, off +# Rules from the built-in "spectral:oas" ruleset are active by default. 
+# Below we tune severity and add custom rules for our conventions. +# +# This ruleset mirrors Comfy-Org/cloud/.spectral.yaml so specs across the +# organization are linted against a single consistent standard. + +rules: + # ----------------------------------------------------------------------- + # Built-in rule severity overrides + # ----------------------------------------------------------------------- + operation-operationId: error + operation-description: warn + operation-tag-defined: error + info-contact: off + info-description: warn + no-eval-in-markdown: error + no-$ref-siblings: error + + # ----------------------------------------------------------------------- + # Custom rules: naming conventions + # ----------------------------------------------------------------------- + + # Property names should be snake_case + property-name-snake-case: + description: Property names must be snake_case + severity: warn + given: "$.components.schemas.*.properties[*]~" + then: + function: pattern + functionOptions: + match: "^[a-z][a-z0-9]*(_[a-z0-9]+)*$" + + # Operation IDs should be camelCase + operation-id-camel-case: + description: Operation IDs must be camelCase + severity: warn + given: "$.paths.*.*.operationId" + then: + function: pattern + functionOptions: + match: "^[a-z][a-zA-Z0-9]*$" + + # ----------------------------------------------------------------------- + # Custom rules: response conventions + # ----------------------------------------------------------------------- + + # Error responses (4xx, 5xx) should use a consistent shape + error-response-schema: + description: Error responses should reference a standard error schema + severity: hint + given: "$.paths.*.*.responses[?(@property >= '400' && @property < '600')].content['application/json'].schema" + then: + field: "$ref" + function: truthy + + # All 2xx responses with JSON body should have a schema + response-schema-defined: + description: Success responses with JSON content should define a schema + 
severity: warn + given: "$.paths.*.*.responses[?(@property >= '200' && @property < '300')].content['application/json']" + then: + field: schema + function: truthy + + # ----------------------------------------------------------------------- + # Custom rules: best practices + # ----------------------------------------------------------------------- + + # Path parameters must have a description + path-param-description: + description: Path parameters should have a description + severity: warn + given: + - "$.paths.*.parameters[?(@.in == 'path')]" + - "$.paths.*.*.parameters[?(@.in == 'path')]" + then: + field: description + function: truthy + + # Schemas should have a description + schema-description: + description: Component schemas should have a description + severity: hint + given: "$.components.schemas.*" + then: + field: description + function: truthy + +overrides: + # /ws uses HTTP 101 (Switching Protocols) — a legitimate response for a + # WebSocket upgrade, but not a 2xx, so operation-success-response fires + # as a false positive. OpenAPI 3.x has no native WebSocket support. 
+ - files: + - "openapi.yaml#/paths/~1ws" + rules: + operation-success-response: off diff --git a/CODEOWNERS b/CODEOWNERS index 4d5448636..946dbf946 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,2 +1,2 @@ # Admins -* @comfyanonymous @kosinkadink @guill +* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 @kijai diff --git a/QUANTIZATION.md b/QUANTIZATION.md index 1693e13f3..300822029 100644 --- a/QUANTIZATION.md +++ b/QUANTIZATION.md @@ -139,9 +139,9 @@ Example: "_quantization_metadata": { "format_version": "1.0", "layers": { - "model.layers.0.mlp.up_proj": "float8_e4m3fn", - "model.layers.0.mlp.down_proj": "float8_e4m3fn", - "model.layers.1.mlp.up_proj": "float8_e4m3fn" + "model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"}, + "model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"}, + "model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"} } } } @@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s 3. **Compute scales**: Derive `input_scale` from collected statistics 4. **Store in checkpoint**: Save `input_scale` parameters alongside weights -The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. \ No newline at end of file +The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. diff --git a/README.md b/README.md index a47506fc8..0eecd8a4b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@
# ComfyUI -**The most powerful and modular visual AI engine and application.** +**The most powerful and modular AI engine for content creation.** [![Website][website-shield]][website-url] @@ -31,10 +31,16 @@ [github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest [github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases -![ComfyUI Screenshot](https://github.com/user-attachments/assets/7ccaf2c1-9b72-41ae-9a89-5688c94b7abe) +ComfyUI Screenshot +
-ComfyUI lets you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. Available on Windows, Linux, and macOS. +ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node graph interface empowers creatives to generate images, videos, 3D models, audio, and more... +- ComfyUI natively supports the latest open-source state of the art models. +- API nodes provide access to the best closed source models such as Nano Banana, Seedance, Hunyuan3D, etc. +- It is available on Windows, Linux, and macOS, locally with our [desktop application](https://www.comfy.org/download), our [portable install](#installing) or on our [cloud](https://www.comfy.org/cloud). +- The most sophisticated workflows can be exposed through a simple UI thanks to App Mode. +- It integrates seamlessly into production pipelines with our API endpoints. ## Get Started @@ -77,6 +83,7 @@ See what ComfyUI can do with the [newer template workflows](https://comfy.org/wo - [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/) - [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/) - [Z Image](https://comfyanonymous.github.io/ComfyUI_examples/z_image/) + - Ernie Image - Image Editing Models - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/) - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model) @@ -126,7 +133,7 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git ComfyUI follows a weekly release cycle targeting Monday but this regularly changes because of model releases or large changes to the codebase. There are three interconnected repositories: 1. **[ComfyUI Core](https://github.com/comfyanonymous/ComfyUI)** - - Releases a new stable version (e.g., v0.7.0) roughly every week. 
+ - Releases a new major stable version (e.g., v0.7.0) roughly every 2 weeks. - Starting from v0.4.0 patch versions will be used for fixes backported onto the current stable release. - Minor versions will be used for releases off the master branch. - Patch versions may still be used for releases on the master branch in cases where a backport would not make sense. @@ -137,7 +144,7 @@ ComfyUI follows a weekly release cycle targeting Monday but this regularly chang - Builds a new release using the latest stable core version 3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)** - - Weekly frontend updates are merged into the core repository + - Every 2+ weeks frontend updates are merged into the core repository - Features are frozen for the upcoming core release - Development continues for the next release cycle @@ -193,11 +200,15 @@ If you have trouble extracting it, right click the file -> properties -> unblock The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start. -#### Alternative Downloads: +#### All Official Portable Downloads: -[Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z) +[Portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z) -[Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs). +[Portable for Intel GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_intel.7z) + +[Portable for Nvidia GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia.7z) (supports 20 series and above). 
+ +[Portable for Nvidia GPUs with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs). #### How do I share models between another UI and ComfyUI? @@ -276,7 +287,7 @@ Nvidia users should install stable pytorch using this command: This is the command to install pytorch nightly instead which might have performance improvements. -```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130``` +```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu132``` #### Troubleshooting @@ -418,6 +429,8 @@ Use `--tls-keyfile key.pem --tls-certfile cert.pem` to enable TLS/SSL, the app w See also: [https://www.comfy.org/](https://www.comfy.org/) +> _psst — we're hiring!_ Help build ComfyUI: [comfy.org/careers](https://www.comfy.org/careers) + ## Frontend Development As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). This repository now hosts the compiled JS (from TS/Vue) under the `web/` directory. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..299b0067b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,44 @@ +# Security Policy + +## Scope + +ComfyUI is designed to run locally. By default, the server binds to `127.0.0.1`, meaning only the user's own machine can reach it. Our threat model assumes: + +- The user installed ComfyUI through a supported channel: the desktop application, the portable build, or a manual install following the README. +- The user has not installed untrusted custom nodes. Custom nodes are arbitrary Python code and are trusted as much as any other software the user chooses to install. 
+- Anyone with access to the ComfyUI URL is trusted (a direct consequence of the localhost-only default). +- PyTorch and other dependencies are at the versions we ship or recommend in the README. + +A report is in scope only if it affects a user operating within this threat model. + +## What We Consider a Vulnerability + +We want to hear about issues where a **reasonable user** — someone who does not install random untrusted nodes and who reads UI prompts and warnings before clicking through them — can be harmed by ComfyUI itself. + +The clearest example: a workflow file that such a user might plausibly load and run, using only built-in nodes, that results in **untrusted code execution, arbitrary file read/write outside expected directories, or credential/data exfiltration**. + +When submitting a report, please include a clear description of *why this is a problem for a typical local ComfyUI user*. Reports without this context are difficult to act on. + +## What We Do Not Consider a Security Vulnerability + +Please report the following through our regular [GitHub issues](https://github.com/comfyanonymous/ComfyUI/issues) instead. Filing them as security reports will likely cause them to be deprioritized or closed. + +- **Issues requiring `--listen` or any non-default network exposure.** ComfyUI binds to localhost by default. If a remote attacker needs to reach the server for the attack to work, the user has chosen to expose it and is responsible for securing that deployment (firewall, reverse proxy, authentication, etc.). These are bugs, not vulnerabilities. +- **`torch.load` and related deserialization issues in old PyTorch versions.** These are upstream PyTorch issues. Our distributions ship with — and our documentation recommends — recent PyTorch versions where these are addressed. +- **Vulnerabilities that depend on outdated library versions** that we neither ship nor recommend (e.g., requiring PyTorch 2.6 or older). 
+- **Issues that require a specific custom node to be installed.** Custom nodes are third-party code. Report these to the maintainer of that node. +- **Crashes, hangs, or resource exhaustion from a loaded workflow.** Annoying, but not a security issue in our model. File a regular bug. +- **Social-engineering scenarios** where the user is expected to ignore an explicit UI warning or prompt. + +## Reporting + +If you believe you have found an issue that falls within the scope above, please report it privately via GitHub's [Report a vulnerability](https://github.com/comfyanonymous/ComfyUI/security/advisories/new) feature rather than opening a public issue. + +Please include: + +1. A description of the vulnerability and the affected component. +2. Reproduction steps, ideally with a minimal workflow file or proof-of-concept. +3. The ComfyUI version, install method (desktop / portable / manual), and OS. +4. An explanation of how this affects a typical local user as described in the threat model. + +We will acknowledge valid reports and coordinate a fix and disclosure timeline with you. 
diff --git a/api_server/routes/internal/internal_routes.py b/api_server/routes/internal/internal_routes.py index b224306da..1477afa01 100644 --- a/api_server/routes/internal/internal_routes.py +++ b/api_server/routes/internal/internal_routes.py @@ -67,7 +67,7 @@ class InternalRoutes: (entry for entry in os.scandir(directory) if is_visible_file(entry)), key=lambda entry: -entry.stat().st_mtime ) - return web.json_response([entry.name for entry in sorted_files], status=200) + return web.json_response([f"{entry.name} [{directory_type}]" for entry in sorted_files], status=200) def get_app(self): diff --git a/app/frontend_management.py b/app/frontend_management.py index f753ef0de..d0596b276 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -27,7 +27,7 @@ def frontend_install_warning_message(): return f""" {get_missing_requirements_message()} -This error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead. +The ComfyUI frontend is shipped in a pip package so it needs to be updated separately from the ComfyUI code. 
""".strip() def parse_version(version: str) -> tuple[int, int, int]: @@ -38,40 +38,54 @@ def is_valid_version(version: str) -> bool: pattern = r"^(\d+)\.(\d+)\.(\d+)$" return bool(re.match(pattern, version)) -def get_installed_frontend_version(): - """Get the currently installed frontend package version.""" - frontend_version_str = version("comfyui-frontend-package") - return frontend_version_str - - def get_required_frontend_version(): return get_required_packages_versions().get("comfyui-frontend-package", None) -def check_frontend_version(): - """Check if the frontend version is up to date.""" +COMFY_PACKAGE_VERSIONS = [] +def get_comfy_package_versions(): + """List installed/required versions for every comfy* package in requirements.txt.""" + if COMFY_PACKAGE_VERSIONS: + return COMFY_PACKAGE_VERSIONS.copy() + out = COMFY_PACKAGE_VERSIONS + for name, required in (get_required_packages_versions() or {}).items(): + if not name.startswith("comfy"): + continue + try: + installed = version(name) + except Exception: + installed = None + out.append({"name": name, "installed": installed, "required": required}) + return out.copy() - try: - frontend_version_str = get_installed_frontend_version() - frontend_version = parse_version(frontend_version_str) - required_frontend_str = get_required_frontend_version() - required_frontend = parse_version(required_frontend_str) - if frontend_version < required_frontend: + +def check_comfy_packages_versions(): + """Warn for every comfy* package whose installed version is below requirements.txt.""" + from packaging.version import InvalidVersion, parse as parse_pep440 + for pkg in get_comfy_package_versions(): + installed_str = pkg["installed"] + required_str = pkg["required"] + if not installed_str or not required_str: + continue + try: + outdated = parse_pep440(installed_str) < parse_pep440(required_str) + except InvalidVersion as e: + logging.error(f"Failed to check {pkg['name']} version: {e}") + continue + if outdated: 
app.logger.log_startup_warning( f""" ________________________________________________________________________ WARNING WARNING WARNING WARNING WARNING -Installed frontend version {".".join(map(str, frontend_version))} is lower than the recommended version {".".join(map(str, required_frontend))}. +Installed {pkg["name"]} version {installed_str} is lower than the recommended version {required_str}. -{frontend_install_warning_message()} +{get_missing_requirements_message()} ________________________________________________________________________ """.strip() ) else: - logging.info("ComfyUI frontend version: {}".format(frontend_version_str)) - except Exception as e: - logging.error(f"Failed to check frontend version: {e}") + logging.info("{} version: {}".format(pkg["name"], installed_str)) REQUEST_TIMEOUT = 10 # seconds @@ -201,6 +215,11 @@ class FrontendManager: def get_required_templates_version(cls) -> str: return get_required_packages_versions().get("comfyui-workflow-templates", None) + @classmethod + def get_comfy_package_versions(cls): + """List installed/required versions for every comfy* package in requirements.txt.""" + return get_comfy_package_versions() + @classmethod def default_frontend_path(cls) -> str: try: @@ -341,7 +360,7 @@ comfyui-workflow-templates is not installed. main error source might be request timeout or invalid URL. """ if version_string == DEFAULT_VERSION_STRING: - check_frontend_version() + check_comfy_packages_versions() return cls.default_frontend_path() repo_owner, repo_name, version = cls.parse_version_string(version_string) @@ -403,7 +422,7 @@ comfyui-workflow-templates is not installed. 
except Exception as e: logging.error("Failed to initialize frontend: %s", e) logging.info("Falling back to the default frontend.") - check_frontend_version() + check_comfy_packages_versions() return cls.default_frontend_path() @classmethod def template_asset_handler(cls): diff --git a/app/node_replace_manager.py b/app/node_replace_manager.py index d9aab5b22..72e8ac2b1 100644 --- a/app/node_replace_manager.py +++ b/app/node_replace_manager.py @@ -1,5 +1,7 @@ from __future__ import annotations +import logging + from aiohttp import web from typing import TYPE_CHECKING, TypedDict @@ -31,8 +33,22 @@ class NodeReplaceManager: self._replacements: dict[str, list[NodeReplace]] = {} def register(self, node_replace: NodeReplace): - """Register a node replacement mapping.""" - self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace) + """Register a node replacement mapping. + + Idempotent: if a replacement with the same (old_node_id, new_node_id) + is already registered, the duplicate is ignored. This prevents stale + entries from accumulating when custom nodes are reloaded in the same + process (e.g. via ComfyUI-Manager). 
+ """ + existing = self._replacements.setdefault(node_replace.old_node_id, []) + for entry in existing: + if entry.new_node_id == node_replace.new_node_id: + logging.debug( + "Node replacement %s -> %s already registered, ignoring duplicate.", + node_replace.old_node_id, node_replace.new_node_id, + ) + return + existing.append(node_replace) def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None: """Get replacements for an old node ID.""" diff --git a/app/user_manager.py b/app/user_manager.py index e18afb71b..0517b3344 100644 --- a/app/user_manager.py +++ b/app/user_manager.py @@ -28,8 +28,8 @@ def get_file_info(path: str, relative_to: str) -> FileInfo: return { "path": os.path.relpath(path, relative_to).replace(os.sep, '/'), "size": os.path.getsize(path), - "modified": os.path.getmtime(path), - "created": os.path.getctime(path) + "modified": int(os.path.getmtime(path) * 1000), + "created": int(os.path.getctime(path) * 1000), } diff --git a/blueprints/.glsl/Glow_30.frag b/blueprints/.glsl/Glow_30.frag index 0ee152628..f3c85a212 100644 --- a/blueprints/.glsl/Glow_30.frag +++ b/blueprints/.glsl/Glow_30.frag @@ -2,7 +2,6 @@ precision mediump float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform int u_int0; // Blend mode uniform int u_int1; // Color tint uniform float u_float0; // Intensity @@ -75,7 +74,7 @@ void main() { float t0 = threshold - 0.15; float t1 = threshold + 0.15; - vec2 texelSize = 1.0 / u_resolution; + vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0)); float radius2 = radius * radius; float sampleScale = clamp(radius * 0.75, 0.35, 1.0); diff --git a/blueprints/.glsl/Image_Blur_1.frag b/blueprints/.glsl/Image_Blur_1.frag index 83238111d..1819e1695 100644 --- a/blueprints/.glsl/Image_Blur_1.frag +++ b/blueprints/.glsl/Image_Blur_1.frag @@ -12,7 +12,6 @@ const int RADIAL_SAMPLES = 12; const float RADIAL_STRENGTH = 0.0003; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform int u_int0; // Blur type 
(BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL) uniform float u_float0; // Blur radius/amount uniform int u_pass; // Pass index (0 = horizontal, 1 = vertical) @@ -25,7 +24,7 @@ float gaussian(float x, float sigma) { } void main() { - vec2 texelSize = 1.0 / u_resolution; + vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0)); float radius = max(u_float0, 0.0); // Radial (angular) blur - single pass, doesn't use separable diff --git a/blueprints/.glsl/Sharpen_23.frag b/blueprints/.glsl/Sharpen_23.frag index c03f94b66..e7463a329 100644 --- a/blueprints/.glsl/Sharpen_23.frag +++ b/blueprints/.glsl/Sharpen_23.frag @@ -2,14 +2,13 @@ precision highp float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0 in vec2 v_texCoord; layout(location = 0) out vec4 fragColor0; void main() { - vec2 texel = 1.0 / u_resolution; + vec2 texel = 1.0 / vec2(textureSize(u_image0, 0)); // Sample center and neighbors vec4 center = texture(u_image0, v_texCoord); diff --git a/blueprints/.glsl/Unsharp_Mask_26.frag b/blueprints/.glsl/Unsharp_Mask_26.frag index f5990cb4a..d968c9c03 100644 --- a/blueprints/.glsl/Unsharp_Mask_26.frag +++ b/blueprints/.glsl/Unsharp_Mask_26.frag @@ -2,7 +2,6 @@ precision highp float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5 uniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels uniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen @@ -19,7 +18,7 @@ float getLuminance(vec3 color) { } void main() { - vec2 texel = 1.0 / u_resolution; + vec2 texel = 1.0 / vec2(textureSize(u_image0, 0)); float radius = max(u_float1, 0.5); float amount = u_float0; float threshold = u_float2; diff --git a/blueprints/Brightness and Contrast.json b/blueprints/Brightness and Contrast.json index 2c7e60eb1..78fc52f29 100644 --- a/blueprints/Brightness and Contrast.json +++ b/blueprints/Brightness and Contrast.json @@ -1 
+1,440 @@ -{"revision": 0, "last_node_id": 140, "last_link_id": 0, "nodes": [{"id": 140, "type": "916dff42-6166-4d45-b028-04eaf69fbb35", "pos": [500, 1440], "size": [250, 178], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["4", "value"], ["5", "value"]]}, "widgets_values": [], "title": "Brightness and Contrast"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "916dff42-6166-4d45-b028-04eaf69fbb35", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 143, "lastLinkId": 118, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Brightness and Contrast", "inputNode": {"id": -10, "bounding": [360, -176, 120, 60]}, "outputNode": {"id": -20, "bounding": [1410, -176, 120, 60]}, "inputs": [{"id": "a5aae7ea-b511-4045-b5da-94101e269cd7", "name": "images.image0", "type": "IMAGE", "linkIds": [117], "localized_name": "images.image0", "label": "image", "pos": [460, -156]}], "outputs": [{"id": "30b72604-69b3-4944-b253-a9099bbd73a9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [118], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [1430, -156]}], "widgets": [], "nodes": [{"id": 4, "type": "PrimitiveFloat", "pos": [540, -280], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "brightness", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [115]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [50]}, {"id": 5, "type": "PrimitiveFloat", 
"pos": [540, -170], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "contrast", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [116]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [136, 136, 136]}, {"offset": 0.4, "color": [68, 68, 68]}, {"offset": 0.6, "color": [187, 187, 187]}, {"offset": 0.8, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 143, "type": "GLSLShader", "pos": [840, -280], "size": [400, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 117}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 115}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 116}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [118]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, 
{"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Brightness slider -100..100\nuniform float u_float1; // Contrast slider -100..100\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float MID_GRAY = 0.18; // 18% reflectance\n\n// sRGB gamma 2.2 approximation\nvec3 srgbToLinear(vec3 c) {\n return pow(max(c, 0.0), vec3(2.2));\n}\n\nvec3 linearToSrgb(vec3 c) {\n return pow(max(c, 0.0), vec3(1.0/2.2));\n}\n\nfloat mapBrightness(float b) {\n return clamp(b / 100.0, -1.0, 1.0);\n}\n\nfloat mapContrast(float c) {\n return clamp(c / 100.0 + 1.0, 0.0, 2.0);\n}\n\nvoid main() {\n vec4 orig = texture(u_image0, v_texCoord);\n\n float brightness = mapBrightness(u_float0);\n float contrast = mapContrast(u_float1);\n\n vec3 lin = srgbToLinear(orig.rgb);\n\n lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY;\n\n // Convert back to sRGB\n vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0));\n\n fragColor = vec4(result, orig.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 115, "origin_id": 4, "origin_slot": 0, "target_id": 143, "target_slot": 2, "type": "FLOAT"}, {"id": 116, "origin_id": 5, "origin_slot": 0, "target_id": 143, "target_slot": 3, "type": "FLOAT"}, {"id": 117, "origin_id": -10, "origin_slot": 0, "target_id": 143, "target_slot": 0, "type": "IMAGE"}, {"id": 118, "origin_id": 143, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}, "extra": {}} +{ + "revision": 0, + "last_node_id": 140, + "last_link_id": 0, + "nodes": [ + { + "id": 140, + "type": "916dff42-6166-4d45-b028-04eaf69fbb35", + "pos": [ + 500, + 1440 + ], + "size": [ + 250, + 178 + ], + "flags": {}, + "order": 2, 
+ "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "4", + "value" + ], + [ + "5", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Brightness and Contrast" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "916dff42-6166-4d45-b028-04eaf69fbb35", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 143, + "lastLinkId": 118, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Brightness and Contrast", + "inputNode": { + "id": -10, + "bounding": [ + 360, + -176, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1410, + -176, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "a5aae7ea-b511-4045-b5da-94101e269cd7", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 117 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 460, + -156 + ] + } + ], + "outputs": [ + { + "id": "30b72604-69b3-4944-b253-a9099bbd73a9", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 118 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 1430, + -156 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [ + 540, + -280 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "brightness", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 115 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1, + "display": "gradientslider", + 
"gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 5, + "type": "PrimitiveFloat", + "pos": [ + 540, + -170 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "contrast", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 116 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 136, + 136, + 136 + ] + }, + { + "offset": 0.4, + "color": [ + 68, + 68, + 68 + ] + }, + { + "offset": 0.6, + "color": [ + 187, + 187, + 187 + ] + }, + { + "offset": 0.8, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 143, + "type": "GLSLShader", + "pos": [ + 840, + -280 + ], + "size": [ + 400, + 212 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 117 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 115 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 116 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + 
"label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 118 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Brightness slider -100..100\nuniform float u_float1; // Contrast slider -100..100\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float MID_GRAY = 0.18; // 18% reflectance\n\n// sRGB gamma 2.2 approximation\nvec3 srgbToLinear(vec3 c) {\n return pow(max(c, 0.0), vec3(2.2));\n}\n\nvec3 linearToSrgb(vec3 c) {\n return pow(max(c, 0.0), vec3(1.0/2.2));\n}\n\nfloat mapBrightness(float b) {\n return clamp(b / 100.0, -1.0, 1.0);\n}\n\nfloat mapContrast(float c) {\n return clamp(c / 100.0 + 1.0, 0.0, 2.0);\n}\n\nvoid main() {\n vec4 orig = texture(u_image0, v_texCoord);\n\n float brightness = mapBrightness(u_float0);\n float contrast = mapContrast(u_float1);\n\n vec3 lin = srgbToLinear(orig.rgb);\n\n lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY;\n\n // Convert back to sRGB\n vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0));\n\n fragColor = vec4(result, orig.a);\n}\n", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 115, + 
"origin_id": 4, + "origin_slot": 0, + "target_id": 143, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 116, + "origin_id": 5, + "origin_slot": 0, + "target_id": 143, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 117, + "origin_id": -10, + "origin_slot": 0, + "target_id": 143, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 118, + "origin_id": 143, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adjusts image brightness and contrast using a real-time GPU fragment shader." + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Canny to Image (Z-Image-Turbo).json b/blueprints/Canny to Image (Z-Image-Turbo).json index 8b78a834a..14deb64cc 100644 --- a/blueprints/Canny to Image (Z-Image-Turbo).json +++ b/blueprints/Canny to Image (Z-Image-Turbo).json @@ -1 +1,1578 @@ -{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 18, "last_link_id": 32, "nodes": [{"id": 18, "type": "c84f7959-3738-422b-ba6e-5808b5e90101", "pos": [300, 3830], "size": [400, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "control image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "canny low threshold", "name": "low_threshold", "type": "FLOAT", "widget": {"name": "low_threshold"}, "link": null}, {"label": "canny high threshold", "name": "high_threshold", "type": "FLOAT", "widget": {"name": "high_threshold"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 
null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "low_threshold"], ["-1", "high_threshold"], ["7", "seed"], ["7", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 0.3, 0.4, null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "c84f7959-3738-422b-ba6e-5808b5e90101", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 18, "lastLinkId": 32, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Canny to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [-280, 4960, 158.880859375, 200]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [26], "label": "control image", "pos": [-141.119140625, 4980]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [16], "label": "prompt", "pos": [-141.119140625, 5000]}, {"id": "6bd34d18-79f6-470f-94df-ca14c84ef3d8", "name": "low_threshold", "type": "FLOAT", "linkIds": [24], "label": "canny low threshold", "pos": [-141.119140625, 5020]}, {"id": "bbced993-057f-4d2d-909c-d791be73d1d2", "name": "high_threshold", "type": "FLOAT", "linkIds": [25], "label": "canny high threshold", "pos": [-141.119140625, 5040]}, {"id": "db7969bf-4b05-48a0-9598-87d3ac85b505", "name": "unet_name", "type": "COMBO", "linkIds": [29], "pos": [-141.119140625, 5060]}, {"id": "925b611c-5edf-406f-8dc5-7fec07d049a7", "name": "clip_name", "type": "COMBO", "linkIds": [30], "pos": [-141.119140625, 5080]}, {"id": "b4cf508b-4753-40d2-8c83-5a424237ee07", "name": "vae_name", "type": "COMBO", "linkIds": [31], "pos": 
[-141.119140625, 5100]}, {"id": "bd948f38-3a11-4091-99fc-bb2b3511bcd2", "name": "name", "type": "COMBO", "linkIds": [32], "pos": [-141.119140625, 5120]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [18], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 1, "type": "CLIPLoader", "pos": [228.60376290329597, 4700.188357350136], "size": [270, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 30}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [14]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 2, "type": "UNETLoader", "pos": [228.60376290329597, 4550.1883046176445], "size": [270, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 29}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", 
"models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 3, "type": "VAELoader", "pos": [228.60376290329597, 4880.18831633181], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 31}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [2, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 4, "type": "ModelPatchLoader", "pos": [228.60376290329597, 5010.1884895078], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 32}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": 
false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 6, "type": "ModelSamplingAuraFlow", "pos": [998.6039930366841, 4490.18831829042], "size": [290, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 3}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 7, "type": "KSampler", "pos": [998.6039930366841, 4600.188351166619], "size": [300, 460], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": 
"denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 8, "type": "ConditioningZeroOut", "pos": [748.2704434516113, 5044.855005348689], "size": [204.134765625, 26.000000000000004], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 8}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 9, "type": "QwenImageDiffsynthControlnet", "pos": [608.2704174118008, 5204.85499785943], "size": [290, 138], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 9}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 10}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 11}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 22}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [3]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", 
"enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 12, "type": "CLIPTextEncode", "pos": [548.2704310845766, 4544.854974431101], "size": [400, 330], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 14}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 16}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [5, 8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 5, "type": "VAEDecode", "pos": [1338.6038576146689, 4500.188344983101], "size": [200, 46], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 1}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 2}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 15, "type": "ImageScaleToTotalPixels", "pos": [220, 5220], "size": [270, 106], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 26}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", 
"type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["nearest-exact", 1, 1]}, {"id": 11, "type": "GetImageSize", "pos": [540, 5450], "size": [140, 66], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 23}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [12]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [13]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 10, "type": "EmptySD3LatentImage", "pos": [760, 5430], "size": [260, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 12}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 13}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 
65}, "widgets_values": [1024, 1024, 1]}, {"id": 14, "type": "Canny", "pos": [220, 5380], "size": [270, 82], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 27}, {"localized_name": "low_threshold", "name": "low_threshold", "type": "FLOAT", "widget": {"name": "low_threshold"}, "link": 24}, {"localized_name": "high_threshold", "name": "high_threshold", "type": "FLOAT", "widget": {"name": "high_threshold"}, "link": 25}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [22, 23, 28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "Canny"}, "widgets_values": [0.3, 0.4]}, {"id": 16, "type": "PreviewImage", "pos": [220, 5520], "size": [260, 270], "flags": {}, "order": 14, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 28}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "PreviewImage"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Prompt", "bounding": [530, 4460, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [210, 4460, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [530, 5120, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 1, "origin_id": 7, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "LATENT"}, {"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 5, "target_slot": 1, "type": "VAE"}, {"id": 3, "origin_id": 9, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "MODEL"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 12, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 7, "target_slot": 2, "type": 
"CONDITIONING"}, {"id": 7, "origin_id": 10, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 12, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "CONDITIONING"}, {"id": 9, "origin_id": 2, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "MODEL"}, {"id": 10, "origin_id": 4, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 11, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "VAE"}, {"id": 12, "origin_id": 11, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "INT"}, {"id": 13, "origin_id": 11, "origin_slot": 1, "target_id": 10, "target_slot": 1, "type": "INT"}, {"id": 14, "origin_id": 1, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 12, "target_slot": 1, "type": "STRING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 22, "origin_id": 14, "origin_slot": 0, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 23, "origin_id": 14, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 24, "origin_id": -10, "origin_slot": 2, "target_id": 14, "target_slot": 1, "type": "FLOAT"}, {"id": 25, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 2, "type": "FLOAT"}, {"id": 26, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 15, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 28, "origin_id": 14, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 29, "origin_id": -10, "origin_slot": 4, "target_id": 2, "target_slot": 0, "type": "COMBO"}, {"id": 30, "origin_id": -10, "origin_slot": 5, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 31, "origin_id": -10, "origin_slot": 6, "target_id": 3, "target_slot": 0, "type": "COMBO"}, 
{"id": 32, "origin_id": -10, "origin_slot": 7, "target_id": 4, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Canny to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.967267584583181, "offset": [444.759060017523, -3564.372163194443]}}, "version": 0.4} +{ + "id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", + "revision": 0, + "last_node_id": 18, + "last_link_id": 32, + "nodes": [ + { + "id": 18, + "type": "c84f7959-3738-422b-ba6e-5808b5e90101", + "pos": [ + 300, + 3830 + ], + "size": [ + 400, + 460 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "control image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "canny low threshold", + "name": "low_threshold", + "type": "FLOAT", + "widget": { + "name": "low_threshold" + }, + "link": null + }, + { + "label": "canny high threshold", + "name": "high_threshold", + "type": "FLOAT", + "widget": { + "name": "high_threshold" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + 
"proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "low_threshold" + ], + [ + "-1", + "high_threshold" + ], + [ + "7", + "seed" + ], + [ + "7", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + 0.3, + 0.4, + null, + null, + "z_image_turbo_bf16.safetensors", + "qwen_3_4b.safetensors", + "ae.safetensors", + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "c84f7959-3738-422b-ba6e-5808b5e90101", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 18, + "lastLinkId": 32, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Canny to Image (Z-Image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + -280, + 4960, + 158.880859375, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1598.6038576146689, + 4936.043696127976, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 26 + ], + "label": "control image", + "pos": [ + -141.119140625, + 4980 + ] + }, + { + "id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", + "name": "text", + "type": "STRING", + "linkIds": [ + 16 + ], + "label": "prompt", + "pos": [ + -141.119140625, + 5000 + ] + }, + { + "id": "6bd34d18-79f6-470f-94df-ca14c84ef3d8", + "name": "low_threshold", + "type": "FLOAT", + "linkIds": [ + 24 + ], + "label": "canny low threshold", + "pos": [ + -141.119140625, + 5020 + ] + }, + { + "id": "bbced993-057f-4d2d-909c-d791be73d1d2", + "name": "high_threshold", + "type": "FLOAT", + "linkIds": [ + 25 + ], + "label": "canny high threshold", + "pos": [ + -141.119140625, + 5040 + ] + }, + { + "id": "db7969bf-4b05-48a0-9598-87d3ac85b505", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 29 + ], + "pos": [ + 
-141.119140625, + 5060 + ] + }, + { + "id": "925b611c-5edf-406f-8dc5-7fec07d049a7", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 30 + ], + "pos": [ + -141.119140625, + 5080 + ] + }, + { + "id": "b4cf508b-4753-40d2-8c83-5a424237ee07", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 31 + ], + "pos": [ + -141.119140625, + 5100 + ] + }, + { + "id": "bd948f38-3a11-4091-99fc-bb2b3511bcd2", + "name": "name", + "type": "COMBO", + "linkIds": [ + 32 + ], + "pos": [ + -141.119140625, + 5120 + ] + } + ], + "outputs": [ + { + "id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 18 + ], + "pos": [ + 1618.6038576146689, + 4956.043696127976 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "CLIPLoader", + "pos": [ + 228.60376290329597, + 4700.188357350136 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 30 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 14 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + 
"lumina2", + "default" + ] + }, + { + "id": 2, + "type": "UNETLoader", + "pos": [ + 228.60376290329597, + 4550.1883046176445 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 29 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 3, + "type": "VAELoader", + "pos": [ + 228.60376290329597, + 4880.18831633181 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 31 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 2, + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 4, + "type": "ModelPatchLoader", + "pos": [ + 228.60376290329597, + 5010.1884895078 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "name", + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": 32 + } + ], + "outputs": [ + { + "localized_name": "MODEL_PATCH", + "name": "MODEL_PATCH", + "type": "MODEL_PATCH", + "links": [ + 10 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ModelPatchLoader", + "models": [ + { + "name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "directory": "model_patches" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + }, + { + "id": 6, + "type": "ModelSamplingAuraFlow", + "pos": [ + 998.6039930366841, + 4490.18831829042 + ], + "size": [ + 290, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 3 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 4 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", 
+ "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 7, + "type": "KSampler", + "pos": [ + 998.6039930366841, + 4600.188351166619 + ], + "size": [ + 300, + 460 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 4 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 5 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 7 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 1 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 9, + 1, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 8, + "type": 
"ConditioningZeroOut", + "pos": [ + 748.2704434516113, + 5044.855005348689 + ], + "size": [ + 204.134765625, + 26.000000000000004 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 6 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 9, + "type": "QwenImageDiffsynthControlnet", + "pos": [ + 608.2704174118008, + 5204.85499785943 + ], + "size": [ + 290, + 138 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 9 + }, + { + "localized_name": "model_patch", + "name": "model_patch", + "type": "MODEL_PATCH", + "link": 10 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 11 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 22 + }, + { + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 3 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for S&R": "QwenImageDiffsynthControlnet", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + 
{ + "id": 12, + "type": "CLIPTextEncode", + "pos": [ + 548.2704310845766, + 4544.854974431101 + ], + "size": [ + 400, + 330 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 14 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 16 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 5, + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 5, + "type": "VAEDecode", + "pos": [ + 1338.6038576146689, + 4500.188344983101 + ], + "size": [ + 200, + 46 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 1 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 2 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 15, + "type": "ImageScaleToTotalPixels", + "pos": [ + 220, + 5220 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 26 + }, + { + "localized_name": "upscale_method", + "name": 
"upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "ImageScaleToTotalPixels" + }, + "widgets_values": [ + "nearest-exact", + 1, + 1 + ] + }, + { + "id": 11, + "type": "GetImageSize", + "pos": [ + 540, + 5450 + ], + "size": [ + 140, + 66 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 23 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 12 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 13 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 10, + "type": "EmptySD3LatentImage", + "pos": [ + 760, + 5430 + ], + "size": [ + 260, + 106 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 12 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 
13 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 14, + "type": "Canny", + "pos": [ + 220, + 5380 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 27 + }, + { + "localized_name": "low_threshold", + "name": "low_threshold", + "type": "FLOAT", + "widget": { + "name": "low_threshold" + }, + "link": 24 + }, + { + "localized_name": "high_threshold", + "name": "high_threshold", + "type": "FLOAT", + "widget": { + "name": "high_threshold" + }, + "link": 25 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 22, + 23, + 28 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "Canny" + }, + "widgets_values": [ + 0.3, + 0.4 + ] + }, + { + "id": 16, + "type": "PreviewImage", + "pos": [ + 220, + 5520 + ], + "size": [ + 260, + 270 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 28 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "PreviewImage" + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt", + "bounding": [ + 530, + 4460, + 440, + 630 + ], + "color": "#3f789e", + "font_size": 24, + 
"flags": {} + }, + { + "id": 2, + "title": "Models", + "bounding": [ + 210, + 4460, + 300, + 640 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Apple ControlNet", + "bounding": [ + 530, + 5120, + 440, + 260 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 1, + "origin_id": 7, + "origin_slot": 0, + "target_id": 5, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 2, + "origin_id": 3, + "origin_slot": 0, + "target_id": 5, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 3, + "origin_id": 9, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 4, + "origin_id": 6, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 5, + "origin_id": 12, + "origin_slot": 0, + "target_id": 7, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 6, + "origin_id": 8, + "origin_slot": 0, + "target_id": 7, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 7, + "origin_id": 10, + "origin_slot": 0, + "target_id": 7, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 8, + "origin_id": 12, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 9, + "origin_id": 2, + "origin_slot": 0, + "target_id": 9, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 10, + "origin_id": 4, + "origin_slot": 0, + "target_id": 9, + "target_slot": 1, + "type": "MODEL_PATCH" + }, + { + "id": 11, + "origin_id": 3, + "origin_slot": 0, + "target_id": 9, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 12, + "origin_id": 11, + "origin_slot": 0, + "target_id": 10, + "target_slot": 0, + "type": "INT" + }, + { + "id": 13, + "origin_id": 11, + "origin_slot": 1, + "target_id": 10, + "target_slot": 1, + "type": "INT" + }, + { + "id": 14, + "origin_id": 1, + "origin_slot": 0, + "target_id": 12, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": -10, 
+ "origin_slot": 1, + "target_id": 12, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 18, + "origin_id": 5, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 22, + "origin_id": 14, + "origin_slot": 0, + "target_id": 9, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 23, + "origin_id": 14, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 24, + "origin_id": -10, + "origin_slot": 2, + "target_id": 14, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 25, + "origin_id": -10, + "origin_slot": 3, + "target_id": 14, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 26, + "origin_id": -10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 27, + "origin_id": 15, + "origin_slot": 0, + "target_id": 14, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 14, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 29, + "origin_id": -10, + "origin_slot": 4, + "target_id": 2, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 30, + "origin_id": -10, + "origin_slot": 5, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 31, + "origin_id": -10, + "origin_slot": 6, + "target_id": 3, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 32, + "origin_id": -10, + "origin_slot": 7, + "target_id": 4, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "category": "Image generation and editing/Canny to image", + "description": "Generates an image from a Canny edge map using Z-Image-Turbo, with text conditioning." 
+ } + ] + }, + "config": {}, + "extra": { + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true, + "ds": { + "scale": 0.967267584583181, + "offset": [ + 444.759060017523, + -3564.372163194443 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Canny to Video (LTX 2.0).json b/blueprints/Canny to Video (LTX 2.0).json index cd2c4e594..a9682c8a4 100644 --- a/blueprints/Canny to Video (LTX 2.0).json +++ b/blueprints/Canny to Video (LTX 2.0).json @@ -1 +1,3620 @@ -{"id": "02f6166f-32f8-4673-b861-76be1464cba5", "revision": 0, "last_node_id": 155, "last_link_id": 391, "nodes": [{"id": 1, "type": "884e1862-7567-4e72-bd2a-fd4fdfd06320", "pos": [1519.643633934233, 3717.5350173634242], "size": [400, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "canny_images", "name": "image", "type": "IMAGE", "link": null}, {"label": "image_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "first_frame", "name": "image_1", "type": "IMAGE", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": null}, {"label": "distlled_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": 
{"proxyWidgets": [["-1", "text"], ["-1", "strength"], ["143", "noise_seed"], ["126", "control_after_generate"], ["-1", "bypass"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "text_encoder"], ["-1", "lora_name_1"], ["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1, null, null, false, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-canny-control.safetensors", "gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-distilled-lora-384.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "884e1862-7567-4e72-bd2a-fd4fdfd06320", "version": 1, "state": {"lastGroupId": 11, "lastNodeId": 155, "lastLinkId": 391, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Canny to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2180, 4070, 146.8515625, 240]}, "outputNode": {"id": -20, "bounding": [1750, 4090, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "pos": [-2053.1484375, 4090]}, {"id": "35a07084-3ecf-482a-a330-b40278770ca3", "name": "image", "type": "IMAGE", "linkIds": [348, 349], "label": "canny_images", "pos": [-2053.1484375, 4110]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "image_strength", "pos": [-2053.1484375, 4130]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2053.1484375, 4150]}, {"id": "bea20802-d654-4287-a8ef-0f834314bcf9", "name": "image_1", "type": "IMAGE", "linkIds": [364, 379], "label": "first_frame", "pos": [-2053.1484375, 4170]}, {"id": "4e2f26b5-9ad6-49a6-8e90-0ed24fc6a423", "name": "ckpt_name", "type": "COMBO", "linkIds": 
[385, 386, 387], "pos": [-2053.1484375, 4190]}, {"id": "81fdfcf3-92ca-4f8d-b13d-d22758231530", "name": "lora_name", "type": "COMBO", "linkIds": [388], "pos": [-2053.1484375, 4210]}, {"id": "3fa7991e-4419-44a7-9377-1b6125fef355", "name": "text_encoder", "type": "COMBO", "linkIds": [389], "pos": [-2053.1484375, 4230]}, {"id": "b9277d33-2f18-47bb-95ab-666799e8730f", "name": "lora_name_1", "type": "COMBO", "linkIds": [390], "label": "distlled_lora", "pos": [-2053.1484375, 4250]}, {"id": "80b2e9cf-e1a7-462f-ae0d-ffb4ba668a65", "name": "model_name", "type": "COMBO", "linkIds": [391], "label": "upscale_model", "pos": [-2053.1484375, 4270]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770, 4110]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-698, 3670], "size": [270, 106.66666666666667], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-698, 3840], "size": [270, 68.88020833333334], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": 
"SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [410, 3850], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [410, 4100], "size": [270, 110], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [410, 3700], "size": [270, 98], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", 
"link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.71", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1100, 4810], "size": [270, 120], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 383}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": 329}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-388, 3520], "size": [213.125, 120], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": 
[{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1134, 4140], "size": [270, 86.66666666666667], "flags": {}, "order": 27, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1164, 3880], "size": [400, 200], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", 
"secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": "#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-520, 4700], "size": [187.5, 60], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-394, 3800], "size": [240, 46], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.999849798713, 3514.000055195033], "size": [410, 320], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": 
"CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [410, 3980], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gradient_estimation"]}, {"id": 95, "type": "LTXVScheduler", "pos": [-700, 3980], "size": [270, 170], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 
65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-698, 3520], "size": [270, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [710, 3570], "size": [212.38333740234376, 106], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [410, 3570], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": 
"INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1130, 3840], "size": [240, 46], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 384}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1420, 3760], "size": [270, 78], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [304]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1650, 3760], "size": [420, 82], "flags": {}, "order": 12, "mode": 0, "inputs": 
[{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 388}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-canny-control.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Canny-Control/resolve/main/ltx-2-19b-ic-lora-canny-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-canny-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730, 3730], "size": [193.2916015625, 46], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1120, 3640], "size": [270, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, 
{"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1130, 3530], "size": [240, 50], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 110, "type": "GetImageSize", "pos": [-1630, 4450], "size": [260, 80], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 349}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [329, 330]}], "properties": {"cnr_id": 
"comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1630, 4620], "size": [270, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1630, 4750], "size": [270, 58], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1100, 4610], "size": [270, 146.66666666666669], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 330}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": 
"batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1090, 4400], "size": [270, 152], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 364}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 118, "type": "Reroute", "pos": [-230, 4210], "size": [75, 26], "flags": {}, "order": 13, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-20, 4070], "size": [270, 182], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 379}, {"localized_name": "latent", "name": 
"latent", "type": "LATENT", "link": 366}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-10, 3840], "size": [240, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-10, 3960], "size": [260, 66], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 289}], "outputs": 
[{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 132, "type": "LTXVAddGuide", "pos": [-600, 4420], "size": [270, 209.16666666666669], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 348}, {"localized_name": "frame_idx", "name": "frame_idx", "type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1650, 3590], "size": [420, 98], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 385}], "outputs": 
[{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1650, 4040], "size": [420, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 389}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 387}, {"localized_name": "device", "name": "device", "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": 
["gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-70, 3570], "size": [390, 82], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 390}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-70, 3700], "size": [390, 60], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 391}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": "LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, 
"secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 154, "type": "MarkdownNote", "pos": [-1660, 4870], "size": [350, 170], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make sure the frame rate value is the same in both boxes"], "color": "#222", "bgcolor": "#000"}, {"id": 155, "type": "LTXVAudioVAELoader", "pos": [-1640, 3910], "size": [400, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 386}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [383, 384]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "LTXVAudioVAELoader"}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1640, 4550, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, 
"type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 329, "origin_id": 110, "origin_slot": 2, "target_id": 111, "target_slot": 1, "type": "INT"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 330, "origin_id": 110, "origin_slot": 2, "target_id": 115, "target_slot": 2, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, "origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 
324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, "target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 
1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 348, "origin_id": -10, "origin_slot": 1, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 349, "origin_id": -10, "origin_slot": 1, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": "LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, 
"type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 3, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 364, "origin_id": -10, "origin_slot": 4, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 3, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 379, "origin_id": -10, "origin_slot": 4, "target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 383, "origin_id": 155, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 384, "origin_id": 155, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 385, "origin_id": -10, "origin_slot": 5, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 386, "origin_id": -10, "origin_slot": 5, "target_id": 155, "target_slot": 0, "type": "COMBO"}, {"id": 387, "origin_id": -10, "origin_slot": 5, "target_id": 97, "target_slot": 1, "type": "COMBO"}, {"id": 388, "origin_id": -10, "origin_slot": 6, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 389, "origin_id": -10, "origin_slot": 7, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 390, "origin_id": -10, "origin_slot": 8, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 391, "origin_id": -10, "origin_slot": 9, "target_id": 100, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Canny to video"}]}, "config": {}, "extra": 
{"workflowRendererVersion": "LG", "ds": {"scale": 0.7537190265006444, "offset": [-330.27244430536007, -3324.725077010053]}}, "version": 0.4} +{ + "id": "02f6166f-32f8-4673-b861-76be1464cba5", + "revision": 0, + "last_node_id": 155, + "last_link_id": 391, + "nodes": [ + { + "id": 1, + "type": "884e1862-7567-4e72-bd2a-fd4fdfd06320", + "pos": [ + 1519.643633934233, + 3717.5350173634242 + ], + "size": [ + 400, + 500 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "canny_images", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "image_strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "label": "disable_first_frame", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": null + }, + { + "label": "first_frame", + "name": "image_1", + "type": "IMAGE", + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "distlled_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "label": "upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "strength" + ], + [ + "143", + "noise_seed" + ], + [ + "126", + "control_after_generate" + ], + [ + "-1", + "bypass" + ], + [ + "-1", + "ckpt_name" + ], + [ 
+ "-1", + "lora_name" + ], + [ + "-1", + "text_encoder" + ], + [ + "-1", + "lora_name_1" + ], + [ + "-1", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.7.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + 1, + null, + null, + false, + "ltx-2-19b-dev-fp8.safetensors", + "ltx-2-19b-ic-lora-canny-control.safetensors", + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2-19b-distilled-lora-384.safetensors", + "ltx-2-spatial-upscaler-x2-1.0.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "884e1862-7567-4e72-bd2a-fd4fdfd06320", + "version": 1, + "state": { + "lastGroupId": 11, + "lastNodeId": 155, + "lastLinkId": 391, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Canny to Video (LTX 2.0)", + "inputNode": { + "id": -10, + "bounding": [ + -2180, + 4070, + 146.8515625, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1750, + 4090, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", + "name": "text", + "type": "STRING", + "linkIds": [ + 345 + ], + "pos": [ + -2053.1484375, + 4090 + ] + }, + { + "id": "35a07084-3ecf-482a-a330-b40278770ca3", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 348, + 349 + ], + "label": "canny_images", + "pos": [ + -2053.1484375, + 4110 + ] + }, + { + "id": "59430efe-1090-4e36-8afe-b21ce7f4268b", + "name": "strength", + "type": "FLOAT", + "linkIds": [ + 370, + 371 + ], + "label": "image_strength", + "pos": [ + -2053.1484375, + 4130 + ] + }, + { + "id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", + "name": "bypass", + "type": "BOOLEAN", + "linkIds": [ + 363, + 368 + ], + "label": "disable_first_frame", + "pos": [ + -2053.1484375, + 4150 + ] + }, + { + "id": "bea20802-d654-4287-a8ef-0f834314bcf9", + "name": "image_1", + "type": "IMAGE", + "linkIds": [ + 
364, + 379 + ], + "label": "first_frame", + "pos": [ + -2053.1484375, + 4170 + ] + }, + { + "id": "4e2f26b5-9ad6-49a6-8e90-0ed24fc6a423", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 385, + 386, + 387 + ], + "pos": [ + -2053.1484375, + 4190 + ] + }, + { + "id": "81fdfcf3-92ca-4f8d-b13d-d22758231530", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 388 + ], + "pos": [ + -2053.1484375, + 4210 + ] + }, + { + "id": "3fa7991e-4419-44a7-9377-1b6125fef355", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 389 + ], + "pos": [ + -2053.1484375, + 4230 + ] + }, + { + "id": "b9277d33-2f18-47bb-95ab-666799e8730f", + "name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 390 + ], + "label": "distlled_lora", + "pos": [ + -2053.1484375, + 4250 + ] + }, + { + "id": "80b2e9cf-e1a7-462f-ae0d-ffb4ba668a65", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 391 + ], + "label": "upscale_model", + "pos": [ + -2053.1484375, + 4270 + ] + } + ], + "outputs": [ + { + "id": "4e837941-de2d-4df8-8f94-686e24036897", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 304 + ], + "localized_name": "VIDEO", + "pos": [ + 1770, + 4110 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 93, + "type": "CFGGuider", + "pos": [ + -698, + 3670 + ], + "size": [ + 270, + 106.66666666666667 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 326 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 309 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 311 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 261 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + 
"Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 94, + "type": "KSamplerSelect", + "pos": [ + -698, + 3840 + ], + "size": [ + 270, + 68.88020833333334 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 262 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 99, + "type": "ManualSigmas", + "pos": [ + 410, + 3850 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 278 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 101, + "type": "LTXVConcatAVLatent", + "pos": [ + 410, + 4100 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + 
"type": "LATENT", + "link": 365 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 279 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 108, + "type": "CFGGuider", + "pos": [ + 410, + 3700 + ], + "size": [ + 270, + 98 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 280 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 281 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 282 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 276 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 111, + "type": "LTXVEmptyLatentAudio", + "pos": [ + -1100, + 4810 + ], + "size": [ + 270, + 120 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 383 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 329 + }, + { + 
"localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 354 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 300 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 123, + "type": "SamplerCustomAdvanced", + "pos": [ + -388, + 3520 + ], + "size": [ + 213.125, + 120 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 260 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 261 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 262 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 263 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 272 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 114, + "type": "LTXVConditioning", + "pos": [ + -1134, + 4140 
+ ], + "size": [ + 270, + 86.66666666666667 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 292 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 293 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 355 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 313 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 314 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 119, + "type": "CLIPTextEncode", + "pos": [ + -1164, + 3880 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 294 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 293 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 116, + 
"type": "LTXVConcatAVLatent", + "pos": [ + -520, + 4700 + ], + "size": [ + 187.5, + 60 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 324 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 300 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 322, + 323 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 122, + "type": "LTXVSeparateAVLatent", + "pos": [ + -394, + 3800 + ], + "size": [ + 240, + 46 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 270 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 266 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 124, + "type": "CLIPTextEncode", + "pos": [ + -1174.999849798713, + 3514.000055195033 + ], + "size": [ + 410, + 320 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 295 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": 
"text" + }, + "link": 345 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 292 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 98, + "type": "KSamplerSelect", + "pos": [ + 410, + 3980 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 277 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "gradient_estimation" + ] + }, + { + "id": 95, + "type": "LTXVScheduler", + "pos": [ + -700, + 3980 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 322 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "max_shift", + "name": "max_shift", + "type": "FLOAT", + "widget": { + "name": "max_shift" + }, + "link": null + }, + { + "localized_name": "base_shift", + "name": "base_shift", + "type": "FLOAT", + "widget": { + "name": "base_shift" + }, + "link": null + }, + { + "localized_name": 
"stretch", + "name": "stretch", + "type": "BOOLEAN", + "widget": { + "name": "stretch" + }, + "link": null + }, + { + "localized_name": "terminal", + "name": "terminal", + "type": "FLOAT", + "widget": { + "name": "terminal" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 263 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVScheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 126, + "type": "RandomNoise", + "pos": [ + -698, + 3520 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 260 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize" + ] + }, + { + "id": 107, + "type": "SamplerCustomAdvanced", + "pos": [ + 710, + 3570 + ], + "size": [ + 212.38333740234376, + 106 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 347 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 276 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 277 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + 
"link": 278 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 279 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 336 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 143, + "type": "RandomNoise", + "pos": [ + 410, + 3570 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 347 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 139, + "type": "LTXVAudioVAEDecode", + "pos": [ + 1130, + 3840 + ], + "size": [ + 240, + 46 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 338 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 384 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 339 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": 
"LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 106, + "type": "CreateVideo", + "pos": [ + 1420, + 3760 + ], + "size": [ + 270, + 78 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 352 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 339 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 356 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 304 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 134, + "type": "LoraLoaderModelOnly", + "pos": [ + -1650, + 3760 + ], + "size": [ + 420, + 82 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 325 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 388 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 326, + 327 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "ltx-2-19b-ic-lora-canny-control.safetensors", + "url": 
"https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Canny-Control/resolve/main/ltx-2-19b-ic-lora-canny-control.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-ic-lora-canny-control.safetensors", + 1 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 138, + "type": "LTXVSeparateAVLatent", + "pos": [ + 730, + 3730 + ], + "size": [ + 193.2916015625, + 46 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 337, + 351 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 338 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 144, + "type": "VAEDecodeTiled", + "pos": [ + 1120, + 3640 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 351 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 353 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + 
"widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 352 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 512, + 64, + 4096, + 8 + ] + }, + { + "id": 113, + "type": "VAEDecode", + "pos": [ + 1130, + 3530 + ], + "size": [ + 240, + 50 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 337 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 291 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 110, + "type": "GetImageSize", + "pos": [ + -1630, + 4450 + ], + "size": [ + 260, + 80 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 349 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 296 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 297 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 329, + 330 + ] + } 
+ ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 145, + "type": "PrimitiveInt", + "pos": [ + -1630, + 4620 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 354 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24, + "fixed" + ] + }, + { + "id": 148, + "type": "PrimitiveFloat", + "pos": [ + -1630, + 4750 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 355, + 356 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 115, + "type": "EmptyLTXVLatentVideo", + "pos": [ + -1100, + 4610 + ], + "size": [ + 270, + 146.66666666666669 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + 
"type": "INT", + "widget": { + "name": "width" + }, + "link": 296 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 297 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 330 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 360 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 149, + "type": "LTXVImgToVideoInplace", + "pos": [ + -1090, + 4400 + ], + "size": [ + 270, + 152 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 359 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 364 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 360 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 370 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 363 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 357 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 118, + "type": "Reroute", + "pos": [ + -230, + 4210 + ], + "size": [ + 75, + 26 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 303 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 289, + 291, + 367 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false + } + }, + { + "id": 151, + "type": "LTXVImgToVideoInplace", + "pos": [ + -20, + 4070 + ], + "size": [ + 270, + 182 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 367 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 379 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 366 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 371 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 368 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 365 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 104, + "type": "LTXVCropGuides", + "pos": [ + -10, + 3840 + ], + "size": [ + 240, + 66 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 310 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 312 + }, + 
{ + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 281 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 282 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 287 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 112, + "type": "LTXVLatentUpsampler", + "pos": [ + -10, + 3960 + ], + "size": [ + 260, + 66 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 287 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 288 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 289 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 366 + ] + } + ], + "title": "spatial", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 132, + "type": "LTXVAddGuide", + "pos": [ + -600, + 4420 + ], + "size": [ + 270, + 209.16666666666669 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 313 + }, + { + "localized_name": "negative", + 
"name": "negative", + "type": "CONDITIONING", + "link": 314 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 328 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 357 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 348 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 309, + 310 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 311, + 312 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 324 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 103, + "type": "CheckpointLoaderSimple", + "pos": [ + -1650, + 3590 + ], + "size": [ + 420, + 98 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 385 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 303, + 328, + 353, + 359 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for 
S&R": "CheckpointLoaderSimple", + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + }, + { + "id": 97, + "type": "LTXAVTextEncoderLoader", + "pos": [ + -1650, + 4040 + ], + "size": [ + 420, + 106 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 389 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 387 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 294, + 295 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2-19b-dev-fp8.safetensors", + "default" + ] + }, + { + 
"id": 105, + "type": "LoraLoaderModelOnly", + "pos": [ + -70, + 3570 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 327 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 390 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 280 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "ltx-2-19b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-distilled-lora-384.safetensors", + 1 + ] + }, + { + "id": 100, + "type": "LatentUpscaleModelLoader", + "pos": [ + -70, + 3700 + ], + "size": [ + 390, + 60 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 391 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 288 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "models": [ + { + "name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", + "url": 
"https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", + "directory": "latent_upscale_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-spatial-upscaler-x2-1.0.safetensors" + ] + }, + { + "id": 154, + "type": "MarkdownNote", + "pos": [ + -1660, + 4870 + ], + "size": [ + 350, + 170 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Frame Rate Note", + "properties": {}, + "widgets_values": [ + "Please make sure the frame rate value is the same in both boxes" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 155, + "type": "LTXVAudioVAELoader", + "pos": [ + -1640, + 3910 + ], + "size": [ + 400, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 386 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 383, + 384 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "LTXVAudioVAELoader" + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -1660, + 3440, + 440, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Basic Sampling", + "bounding": [ + -700, + 3440, + 570, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -1180, + 3440, + 440, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Latent", + "bounding": [ + -1180, + 4290, + 1050, + 680 + ], + "color": "#3f789e", + "font_size": 24, 
+ "flags": {} + }, + { + "id": 9, + "title": "Upscale Sampling(2x)", + "bounding": [ + -100, + 3440, + 1090, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Sampler", + "bounding": [ + 350, + 3480, + 620, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Model", + "bounding": [ + -90, + 3480, + 430, + 310 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Frame rate", + "bounding": [ + -1640, + 4550, + 290, + 271.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 326, + "origin_id": 134, + "origin_slot": 0, + "target_id": 93, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 309, + "origin_id": 132, + "origin_slot": 0, + "target_id": 93, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 311, + "origin_id": 132, + "origin_slot": 1, + "target_id": 93, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 266, + "origin_id": 122, + "origin_slot": 1, + "target_id": 101, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 280, + "origin_id": 105, + "origin_slot": 0, + "target_id": 108, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 281, + "origin_id": 104, + "origin_slot": 0, + "target_id": 108, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 282, + "origin_id": 104, + "origin_slot": 1, + "target_id": 108, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 329, + "origin_id": 110, + "origin_slot": 2, + "target_id": 111, + "target_slot": 1, + "type": "INT" + }, + { + "id": 260, + "origin_id": 126, + "origin_slot": 0, + "target_id": 123, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 261, + "origin_id": 93, + "origin_slot": 0, + "target_id": 123, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 262, + "origin_id": 94, + "origin_slot": 0, + "target_id": 123, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 
263, + "origin_id": 95, + "origin_slot": 0, + "target_id": 123, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 323, + "origin_id": 116, + "origin_slot": 0, + "target_id": 123, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 296, + "origin_id": 110, + "origin_slot": 0, + "target_id": 115, + "target_slot": 0, + "type": "INT" + }, + { + "id": 297, + "origin_id": 110, + "origin_slot": 1, + "target_id": 115, + "target_slot": 1, + "type": "INT" + }, + { + "id": 330, + "origin_id": 110, + "origin_slot": 2, + "target_id": 115, + "target_slot": 2, + "type": "INT" + }, + { + "id": 325, + "origin_id": 103, + "origin_slot": 0, + "target_id": 134, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 292, + "origin_id": 124, + "origin_slot": 0, + "target_id": 114, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 293, + "origin_id": 119, + "origin_slot": 0, + "target_id": 114, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 294, + "origin_id": 97, + "origin_slot": 0, + "target_id": 119, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 324, + "origin_id": 132, + "origin_slot": 2, + "target_id": 116, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 300, + "origin_id": 111, + "origin_slot": 0, + "target_id": 116, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 313, + "origin_id": 114, + "origin_slot": 0, + "target_id": 132, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 314, + "origin_id": 114, + "origin_slot": 1, + "target_id": 132, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 328, + "origin_id": 103, + "origin_slot": 2, + "target_id": 132, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 272, + "origin_id": 123, + "origin_slot": 0, + "target_id": 122, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 336, + "origin_id": 107, + "origin_slot": 1, + "target_id": 138, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 339, + "origin_id": 139, + "origin_slot": 0, + 
"target_id": 106, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 295, + "origin_id": 97, + "origin_slot": 0, + "target_id": 124, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 303, + "origin_id": 103, + "origin_slot": 2, + "target_id": 118, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 338, + "origin_id": 138, + "origin_slot": 1, + "target_id": 139, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 337, + "origin_id": 138, + "origin_slot": 0, + "target_id": 113, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 291, + "origin_id": 118, + "origin_slot": 0, + "target_id": 113, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 276, + "origin_id": 108, + "origin_slot": 0, + "target_id": 107, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 277, + "origin_id": 98, + "origin_slot": 0, + "target_id": 107, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 278, + "origin_id": 99, + "origin_slot": 0, + "target_id": 107, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 279, + "origin_id": 101, + "origin_slot": 0, + "target_id": 107, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 327, + "origin_id": 134, + "origin_slot": 0, + "target_id": 105, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 310, + "origin_id": 132, + "origin_slot": 0, + "target_id": 104, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 312, + "origin_id": 132, + "origin_slot": 1, + "target_id": 104, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 270, + "origin_id": 122, + "origin_slot": 0, + "target_id": 104, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 287, + "origin_id": 104, + "origin_slot": 2, + "target_id": 112, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 288, + "origin_id": 100, + "origin_slot": 0, + "target_id": 112, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 289, + "origin_id": 118, + "origin_slot": 0, + "target_id": 112, + "target_slot": 2, + "type": 
"VAE" + }, + { + "id": 322, + "origin_id": 116, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 304, + "origin_id": 106, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 345, + "origin_id": -10, + "origin_slot": 0, + "target_id": 124, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 347, + "origin_id": 143, + "origin_slot": 0, + "target_id": 107, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 348, + "origin_id": -10, + "origin_slot": 1, + "target_id": 132, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 349, + "origin_id": -10, + "origin_slot": 1, + "target_id": 110, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 351, + "origin_id": 138, + "origin_slot": 0, + "target_id": 144, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 352, + "origin_id": 144, + "origin_slot": 0, + "target_id": 106, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 353, + "origin_id": 103, + "origin_slot": 2, + "target_id": 144, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 354, + "origin_id": 145, + "origin_slot": 0, + "target_id": 111, + "target_slot": 2, + "type": "INT" + }, + { + "id": 355, + "origin_id": 148, + "origin_slot": 0, + "target_id": 114, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 356, + "origin_id": 148, + "origin_slot": 0, + "target_id": 106, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 357, + "origin_id": 149, + "origin_slot": 0, + "target_id": 132, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 359, + "origin_id": 103, + "origin_slot": 2, + "target_id": 149, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 360, + "origin_id": 115, + "origin_slot": 0, + "target_id": 149, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 363, + "origin_id": -10, + "origin_slot": 3, + "target_id": 149, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 364, + "origin_id": -10, + "origin_slot": 4, + 
"target_id": 149, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 365, + "origin_id": 151, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 366, + "origin_id": 112, + "origin_slot": 0, + "target_id": 151, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 367, + "origin_id": 118, + "origin_slot": 0, + "target_id": 151, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 368, + "origin_id": -10, + "origin_slot": 3, + "target_id": 151, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 370, + "origin_id": -10, + "origin_slot": 2, + "target_id": 149, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 2, + "target_id": 151, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 4, + "target_id": 151, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 383, + "origin_id": 155, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 384, + "origin_id": 155, + "origin_slot": 0, + "target_id": 139, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 385, + "origin_id": -10, + "origin_slot": 5, + "target_id": 103, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 386, + "origin_id": -10, + "origin_slot": 5, + "target_id": 155, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 387, + "origin_id": -10, + "origin_slot": 5, + "target_id": 97, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 388, + "origin_id": -10, + "origin_slot": 6, + "target_id": 134, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 389, + "origin_id": -10, + "origin_slot": 7, + "target_id": 97, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 390, + "origin_id": -10, + "origin_slot": 8, + "target_id": 105, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 391, + "origin_id": -10, + "origin_slot": 9, + "target_id": 100, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + 
"workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Canny to video", + "description": "Generates video from Canny edge maps using LTX-2, with optional synchronized audio." + } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG", + "ds": { + "scale": 0.7537190265006444, + "offset": [ + -330.27244430536007, + -3324.725077010053 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Chromatic Aberration.json b/blueprints/Chromatic Aberration.json index 5513cc665..893fb1190 100644 --- a/blueprints/Chromatic Aberration.json +++ b/blueprints/Chromatic Aberration.json @@ -1 +1,385 @@ -{"revision": 0, "last_node_id": 19, "last_link_id": 0, "nodes": [{"id": 19, "type": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", "pos": [3710, -2070], "size": [260, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Chromatic Aberration", "properties": {"proxyWidgets": [["17", "choice"], ["18", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 18, "lastLinkId": 23, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Chromatic Aberration", "inputNode": {"id": -10, "bounding": [3270, -2050, 120, 60]}, "outputNode": {"id": -20, "bounding": [4260, -2050, 120, 60]}, "inputs": [{"id": "3b33ac46-93a6-4b1c-896a-ed6fbd24e59c", "name": "images.image0", "type": "IMAGE", "linkIds": [20], "localized_name": "images.image0", "label": "image", "pos": [3370, -2030]}], "outputs": [{"id": "abe7cd79-a87b-4bd0-8923-d79a57d81a6e", "name": "IMAGE0", "type": "IMAGE", "linkIds": [23], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4280, -2030]}], "widgets": 
[], "nodes": [{"id": 16, "type": "GLSLShader", "pos": [3810, -2320], "size": [390, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 20}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 22}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 21}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [23]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Mode\nuniform float u_float0; // Amount (0 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MODE_LINEAR = 0;\nconst int MODE_RADIAL = 1;\nconst int MODE_BARREL = 2;\nconst int MODE_SWIRL = 3;\nconst int MODE_DIAGONAL = 4;\n\nconst float AMOUNT_SCALE = 0.0005;\nconst float RADIAL_MULT = 4.0;\nconst float BARREL_MULT = 
8.0;\nconst float INV_SQRT2 = 0.70710678118;\n\nvoid main() {\n vec2 uv = v_texCoord;\n vec4 original = texture(u_image0, uv);\n\n float amount = u_float0 * AMOUNT_SCALE;\n\n if (amount < 0.000001) {\n fragColor = original;\n return;\n }\n\n // Aspect-corrected coordinates for circular effects\n float aspect = u_resolution.x / u_resolution.y;\n vec2 centered = uv - 0.5;\n vec2 corrected = vec2(centered.x * aspect, centered.y);\n float r = length(corrected);\n vec2 dir = r > 0.0001 ? corrected / r : vec2(0.0);\n vec2 offset = vec2(0.0);\n\n if (u_int0 == MODE_LINEAR) {\n // Horizontal shift (no aspect correction needed)\n offset = vec2(amount, 0.0);\n }\n else if (u_int0 == MODE_RADIAL) {\n // Outward from center, stronger at edges\n offset = dir * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_BARREL) {\n // Lens distortion simulation (r² falloff)\n offset = dir * r * r * amount * BARREL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_SWIRL) {\n // Perpendicular to radial (rotational aberration)\n vec2 perp = vec2(-dir.y, dir.x);\n offset = perp * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_DIAGONAL) {\n // 45° offset (no aspect correction needed)\n offset = vec2(amount, amount) * INV_SQRT2;\n }\n \n float red = texture(u_image0, uv + offset).r;\n float green = original.g;\n float blue = texture(u_image0, uv - offset).b;\n \n fragColor = vec4(red, green, blue, original.a);\n}", "from_input"]}, {"id": 18, "type": "PrimitiveFloat", "pos": [3810, -2430], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [22]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, 
"step": 1}, "widgets_values": [30]}, {"id": 17, "type": "CustomCombo", "pos": [3520, -2320], "size": [270, 222], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [21]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Linear", 0, "Linear", "Radial", "Barrel", "Swirl", "Diagonal", ""]}], "groups": [], "links": [{"id": 22, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "FLOAT"}, {"id": 21, "origin_id": 17, "origin_slot": 1, "target_id": 16, "target_slot": 4, "type": "INT"}, {"id": 20, "origin_id": -10, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 23, "origin_id": 16, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 19, + "last_link_id": 0, + "nodes": [ + { + "id": 19, + "type": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", + "pos": [ + 3710, + -2070 + ], + "size": [ + 260, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "title": "Chromatic Aberration", + "properties": { + "proxyWidgets": [ + [ + "17", + "choice" + ], + [ + "18", + "value" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 18, + 
"lastLinkId": 23, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Chromatic Aberration", + "inputNode": { + "id": -10, + "bounding": [ + 3270, + -2050, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4260, + -2050, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3b33ac46-93a6-4b1c-896a-ed6fbd24e59c", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 20 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 3370, + -2030 + ] + } + ], + "outputs": [ + { + "id": "abe7cd79-a87b-4bd0-8923-d79a57d81a6e", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 23 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4280, + -2030 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 16, + "type": "GLSLShader", + "pos": [ + 3810, + -2320 + ], + "size": [ + 390, + 212 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 20 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 22 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 21 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": 
"COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 23 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Mode\nuniform float u_float0; // Amount (0 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MODE_LINEAR = 0;\nconst int MODE_RADIAL = 1;\nconst int MODE_BARREL = 2;\nconst int MODE_SWIRL = 3;\nconst int MODE_DIAGONAL = 4;\n\nconst float AMOUNT_SCALE = 0.0005;\nconst float RADIAL_MULT = 4.0;\nconst float BARREL_MULT = 8.0;\nconst float INV_SQRT2 = 0.70710678118;\n\nvoid main() {\n vec2 uv = v_texCoord;\n vec4 original = texture(u_image0, uv);\n\n float amount = u_float0 * AMOUNT_SCALE;\n\n if (amount < 0.000001) {\n fragColor = original;\n return;\n }\n\n // Aspect-corrected coordinates for circular effects\n float aspect = u_resolution.x / u_resolution.y;\n vec2 centered = uv - 0.5;\n vec2 corrected = vec2(centered.x * aspect, centered.y);\n float r = length(corrected);\n vec2 dir = r > 0.0001 ? 
corrected / r : vec2(0.0);\n vec2 offset = vec2(0.0);\n\n if (u_int0 == MODE_LINEAR) {\n // Horizontal shift (no aspect correction needed)\n offset = vec2(amount, 0.0);\n }\n else if (u_int0 == MODE_RADIAL) {\n // Outward from center, stronger at edges\n offset = dir * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_BARREL) {\n // Lens distortion simulation (r² falloff)\n offset = dir * r * r * amount * BARREL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_SWIRL) {\n // Perpendicular to radial (rotational aberration)\n vec2 perp = vec2(-dir.y, dir.x);\n offset = perp * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_DIAGONAL) {\n // 45° offset (no aspect correction needed)\n offset = vec2(amount, amount) * INV_SQRT2;\n }\n \n float red = texture(u_image0, uv + offset).r;\n float green = original.g;\n float blue = texture(u_image0, uv - offset).b;\n \n fragColor = vec4(red, green, blue, original.a);\n}", + "from_input" + ] + }, + { + "id": 18, + "type": "PrimitiveFloat", + "pos": [ + 3810, + -2430 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "amount", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 22 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "step": 1 + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 17, + "type": "CustomCombo", + "pos": [ + 3520, + -2320 + ], + "size": [ + 270, + 222 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "mode", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 21 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "Linear", + 0, + "Linear", + "Radial", + "Barrel", + "Swirl", + "Diagonal", + "" + ] + } + ], + "groups": [], + "links": [ + { + "id": 22, + "origin_id": 18, + "origin_slot": 0, + "target_id": 16, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 21, + "origin_id": 17, + "origin_slot": 1, + "target_id": 16, + "target_slot": 4, + "type": "INT" + }, + { + "id": 20, + "origin_id": -10, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 23, + "origin_id": 16, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adds lens-style chromatic aberration (color fringing) using a real-time GPU fragment shader." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/Color Adjustment.json b/blueprints/Color Adjustment.json index 47f3df783..5abbf8baa 100644 --- a/blueprints/Color Adjustment.json +++ b/blueprints/Color Adjustment.json @@ -1 +1,604 @@ -{"revision": 0, "last_node_id": 14, "last_link_id": 0, "nodes": [{"id": 14, "type": "36677b92-5dd8-47a5-9380-4da982c1894f", "pos": [3610, -2630], "size": [270, 150], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["4", "value"], ["5", "value"], ["7", "value"], ["6", "value"]]}, "widgets_values": [], "title": "Color Adjustment"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "36677b92-5dd8-47a5-9380-4da982c1894f", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 16, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Color Adjustment", "inputNode": {"id": -10, "bounding": [3110, -3560, 120, 60]}, "outputNode": {"id": -20, "bounding": [4070, -3560, 120, 60]}, "inputs": [{"id": "0431d493-5f28-4430-bd00-84733997fc08", "name": "images.image0", "type": "IMAGE", "linkIds": [29], "localized_name": "images.image0", "label": "image", "pos": [3210, -3540]}], "outputs": [{"id": "bee8ea06-a114-4612-8937-939f2c927bdb", "name": "IMAGE0", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4090, -3540]}], "widgets": [], "nodes": [{"id": 15, "type": "GLSLShader", "pos": [3590, -3940], "size": [420, 252], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 29}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": 
"u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 34}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 30}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 31}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 33}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [28]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // temperature (-100 to 100)\nuniform float u_float1; // tint (-100 to 100)\nuniform float u_float2; // vibrance (-100 to 100)\nuniform float u_float3; // saturation (-100 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float INPUT_SCALE = 0.01;\nconst float TEMP_TINT_PRIMARY = 0.3;\nconst float TEMP_TINT_SECONDARY = 0.15;\nconst float VIBRANCE_BOOST = 2.0;\nconst float SATURATION_BOOST = 2.0;\nconst float SKIN_PROTECTION = 0.5;\nconst float EPSILON = 
0.001;\nconst vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);\n\nvoid main() {\n vec4 tex = texture(u_image0, v_texCoord);\n vec3 color = tex.rgb;\n \n // Scale inputs: -100/100 \u2192 -1/1\n float temperature = u_float0 * INPUT_SCALE;\n float tint = u_float1 * INPUT_SCALE;\n float vibrance = u_float2 * INPUT_SCALE;\n float saturation = u_float3 * INPUT_SCALE;\n \n // Temperature (warm/cool): positive = warm, negative = cool\n color.r += temperature * TEMP_TINT_PRIMARY;\n color.b -= temperature * TEMP_TINT_PRIMARY;\n \n // Tint (green/magenta): positive = green, negative = magenta\n color.g += tint * TEMP_TINT_PRIMARY;\n color.r -= tint * TEMP_TINT_SECONDARY;\n color.b -= tint * TEMP_TINT_SECONDARY;\n \n // Single clamp after temperature/tint\n color = clamp(color, 0.0, 1.0);\n \n // Vibrance with skin protection\n if (vibrance != 0.0) {\n float maxC = max(color.r, max(color.g, color.b));\n float minC = min(color.r, min(color.g, color.b));\n float sat = maxC - minC;\n float gray = dot(color, LUMA_WEIGHTS);\n \n if (vibrance < 0.0) {\n // Desaturate: -100 \u2192 gray\n color = mix(vec3(gray), color, 1.0 + vibrance);\n } else {\n // Boost less saturated colors more\n float vibranceAmt = vibrance * (1.0 - sat);\n \n // Branchless skin tone protection\n float isWarmTone = step(color.b, color.g) * step(color.g, color.r);\n float warmth = (color.r - color.b) / max(maxC, EPSILON);\n float skinTone = isWarmTone * warmth * sat * (1.0 - sat);\n vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION);\n \n color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST);\n }\n }\n \n // Saturation\n if (saturation != 0.0) {\n float gray = dot(color, LUMA_WEIGHTS);\n float satMix = saturation < 0.0\n ? 
1.0 + saturation // -100 \u2192 gray\n : 1.0 + saturation * SATURATION_BOOST; // +100 \u2192 3x boost\n color = mix(vec3(gray), color, satMix);\n }\n \n fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);\n}", "from_input"]}, {"id": 6, "type": "PrimitiveFloat", "pos": [3290, -3610], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "vibrance", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [26, 31]}], "title": "Vibrance", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 7, "type": "PrimitiveFloat", "pos": [3290, -3720], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "saturation", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [33]}], "title": "Saturation", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 5, "type": "PrimitiveFloat", "pos": [3290, -3830], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "tint", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [30]}], "title": "Tint", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 255, 0]}, {"offset": 0.5, "color": 
[255, 255, 255]}, {"offset": 1, "color": [255, 0, 255]}]}, "widgets_values": [0]}, {"id": 4, "type": "PrimitiveFloat", "pos": [3290, -3940], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "temperature", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [34]}], "title": "Temperature", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [68, 136, 255]}, {"offset": 0.5, "color": [255, 255, 255]}, {"offset": 1, "color": [255, 136, 0]}]}, "widgets_values": [0]}], "groups": [], "links": [{"id": 34, "origin_id": 4, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "FLOAT"}, {"id": 30, "origin_id": 5, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "FLOAT"}, {"id": 31, "origin_id": 6, "origin_slot": 0, "target_id": 15, "target_slot": 4, "type": "FLOAT"}, {"id": 33, "origin_id": 7, "origin_slot": 0, "target_id": 15, "target_slot": 5, "type": "FLOAT"}, {"id": 29, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 28, "origin_id": 15, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 14, + "last_link_id": 0, + "nodes": [ + { + "id": 14, + "type": "36677b92-5dd8-47a5-9380-4da982c1894f", + "pos": [ + 3610, + -2630 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + 
"proxyWidgets": [ + [ + "4", + "value" + ], + [ + "5", + "value" + ], + [ + "7", + "value" + ], + [ + "6", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Color Adjustment" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "36677b92-5dd8-47a5-9380-4da982c1894f", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 16, + "lastLinkId": 36, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Color Adjustment", + "inputNode": { + "id": -10, + "bounding": [ + 3110, + -3560, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4070, + -3560, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "0431d493-5f28-4430-bd00-84733997fc08", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 29 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 3210, + -3540 + ] + } + ], + "outputs": [ + { + "id": "bee8ea06-a114-4612-8937-939f2c927bdb", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 28 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4090, + -3540 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 15, + "type": "GLSLShader", + "pos": [ + 3590, + -3940 + ], + "size": [ + 420, + 252 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 29 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 34 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 30 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + 
"link": 31 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": 33 + }, + { + "label": "u_float4", + "localized_name": "floats.u_float4", + "name": "floats.u_float4", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 28 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // temperature (-100 to 100)\nuniform float u_float1; // tint (-100 to 100)\nuniform float u_float2; // vibrance (-100 to 100)\nuniform float u_float3; // saturation (-100 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float INPUT_SCALE = 0.01;\nconst float TEMP_TINT_PRIMARY = 0.3;\nconst float TEMP_TINT_SECONDARY = 0.15;\nconst float VIBRANCE_BOOST = 2.0;\nconst float SATURATION_BOOST = 2.0;\nconst float SKIN_PROTECTION = 0.5;\nconst float EPSILON = 0.001;\nconst vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);\n\nvoid main() {\n vec4 tex = texture(u_image0, v_texCoord);\n vec3 color = tex.rgb;\n \n // Scale inputs: -100/100 \u2192 
-1/1\n float temperature = u_float0 * INPUT_SCALE;\n float tint = u_float1 * INPUT_SCALE;\n float vibrance = u_float2 * INPUT_SCALE;\n float saturation = u_float3 * INPUT_SCALE;\n \n // Temperature (warm/cool): positive = warm, negative = cool\n color.r += temperature * TEMP_TINT_PRIMARY;\n color.b -= temperature * TEMP_TINT_PRIMARY;\n \n // Tint (green/magenta): positive = green, negative = magenta\n color.g += tint * TEMP_TINT_PRIMARY;\n color.r -= tint * TEMP_TINT_SECONDARY;\n color.b -= tint * TEMP_TINT_SECONDARY;\n \n // Single clamp after temperature/tint\n color = clamp(color, 0.0, 1.0);\n \n // Vibrance with skin protection\n if (vibrance != 0.0) {\n float maxC = max(color.r, max(color.g, color.b));\n float minC = min(color.r, min(color.g, color.b));\n float sat = maxC - minC;\n float gray = dot(color, LUMA_WEIGHTS);\n \n if (vibrance < 0.0) {\n // Desaturate: -100 \u2192 gray\n color = mix(vec3(gray), color, 1.0 + vibrance);\n } else {\n // Boost less saturated colors more\n float vibranceAmt = vibrance * (1.0 - sat);\n \n // Branchless skin tone protection\n float isWarmTone = step(color.b, color.g) * step(color.g, color.r);\n float warmth = (color.r - color.b) / max(maxC, EPSILON);\n float skinTone = isWarmTone * warmth * sat * (1.0 - sat);\n vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION);\n \n color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST);\n }\n }\n \n // Saturation\n if (saturation != 0.0) {\n float gray = dot(color, LUMA_WEIGHTS);\n float satMix = saturation < 0.0\n ? 
1.0 + saturation // -100 \u2192 gray\n : 1.0 + saturation * SATURATION_BOOST; // +100 \u2192 3x boost\n color = mix(vec3(gray), color, satMix);\n }\n \n fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);\n}", + "from_input" + ] + }, + { + "id": 6, + "type": "PrimitiveFloat", + "pos": [ + 3290, + -3610 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "vibrance", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 26, + 31 + ] + } + ], + "title": "Vibrance", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 7, + "type": "PrimitiveFloat", + "pos": [ + 3290, + -3720 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "saturation", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 33 + ] + } + ], + "title": "Saturation", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 5, + "type": "PrimitiveFloat", + "pos": [ + 3290, + -3830 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "tint", + "localized_name": "value", + 
"name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 30 + ] + } + ], + "title": "Tint", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 255, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 255, + 255, + 255 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [ + 3290, + -3940 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "temperature", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 34 + ] + } + ], + "title": "Temperature", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 68, + 136, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 255, + 255, + 255 + ] + }, + { + "offset": 1, + "color": [ + 255, + 136, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + } + ], + "groups": [], + "links": [ + { + "id": 34, + "origin_id": 4, + "origin_slot": 0, + "target_id": 15, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 30, + "origin_id": 5, + "origin_slot": 0, + "target_id": 15, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 31, + "origin_id": 6, + "origin_slot": 0, + "target_id": 15, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 33, + "origin_id": 7, + "origin_slot": 0, + "target_id": 15, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 29, + "origin_id": -10, + "origin_slot": 
0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 15, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adjusts saturation, temperature, tint, and vibrance using a real-time GPU fragment shader." + } + ] + } +} diff --git a/blueprints/Color Balance.json b/blueprints/Color Balance.json index fe272d5dc..d921eab37 100644 --- a/blueprints/Color Balance.json +++ b/blueprints/Color Balance.json @@ -1 +1,1137 @@ -{"revision": 0, "last_node_id": 20, "last_link_id": 0, "nodes": [{"id": 20, "type": "243b9e93-7303-4500-8c70-58acb712f5bc", "pos": [3610, -2630], "size": [270, 420], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["4", "value"], ["5", "value"], ["6", "value"], ["7", "value"], ["8", "value"], ["9", "value"], ["10", "value"], ["11", "value"], ["12", "value"], ["13", "value"]]}, "widgets_values": [], "title": "Color Balance"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "243b9e93-7303-4500-8c70-58acb712f5bc", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Color Balance", "inputNode": {"id": -10, "bounding": [2660, -4500, 120, 60]}, "outputNode": {"id": -20, "bounding": [4270, -4500, 120, 60]}, "inputs": [{"id": "d24c0b6d-00bd-4e95-be80-8114e8376ec0", "name": "images.image0", "type": "IMAGE", "linkIds": [29], "localized_name": "images.image0", "label": "image", "pos": [2760, -4480]}], "outputs": [{"id": "92723f62-996e-496d-ad4f-81a38be4ad64", "name": "IMAGE0", "type": "IMAGE", "linkIds": [28], "localized_name": 
"IMAGE0", "label": "IMAGE", "pos": [4290, -4480]}], "widgets": [], "nodes": [{"id": 4, "type": "PrimitiveFloat", "pos": [3060, -4500], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "shadows red", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [30]}], "title": "Shadows Red", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 255, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 5, "type": "PrimitiveFloat", "pos": [3060, -4390], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "shadows green", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [31]}], "title": "Shadows Green", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 0, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [0, 255, 0]}]}, "widgets_values": [0]}, {"id": 6, "type": "PrimitiveFloat", "pos": [3060, -4280], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "shadows blue", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [32]}], "title": "Shadows Blue", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 255, 0]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, 
"color": [0, 0, 255]}]}, "widgets_values": [0]}, {"id": 7, "type": "PrimitiveFloat", "pos": [3060, -4170], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "midtones red", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [33]}], "title": "Midtones Red", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 255, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 8, "type": "PrimitiveFloat", "pos": [3060, -4060], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "midtones green", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [34]}], "title": "Midtones Green", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 0, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [0, 255, 0]}]}, "widgets_values": [0]}, {"id": 9, "type": "PrimitiveFloat", "pos": [3060, -3950], "size": [270, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "midtones blue", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [35]}], "title": "Midtones Blue", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 255, 0]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [0, 0, 255]}]}, 
"widgets_values": [0]}, {"id": 10, "type": "PrimitiveFloat", "pos": [3060, -3840], "size": [270, 58], "flags": {}, "order": 6, "mode": 0, "inputs": [{"label": "highlights red", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "title": "Highlights Red", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 255, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 11, "type": "PrimitiveFloat", "pos": [3060, -3730], "size": [270, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"label": "highlights green", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [37]}], "title": "Highlights Green", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 0, 255]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [0, 255, 0]}]}, "widgets_values": [0]}, {"id": 12, "type": "PrimitiveFloat", "pos": [3060, -3620], "size": [270, 58], "flags": {}, "order": 8, "mode": 0, "inputs": [{"label": "highlights blue", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [38]}], "title": "Highlights Blue", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 255, 0]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [0, 0, 255]}]}, 
"widgets_values": [0]}, {"id": 13, "type": "PrimitiveBoolean", "pos": [3060, -3510], "size": [270, 58], "flags": {}, "order": 9, "mode": 0, "inputs": [{"label": "preserve luminosity", "localized_name": "value", "name": "value", "type": "BOOLEAN", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "BOOLEAN", "name": "BOOLEAN", "type": "BOOLEAN", "links": [39]}], "title": "Preserve Luminosity", "properties": {"Node name for S&R": "PrimitiveBoolean"}, "widgets_values": [true]}, {"id": 15, "type": "GLSLShader", "pos": [3590, -4500], "size": [420, 500], "flags": {}, "order": 10, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 29}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 30}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 31}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 32}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 33}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": 34}, {"label": "u_float5", "localized_name": "floats.u_float5", "name": "floats.u_float5", "shape": 7, "type": "FLOAT", "link": 35}, {"label": "u_float6", "localized_name": "floats.u_float6", "name": "floats.u_float6", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float7", "localized_name": "floats.u_float7", "name": "floats.u_float7", "shape": 7, "type": "FLOAT", "link": 37}, {"label": "u_float8", "localized_name": "floats.u_float8", "name": "floats.u_float8", "shape": 7, "type": 
"FLOAT", "link": 38}, {"label": "u_bool0", "localized_name": "bools.u_bool0", "name": "bools.u_bool0", "shape": 7, "type": "BOOLEAN", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [28]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // shadows red (-100 to 100)\nuniform float u_float1; // shadows green (-100 to 100)\nuniform float u_float2; // shadows blue (-100 to 100)\nuniform float u_float3; // midtones red (-100 to 100)\nuniform float u_float4; // midtones green (-100 to 100)\nuniform float u_float5; // midtones blue (-100 to 100)\nuniform float u_float6; // highlights red (-100 to 100)\nuniform float u_float7; // highlights green (-100 to 100)\nuniform float u_float8; // highlights blue (-100 to 100)\nuniform bool u_bool0; // preserve luminosity\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nvec3 rgb2hsl(vec3 c) {\n float maxC = max(c.r, max(c.g, c.b));\n float minC = min(c.r, min(c.g, c.b));\n float l = (maxC + minC) * 0.5;\n if (maxC == minC) return vec3(0.0, 0.0, l);\n float d = maxC - minC;\n float s = l > 0.5 ? d / (2.0 - maxC - minC) : d / (maxC + minC);\n float h;\n if (maxC == c.r) {\n h = (c.g - c.b) / d + (c.g < c.b ? 
6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / d + 2.0;\n } else {\n h = (c.r - c.g) / d + 4.0;\n }\n h /= 6.0;\n return vec3(h, s, l);\n}\n\nfloat hue2rgb(float p, float q, float t) {\n if (t < 0.0) t += 1.0;\n if (t > 1.0) t -= 1.0;\n if (t < 1.0 / 6.0) return p + (q - p) * 6.0 * t;\n if (t < 1.0 / 2.0) return q;\n if (t < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - t) * 6.0;\n return p;\n}\n\nvec3 hsl2rgb(vec3 hsl) {\n float h = hsl.x, s = hsl.y, l = hsl.z;\n if (s == 0.0) return vec3(l);\n float q = l < 0.5 ? l * (1.0 + s) : l + s - l * s;\n float p = 2.0 * l - q;\n return vec3(\n hue2rgb(p, q, h + 1.0 / 3.0),\n hue2rgb(p, q, h),\n hue2rgb(p, q, h - 1.0 / 3.0)\n );\n}\n\nvoid main() {\n vec4 tex = texture(u_image0, v_texCoord);\n vec3 color = tex.rgb;\n\n // Build shadows/midtones/highlights vectors (scale -100..100 to -1..1)\n vec3 shadows = vec3(u_float0, u_float1, u_float2) * 0.01;\n vec3 midtones = vec3(u_float3, u_float4, u_float5) * 0.01;\n vec3 highlights = vec3(u_float6, u_float7, u_float8) * 0.01;\n\n // GIMP: HSL lightness for weight calculation\n float maxC = max(color.r, max(color.g, color.b));\n float minC = min(color.r, min(color.g, color.b));\n float lightness = (maxC + minC) * 0.5;\n\n // GIMP weight curves: linear ramps with constants a=0.25, b=0.333, scale=0.7\n const float a = 0.25;\n const float b = 0.333;\n const float scale = 0.7;\n\n float sw = clamp((lightness - b) / -a + 0.5, 0.0, 1.0) * scale;\n float mw = clamp((lightness - b) / a + 0.5, 0.0, 1.0) *\n clamp((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale;\n float hw = clamp((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale;\n\n color += sw * shadows + mw * midtones + hw * highlights;\n\n if (u_bool0) {\n vec3 hsl = rgb2hsl(clamp(color, 0.0, 1.0));\n hsl.z = lightness;\n color = hsl2rgb(hsl);\n }\n\n fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 29, "origin_id": -10, "origin_slot": 0, "target_id": 15, 
"target_slot": 0, "type": "IMAGE"}, {"id": 28, "origin_id": 15, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 30, "origin_id": 4, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "FLOAT"}, {"id": 31, "origin_id": 5, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "FLOAT"}, {"id": 32, "origin_id": 6, "origin_slot": 0, "target_id": 15, "target_slot": 4, "type": "FLOAT"}, {"id": 33, "origin_id": 7, "origin_slot": 0, "target_id": 15, "target_slot": 5, "type": "FLOAT"}, {"id": 34, "origin_id": 8, "origin_slot": 0, "target_id": 15, "target_slot": 6, "type": "FLOAT"}, {"id": 35, "origin_id": 9, "origin_slot": 0, "target_id": 15, "target_slot": 7, "type": "FLOAT"}, {"id": 36, "origin_id": 10, "origin_slot": 0, "target_id": 15, "target_slot": 8, "type": "FLOAT"}, {"id": 37, "origin_id": 11, "origin_slot": 0, "target_id": 15, "target_slot": 9, "type": "FLOAT"}, {"id": 38, "origin_id": 12, "origin_slot": 0, "target_id": 15, "target_slot": 10, "type": "FLOAT"}, {"id": 39, "origin_id": 13, "origin_slot": 0, "target_id": 15, "target_slot": 11, "type": "BOOLEAN"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} \ No newline at end of file +{ + "revision": 0, + "last_node_id": 20, + "last_link_id": 0, + "nodes": [ + { + "id": 20, + "type": "243b9e93-7303-4500-8c70-58acb712f5bc", + "pos": [ + 3610, + -2630 + ], + "size": [ + 270, + 420 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "4", + "value" + ], + [ + "5", + "value" + ], + [ + "6", + "value" + ], + [ + "7", + "value" + ], + [ + "8", + "value" + ], + [ + "9", + "value" + ], + [ + "10", + "value" + ], + [ + "11", + 
"value" + ], + [ + "12", + "value" + ], + [ + "13", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Color Balance" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "243b9e93-7303-4500-8c70-58acb712f5bc", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 15, + "lastLinkId": 39, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Color Balance", + "inputNode": { + "id": -10, + "bounding": [ + 2660, + -4500, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4270, + -4500, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "d24c0b6d-00bd-4e95-be80-8114e8376ec0", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 29 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 2760, + -4480 + ] + } + ], + "outputs": [ + { + "id": "92723f62-996e-496d-ad4f-81a38be4ad64", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 28 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4290, + -4480 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -4500 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "shadows red", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 30 + ] + } + ], + "title": "Shadows Red", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 255, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 5, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -4390 + ], + 
"size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "shadows green", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 31 + ] + } + ], + "title": "Shadows Green", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 0, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 255, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 6, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -4280 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "shadows blue", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 32 + ] + } + ], + "title": "Shadows Blue", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 255, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 0, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 7, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -4170 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "midtones red", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": 
"FLOAT", + "links": [ + 33 + ] + } + ], + "title": "Midtones Red", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 255, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 8, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -4060 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "midtones green", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 34 + ] + } + ], + "title": "Midtones Green", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 0, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 255, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 9, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -3950 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "midtones blue", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 35 + ] + } + ], + "title": "Midtones Blue", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 255, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 
128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 0, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 10, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -3840 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "label": "highlights red", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 36 + ] + } + ], + "title": "Highlights Red", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 255, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 11, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -3730 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "label": "highlights green", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 37 + ] + } + ], + "title": "Highlights Green", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 0, + 255 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 255, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 12, + "type": "PrimitiveFloat", + "pos": [ + 3060, + -3620 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "label": 
"highlights blue", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 38 + ] + } + ], + "title": "Highlights Blue", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": -100, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 255, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 0, + 0, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 13, + "type": "PrimitiveBoolean", + "pos": [ + 3060, + -3510 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "label": "preserve luminosity", + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 39 + ] + } + ], + "title": "Preserve Luminosity", + "properties": { + "Node name for S&R": "PrimitiveBoolean" + }, + "widgets_values": [ + true + ] + }, + { + "id": 15, + "type": "GLSLShader", + "pos": [ + 3590, + -4500 + ], + "size": [ + 420, + 500 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 29 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 30 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 31 + }, + { + "label": 
"u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 32 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": 33 + }, + { + "label": "u_float4", + "localized_name": "floats.u_float4", + "name": "floats.u_float4", + "shape": 7, + "type": "FLOAT", + "link": 34 + }, + { + "label": "u_float5", + "localized_name": "floats.u_float5", + "name": "floats.u_float5", + "shape": 7, + "type": "FLOAT", + "link": 35 + }, + { + "label": "u_float6", + "localized_name": "floats.u_float6", + "name": "floats.u_float6", + "shape": 7, + "type": "FLOAT", + "link": 36 + }, + { + "label": "u_float7", + "localized_name": "floats.u_float7", + "name": "floats.u_float7", + "shape": 7, + "type": "FLOAT", + "link": 37 + }, + { + "label": "u_float8", + "localized_name": "floats.u_float8", + "name": "floats.u_float8", + "shape": 7, + "type": "FLOAT", + "link": 38 + }, + { + "label": "u_bool0", + "localized_name": "bools.u_bool0", + "name": "bools.u_bool0", + "shape": 7, + "type": "BOOLEAN", + "link": 39 + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 28 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp 
float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // shadows red (-100 to 100)\nuniform float u_float1; // shadows green (-100 to 100)\nuniform float u_float2; // shadows blue (-100 to 100)\nuniform float u_float3; // midtones red (-100 to 100)\nuniform float u_float4; // midtones green (-100 to 100)\nuniform float u_float5; // midtones blue (-100 to 100)\nuniform float u_float6; // highlights red (-100 to 100)\nuniform float u_float7; // highlights green (-100 to 100)\nuniform float u_float8; // highlights blue (-100 to 100)\nuniform bool u_bool0; // preserve luminosity\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nvec3 rgb2hsl(vec3 c) {\n float maxC = max(c.r, max(c.g, c.b));\n float minC = min(c.r, min(c.g, c.b));\n float l = (maxC + minC) * 0.5;\n if (maxC == minC) return vec3(0.0, 0.0, l);\n float d = maxC - minC;\n float s = l > 0.5 ? d / (2.0 - maxC - minC) : d / (maxC + minC);\n float h;\n if (maxC == c.r) {\n h = (c.g - c.b) / d + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / d + 2.0;\n } else {\n h = (c.r - c.g) / d + 4.0;\n }\n h /= 6.0;\n return vec3(h, s, l);\n}\n\nfloat hue2rgb(float p, float q, float t) {\n if (t < 0.0) t += 1.0;\n if (t > 1.0) t -= 1.0;\n if (t < 1.0 / 6.0) return p + (q - p) * 6.0 * t;\n if (t < 1.0 / 2.0) return q;\n if (t < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - t) * 6.0;\n return p;\n}\n\nvec3 hsl2rgb(vec3 hsl) {\n float h = hsl.x, s = hsl.y, l = hsl.z;\n if (s == 0.0) return vec3(l);\n float q = l < 0.5 ? 
l * (1.0 + s) : l + s - l * s;\n float p = 2.0 * l - q;\n return vec3(\n hue2rgb(p, q, h + 1.0 / 3.0),\n hue2rgb(p, q, h),\n hue2rgb(p, q, h - 1.0 / 3.0)\n );\n}\n\nvoid main() {\n vec4 tex = texture(u_image0, v_texCoord);\n vec3 color = tex.rgb;\n\n // Build shadows/midtones/highlights vectors (scale -100..100 to -1..1)\n vec3 shadows = vec3(u_float0, u_float1, u_float2) * 0.01;\n vec3 midtones = vec3(u_float3, u_float4, u_float5) * 0.01;\n vec3 highlights = vec3(u_float6, u_float7, u_float8) * 0.01;\n\n // GIMP: HSL lightness for weight calculation\n float maxC = max(color.r, max(color.g, color.b));\n float minC = min(color.r, min(color.g, color.b));\n float lightness = (maxC + minC) * 0.5;\n\n // GIMP weight curves: linear ramps with constants a=0.25, b=0.333, scale=0.7\n const float a = 0.25;\n const float b = 0.333;\n const float scale = 0.7;\n\n float sw = clamp((lightness - b) / -a + 0.5, 0.0, 1.0) * scale;\n float mw = clamp((lightness - b) / a + 0.5, 0.0, 1.0) *\n clamp((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale;\n float hw = clamp((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale;\n\n color += sw * shadows + mw * midtones + hw * highlights;\n\n if (u_bool0) {\n vec3 hsl = rgb2hsl(clamp(color, 0.0, 1.0));\n hsl.z = lightness;\n color = hsl2rgb(hsl);\n }\n\n fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);\n}", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 29, + "origin_id": -10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 15, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": 4, + "origin_slot": 0, + "target_id": 15, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 31, + "origin_id": 5, + "origin_slot": 0, + "target_id": 15, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 32, + "origin_id": 6, + "origin_slot": 0, + "target_id": 15, + "target_slot": 4, + "type": "FLOAT" + }, + { + 
"id": 33, + "origin_id": 7, + "origin_slot": 0, + "target_id": 15, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 34, + "origin_id": 8, + "origin_slot": 0, + "target_id": 15, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 35, + "origin_id": 9, + "origin_slot": 0, + "target_id": 15, + "target_slot": 7, + "type": "FLOAT" + }, + { + "id": 36, + "origin_id": 10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 8, + "type": "FLOAT" + }, + { + "id": 37, + "origin_id": 11, + "origin_slot": 0, + "target_id": 15, + "target_slot": 9, + "type": "FLOAT" + }, + { + "id": 38, + "origin_id": 12, + "origin_slot": 0, + "target_id": 15, + "target_slot": 10, + "type": "FLOAT" + }, + { + "id": 39, + "origin_id": 13, + "origin_slot": 0, + "target_id": 15, + "target_slot": 11, + "type": "BOOLEAN" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Balances colors across shadows, midtones, and highlights using a real-time GPU fragment shader." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/Color Curves.json b/blueprints/Color Curves.json index 933c53978..b9bfb7029 100644 --- a/blueprints/Color Curves.json +++ b/blueprints/Color Curves.json @@ -1 +1,616 @@ -{"revision": 0,"last_node_id": 10,"last_link_id": 0,"nodes": [{"id": 10,"type": "d5c462c8-1372-4af8-84f2-547c83470d04","pos": [3610,-2630],"size": [270,420],"flags": {},"order": 0,"mode": 0,"inputs": [{"label": "image","localized_name": "images.image0","name": "images.image0","type": "IMAGE","link": null}],"outputs": [{"label": "IMAGE","localized_name": "IMAGE0","name": "IMAGE0","type": "IMAGE","links": []}],"properties": {"proxyWidgets": [["4","curve"],["5","curve"],["6","curve"],["7","curve"]]},"widgets_values": [],"title": "Color Curves"}],"links": [],"version": 0.4,"definitions": {"subgraphs": [{"id": "d5c462c8-1372-4af8-84f2-547c83470d04","version": 1,"state": {"lastGroupId": 0,"lastNodeId": 8,"lastLinkId": 33,"lastRerouteId": 0},"revision": 0,"config": {},"name": "Color Curves","inputNode": {"id": -10,"bounding": [2660,-4500,120,60]},"outputNode": {"id": -20,"bounding": [4270,-4500,120,60]},"inputs": [{"id": "abc345b7-f55e-4f32-a11d-3aa4c2b0936b","name": "images.image0","type": "IMAGE","linkIds": [29],"localized_name": "images.image0","label": "image","pos": [2760,-4480]}],"outputs": [{"id": "eb0ec079-46da-4408-8263-9ef85569d33d","name": "IMAGE0","type": "IMAGE","linkIds": [28],"localized_name": "IMAGE0","label": "IMAGE","pos": [4290,-4480]}],"widgets": [],"nodes": [{"id": 4,"type": "CurveEditor","pos": [3060,-4500],"size": [270,200],"flags": {},"order": 0,"mode": 0,"inputs": [{"label": "curve","localized_name": "curve","name": "curve","type": "CURVE","widget": {"name": "curve"},"link": null},{"label": "histogram","localized_name": "histogram","name": "histogram","type": "HISTOGRAM","shape": 7,"link": null}],"outputs": [{"localized_name": "CURVE","name": "CURVE","type": "CURVE","links": [30]}],"title": "RGB 
Master","properties": {"Node name for S&R": "CurveEditor"},"widgets_values": []},{"id": 5,"type": "CurveEditor","pos": [3060,-4250],"size": [270,200],"flags": {},"order": 1,"mode": 0,"inputs": [{"label": "curve","localized_name": "curve","name": "curve","type": "CURVE","widget": {"name": "curve"},"link": null},{"label": "histogram","localized_name": "histogram","name": "histogram","type": "HISTOGRAM","shape": 7,"link": null}],"outputs": [{"localized_name": "CURVE","name": "CURVE","type": "CURVE","links": [31]}],"title": "Red","properties": {"Node name for S&R": "CurveEditor"},"widgets_values": []},{"id": 6,"type": "CurveEditor","pos": [3060,-4000],"size": [270,200],"flags": {},"order": 2,"mode": 0,"inputs": [{"label": "curve","localized_name": "curve","name": "curve","type": "CURVE","widget": {"name": "curve"},"link": null},{"label": "histogram","localized_name": "histogram","name": "histogram","type": "HISTOGRAM","shape": 7,"link": null}],"outputs": [{"localized_name": "CURVE","name": "CURVE","type": "CURVE","links": [32]}],"title": "Green","properties": {"Node name for S&R": "CurveEditor"},"widgets_values": []},{"id": 7,"type": "CurveEditor","pos": [3060,-3750],"size": [270,200],"flags": {},"order": 3,"mode": 0,"inputs": [{"label": "curve","localized_name": "curve","name": "curve","type": "CURVE","widget": {"name": "curve"},"link": null},{"label": "histogram","localized_name": "histogram","name": "histogram","type": "HISTOGRAM","shape": 7,"link": null}],"outputs": [{"localized_name": "CURVE","name": "CURVE","type": "CURVE","links": [33]}],"title": "Blue","properties": {"Node name for S&R": "CurveEditor"},"widgets_values": []},{"id": 8,"type": "GLSLShader","pos": [3590,-4500],"size": [420,500],"flags": {},"order": 4,"mode": 0,"inputs": [{"label": "image0","localized_name": "images.image0","name": "images.image0","type": "IMAGE","link": 29},{"label": "image1","localized_name": "images.image1","name": "images.image1","shape": 7,"type": "IMAGE","link": 
null},{"label": "u_curve0","localized_name": "curves.u_curve0","name": "curves.u_curve0","shape": 7,"type": "CURVE","link": 30},{"label": "u_curve1","localized_name": "curves.u_curve1","name": "curves.u_curve1","shape": 7,"type": "CURVE","link": 31},{"label": "u_curve2","localized_name": "curves.u_curve2","name": "curves.u_curve2","shape": 7,"type": "CURVE","link": 32},{"label": "u_curve3","localized_name": "curves.u_curve3","name": "curves.u_curve3","shape": 7,"type": "CURVE","link": 33},{"localized_name": "fragment_shader","name": "fragment_shader","type": "STRING","widget": {"name": "fragment_shader"},"link": null},{"localized_name": "size_mode","name": "size_mode","type": "COMFY_DYNAMICCOMBO_V3","widget": {"name": "size_mode"},"link": null}],"outputs": [{"localized_name": "IMAGE0","name": "IMAGE0","type": "IMAGE","links": [28]},{"localized_name": "IMAGE1","name": "IMAGE1","type": "IMAGE","links": null},{"localized_name": "IMAGE2","name": "IMAGE2","type": "IMAGE","links": null},{"localized_name": "IMAGE3","name": "IMAGE3","type": "IMAGE","links": null}],"properties": {"Node name for S&R": "GLSLShader"},"widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform sampler2D u_curve0; // RGB master curve (256x1 LUT)\nuniform sampler2D u_curve1; // Red channel curve\nuniform sampler2D u_curve2; // Green channel curve\nuniform sampler2D u_curve3; // Blue channel curve\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\n// GIMP-compatible curve lookup with manual linear interpolation.\n// Matches gimp_curve_map_value_inline() from gimpcurve-map.c:\n// index = value * (n_samples - 1)\n// f = fract(index)\n// result = (1-f) * samples[floor] + f * samples[ceil]\n//\n// Uses texelFetch (NEAREST) to avoid GPU half-texel offset issues\n// that occur with texture() + GL_LINEAR on small 256x1 LUTs.\nfloat applyCurve(sampler2D curve, float value) {\n value = clamp(value, 0.0, 1.0);\n\n float pos = value * 255.0;\n int lo 
= int(floor(pos));\n int hi = min(lo + 1, 255);\n float f = pos - float(lo);\n\n float a = texelFetch(curve, ivec2(lo, 0), 0).r;\n float b = texelFetch(curve, ivec2(hi, 0), 0).r;\n\n return a + f * (b - a);\n}\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n\n // GIMP order: per-channel curves first, then RGB master curve.\n // See gimp_curve_map_pixels() default case in gimpcurve-map.c:\n // dest = colors_curve( channel_curve( src ) )\n float tmp_r = applyCurve(u_curve1, color.r);\n float tmp_g = applyCurve(u_curve2, color.g);\n float tmp_b = applyCurve(u_curve3, color.b);\n color.r = applyCurve(u_curve0, tmp_r);\n color.g = applyCurve(u_curve0, tmp_g);\n color.b = applyCurve(u_curve0, tmp_b);\n\n fragColor0 = vec4(color.rgb, color.a);\n}\n","from_input"]}],"groups": [],"links": [{"id": 29,"origin_id": -10,"origin_slot": 0,"target_id": 8,"target_slot": 0,"type": "IMAGE"},{"id": 28,"origin_id": 8,"origin_slot": 0,"target_id": -20,"target_slot": 0,"type": "IMAGE"},{"id": 30,"origin_id": 4,"origin_slot": 0,"target_id": 8,"target_slot": 2,"type": "CURVE"},{"id": 31,"origin_id": 5,"origin_slot": 0,"target_id": 8,"target_slot": 3,"type": "CURVE"},{"id": 32,"origin_id": 6,"origin_slot": 0,"target_id": 8,"target_slot": 4,"type": "CURVE"},{"id": 33,"origin_id": 7,"origin_slot": 0,"target_id": 8,"target_slot": 5,"type": "CURVE"}],"extra": {"workflowRendererVersion": "LG"},"category": "Image Tools/Color adjust"}]}} \ No newline at end of file +{ + "revision": 0, + "last_node_id": 10, + "last_link_id": 0, + "nodes": [ + { + "id": 10, + "type": "d5c462c8-1372-4af8-84f2-547c83470d04", + "pos": [ + 3610, + -2630 + ], + "size": [ + 270, + 420 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + 
"properties": { + "proxyWidgets": [ + [ + "4", + "curve" + ], + [ + "5", + "curve" + ], + [ + "6", + "curve" + ], + [ + "7", + "curve" + ] + ] + }, + "widgets_values": [], + "title": "Color Curves" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "d5c462c8-1372-4af8-84f2-547c83470d04", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 9, + "lastLinkId": 38, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Color Curves", + "inputNode": { + "id": -10, + "bounding": [ + 2660, + -4500, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4270, + -4500, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "abc345b7-f55e-4f32-a11d-3aa4c2b0936b", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 29, + 34 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 2760, + -4480 + ] + } + ], + "outputs": [ + { + "id": "eb0ec079-46da-4408-8263-9ef85569d33d", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 28 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4290, + -4480 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 4, + "type": "CurveEditor", + "pos": [ + 3060, + -4500 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "curve", + "localized_name": "curve", + "name": "curve", + "type": "CURVE", + "widget": { + "name": "curve" + }, + "link": null + }, + { + "label": "histogram", + "localized_name": "histogram", + "name": "histogram", + "type": "HISTOGRAM", + "shape": 7, + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "CURVE", + "name": "CURVE", + "type": "CURVE", + "links": [ + 30 + ] + } + ], + "title": "RGB Master", + "properties": { + "Node name for S&R": "CurveEditor" + }, + "widgets_values": [] + }, + { + "id": 5, + "type": "CurveEditor", + "pos": [ + 3060, + -4250 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + 
{ + "label": "curve", + "localized_name": "curve", + "name": "curve", + "type": "CURVE", + "widget": { + "name": "curve" + }, + "link": null + }, + { + "label": "histogram", + "localized_name": "histogram", + "name": "histogram", + "type": "HISTOGRAM", + "shape": 7, + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "CURVE", + "name": "CURVE", + "type": "CURVE", + "links": [ + 31 + ] + } + ], + "title": "Red", + "properties": { + "Node name for S&R": "CurveEditor" + }, + "widgets_values": [] + }, + { + "id": 6, + "type": "CurveEditor", + "pos": [ + 3060, + -4000 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "curve", + "localized_name": "curve", + "name": "curve", + "type": "CURVE", + "widget": { + "name": "curve" + }, + "link": null + }, + { + "label": "histogram", + "localized_name": "histogram", + "name": "histogram", + "type": "HISTOGRAM", + "shape": 7, + "link": 37 + } + ], + "outputs": [ + { + "localized_name": "CURVE", + "name": "CURVE", + "type": "CURVE", + "links": [ + 32 + ] + } + ], + "title": "Green", + "properties": { + "Node name for S&R": "CurveEditor" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CurveEditor", + "pos": [ + 3060, + -3750 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "curve", + "localized_name": "curve", + "name": "curve", + "type": "CURVE", + "widget": { + "name": "curve" + }, + "link": null + }, + { + "label": "histogram", + "localized_name": "histogram", + "name": "histogram", + "type": "HISTOGRAM", + "shape": 7, + "link": 38 + } + ], + "outputs": [ + { + "localized_name": "CURVE", + "name": "CURVE", + "type": "CURVE", + "links": [ + 33 + ] + } + ], + "title": "Blue", + "properties": { + "Node name for S&R": "CurveEditor" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "GLSLShader", + "pos": [ + 3590, + -4500 + ], + "size": [ + 420, + 500 + ], + "flags": {}, + "order": 4, + "mode": 
0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 29 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_curve0", + "localized_name": "curves.u_curve0", + "name": "curves.u_curve0", + "shape": 7, + "type": "CURVE", + "link": 30 + }, + { + "label": "u_curve1", + "localized_name": "curves.u_curve1", + "name": "curves.u_curve1", + "shape": 7, + "type": "CURVE", + "link": 31 + }, + { + "label": "u_curve2", + "localized_name": "curves.u_curve2", + "name": "curves.u_curve2", + "shape": 7, + "type": "CURVE", + "link": 32 + }, + { + "label": "u_curve3", + "localized_name": "curves.u_curve3", + "name": "curves.u_curve3", + "shape": 7, + "type": "CURVE", + "link": 33 + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 28 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform sampler2D u_curve0; // RGB master curve (256x1 LUT)\nuniform sampler2D u_curve1; // Red channel curve\nuniform sampler2D u_curve2; // Green channel curve\nuniform sampler2D u_curve3; // Blue channel curve\n\nin vec2 v_texCoord;\nlayout(location = 
0) out vec4 fragColor0;\n\n// GIMP-compatible curve lookup with manual linear interpolation.\n// Matches gimp_curve_map_value_inline() from gimpcurve-map.c:\n// index = value * (n_samples - 1)\n// f = fract(index)\n// result = (1-f) * samples[floor] + f * samples[ceil]\n//\n// Uses texelFetch (NEAREST) to avoid GPU half-texel offset issues\n// that occur with texture() + GL_LINEAR on small 256x1 LUTs.\nfloat applyCurve(sampler2D curve, float value) {\n value = clamp(value, 0.0, 1.0);\n\n float pos = value * 255.0;\n int lo = int(floor(pos));\n int hi = min(lo + 1, 255);\n float f = pos - float(lo);\n\n float a = texelFetch(curve, ivec2(lo, 0), 0).r;\n float b = texelFetch(curve, ivec2(hi, 0), 0).r;\n\n return a + f * (b - a);\n}\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n\n // GIMP order: per-channel curves first, then RGB master curve.\n // See gimp_curve_map_pixels() default case in gimpcurve-map.c:\n // dest = colors_curve( channel_curve( src ) )\n float tmp_r = applyCurve(u_curve1, color.r);\n float tmp_g = applyCurve(u_curve2, color.g);\n float tmp_b = applyCurve(u_curve3, color.b);\n color.r = applyCurve(u_curve0, tmp_r);\n color.g = applyCurve(u_curve0, tmp_g);\n color.b = applyCurve(u_curve0, tmp_b);\n\n fragColor0 = vec4(color.rgb, color.a);\n}\n", + "from_input" + ] + }, + { + "id": 9, + "type": "ImageHistogram", + "pos": [ + 2800, + -4300 + ], + "size": [ + 210, + 150 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "HISTOGRAM", + "name": "rgb", + "type": "HISTOGRAM", + "links": [ + 35 + ] + }, + { + "localized_name": "HISTOGRAM", + "name": "luminance", + "type": "HISTOGRAM", + "links": [] + }, + { + "localized_name": "HISTOGRAM", + "name": "red", + "type": "HISTOGRAM", + "links": [ + 36 + ] + }, + { + "localized_name": "HISTOGRAM", + "name": "green", + "type": 
"HISTOGRAM", + "links": [ + 37 + ] + }, + { + "localized_name": "HISTOGRAM", + "name": "blue", + "type": "HISTOGRAM", + "links": [ + 38 + ] + } + ], + "properties": { + "Node name for S&R": "ImageHistogram" + }, + "widgets_values": [] + } + ], + "groups": [], + "links": [ + { + "id": 29, + "origin_id": -10, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": 4, + "origin_slot": 0, + "target_id": 8, + "target_slot": 2, + "type": "CURVE" + }, + { + "id": 31, + "origin_id": 5, + "origin_slot": 0, + "target_id": 8, + "target_slot": 3, + "type": "CURVE" + }, + { + "id": 32, + "origin_id": 6, + "origin_slot": 0, + "target_id": 8, + "target_slot": 4, + "type": "CURVE" + }, + { + "id": 33, + "origin_id": 7, + "origin_slot": 0, + "target_id": 8, + "target_slot": 5, + "type": "CURVE" + }, + { + "id": 34, + "origin_id": -10, + "origin_slot": 0, + "target_id": 9, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 35, + "origin_id": 9, + "origin_slot": 0, + "target_id": 4, + "target_slot": 1, + "type": "HISTOGRAM" + }, + { + "id": 36, + "origin_id": 9, + "origin_slot": 2, + "target_id": 5, + "target_slot": 1, + "type": "HISTOGRAM" + }, + { + "id": 37, + "origin_id": 9, + "origin_slot": 3, + "target_id": 6, + "target_slot": 1, + "type": "HISTOGRAM" + }, + { + "id": 38, + "origin_id": 9, + "origin_slot": 4, + "target_id": 7, + "target_slot": 1, + "type": "HISTOGRAM" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Fine-tunes tone and color with per-channel curve adjustments using a real-time GPU fragment shader." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/ControlNet (Z-Image-Turbo).json b/blueprints/ControlNet (Z-Image-Turbo).json new file mode 100644 index 000000000..fbec95a97 --- /dev/null +++ b/blueprints/ControlNet (Z-Image-Turbo).json @@ -0,0 +1,1412 @@ +{ + "revision": 0, + "last_node_id": 85, + "last_link_id": 0, + "nodes": [ + { + "id": 85, + "type": "d2e76ecf-6e84-4b8c-8913-48efc09ec1c4", + "pos": [ + 440, + 1220 + ], + "size": [ + 480, + 0 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "label": "control_image", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "patch_model", + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "title": "ControlNet (Z-Image-Turbo)", + "properties": { + "proxyWidgets": [ + [ + "83", + "text" + ], + [ + "79", + "seed" + ], + [ + "74", + "unet_name" + ], + [ + "73", + "clip_name" + ], + [ + "75", + "vae_name" + ], + [ + "76", + "name" + ], + [ + "79", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "d2e76ecf-6e84-4b8c-8913-48efc09ec1c4", + "version": 1, + "state": { + "lastGroupId": 9, + "lastNodeId": 85, + "lastLinkId": 87, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "ControlNet (Z-Image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + -500, + 620, + 120, + 180 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1390, + 1100, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "fbbb968e-d3cf-40e4-b3ce-7abb074e5bd8", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 65, + 80 + ], + "localized_name": "image", + "label": "control_image", + "pos": [ + -400, + 640 + ] + }, + { + "id": "c1b19877-5417-4580-aea1-44439c70c1dd", + "name": "text", + "type": "STRING", + "linkIds": [ + 81 + ], + "pos": [ + -400, + 660 + ] + }, + { + "id": "b5671515-bc7a-4be5-b1e7-d4f0f68907d6", + "name": "seed", + "type": "INT", + "linkIds": [ + 83 + ], + "pos": [ + -400, + 680 + ] + }, + { + "id": "2838be23-8034-4f16-87a5-d29d790e8391", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 84 + ], + "pos": [ + -400, + 700 + ] + }, + { + "id": "8a6643b5-8f78-41ff-bbc6-e87b95459706", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 85 + ], + "pos": [ + -400, + 720 + ] + }, + { + "id": "b103dc94-8ca7-456b-a809-414d7e341a1b", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 86 + ], + "pos": [ + -400, + 740 + ] + }, + { + "id": "4a7d65af-f0fd-4a5c-832a-bdc0d15b1f30", + "name": "name", + "type": "COMBO", + "linkIds": [ + 87 + ], + "label": "patch_model", + "pos": [ + -400, + 760 + ] + } + ], + "outputs": [ + { + "id": "ccb7fa39-4a3d-4eb2-8fd2-91d08fad9570", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 45 + ], + "localized_name": "IMAGE", + "pos": [ + 1410, + 1120 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 73, + "type": "CLIPLoader", + "pos": [ + 20, + 500 + ], + "size": [ 
+ 270, + 150 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 85 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 44 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 74, + "type": "UNETLoader", + "pos": [ + 20, + 320 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 84 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 79 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": 
{} + }, + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 75, + "type": "VAELoader", + "pos": [ + 20, + 760 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 86 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 39, + 70 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 76, + "type": "ModelPatchLoader", + "pos": [ + 20, + 940 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "name", + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": 87 + } + ], + "outputs": [ + { + "localized_name": "MODEL_PATCH", + "name": "MODEL_PATCH", + "type": "MODEL_PATCH", + "links": [ + 74 + ] + } + ], + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.3.51", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelPatchLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "directory": "model_patches" + } + ] + }, + "widgets_values": [ + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + }, + { + "id": 77, + "type": "VAEDecode", + "pos": [ + 940, + 1100 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 38 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 39 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 45 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 78, + "type": "ModelSamplingAuraFlow", + "pos": [ + 910, + 270 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 69 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": 
"MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 40 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 79, + "type": "KSampler", + "pos": [ + 910, + 430 + ], + "size": [ + 300, + 570 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 40 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 41 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 42 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 78 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 83 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 38 + ] + } + ], + 
"properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 729703840979498, + "randomize", + 8, + 1, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 80, + "type": "ConditioningZeroOut", + "pos": [ + 610, + 830 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 42 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 81, + "type": "QwenImageDiffsynthControlnet", + "pos": [ + 490, + 970 + ], + "size": [ + 290, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 79 + }, + { + "localized_name": "model_patch", + "name": "model_patch", + "type": "MODEL_PATCH", + "link": 74 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 70 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 65 + }, + { + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "localized_name": "strength", + 
"name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 69 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "QwenImageDiffsynthControlnet", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 82, + "type": "EmptySD3LatentImage", + "pos": [ + 40, + 1200 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 76 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 77 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 78 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 83, + "type": "CLIPTextEncode", + "pos": [ + 430, + 310 + ], + "size": [ + 400, + 440 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", 
+ "type": "CLIP", + "link": 44 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 81 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 36, + 41 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 84, + "type": "GetImageSize", + "pos": [ + 50, + 1410 + ], + "size": [ + 230, + 120 + ], + "flags": { + "collapsed": true + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 80 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 76 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 77 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + } + ], + "groups": [ + { + "id": 3, + "title": "Prompt", + "bounding": [ + 410, + 230, + 440, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Model", + "bounding": [ + -50, + 230, + 430, + 840 + ], + "color": 
"#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Apply ControlNet", + "bounding": [ + 410, + 890, + 440, + 330 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 9, + "title": "Image Size", + "bounding": [ + -50, + 1100, + 430, + 350 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 38, + "origin_id": 79, + "origin_slot": 0, + "target_id": 77, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 39, + "origin_id": 75, + "origin_slot": 0, + "target_id": 77, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 69, + "origin_id": 81, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 40, + "origin_id": 78, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 41, + "origin_id": 83, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 42, + "origin_id": 80, + "origin_slot": 0, + "target_id": 79, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 78, + "origin_id": 82, + "origin_slot": 0, + "target_id": 79, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 36, + "origin_id": 83, + "origin_slot": 0, + "target_id": 80, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 79, + "origin_id": 74, + "origin_slot": 0, + "target_id": 81, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 74, + "origin_id": 76, + "origin_slot": 0, + "target_id": 81, + "target_slot": 1, + "type": "MODEL_PATCH" + }, + { + "id": 70, + "origin_id": 75, + "origin_slot": 0, + "target_id": 81, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 76, + "origin_id": 84, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "INT" + }, + { + "id": 77, + "origin_id": 84, + "origin_slot": 1, + "target_id": 82, + "target_slot": 1, + "type": "INT" + }, + { + "id": 44, + "origin_id": 73, + "origin_slot": 0, + "target_id": 83, + 
"target_slot": 0, + "type": "CLIP" + }, + { + "id": 65, + "origin_id": -10, + "origin_slot": 0, + "target_id": 81, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 80, + "origin_id": -10, + "origin_slot": 0, + "target_id": 84, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 45, + "origin_id": 77, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 81, + "origin_id": -10, + "origin_slot": 1, + "target_id": 83, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 83, + "origin_id": -10, + "origin_slot": 2, + "target_id": 79, + "target_slot": 4, + "type": "INT" + }, + { + "id": 84, + "origin_id": -10, + "origin_slot": 3, + "target_id": 74, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 85, + "origin_id": -10, + "origin_slot": 4, + "target_id": 73, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 86, + "origin_id": -10, + "origin_slot": 5, + "target_id": 75, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 87, + "origin_id": -10, + "origin_slot": 6, + "target_id": 76, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/ControlNet", + "description": "Generates images from a text prompt and ControlNet conditioning (e.g. depth, canny) using Z-Image-Turbo." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Crop Images 2x2.json b/blueprints/Crop Images 2x2.json new file mode 100644 index 000000000..99b89b608 --- /dev/null +++ b/blueprints/Crop Images 2x2.json @@ -0,0 +1,1621 @@ +{ + "revision": 0, + "last_node_id": 139, + "last_link_id": 0, + "nodes": [ + { + "id": 135, + "type": "3b5ed000-6ab3-4458-91f7-8d6d366b0b40", + "pos": [ + -2479.9999801712506, + 2019.9999372732784 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "top_left", + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_left", + "localized_name": "IMAGE_1", + "name": "IMAGE_1", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_right", + "localized_name": "IMAGE_2", + "name": "IMAGE_2", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_right", + "localized_name": "IMAGE_3", + "name": "IMAGE_3", + "type": "IMAGE", + "links": [] + }, + { + "label": "images", + "name": "IMAGE_4", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Crop Images 2x2" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "3b5ed000-6ab3-4458-91f7-8d6d366b0b40", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 142, + "lastLinkId": 245, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Crop Images 2x2", + "inputNode": { + "id": -10, + "bounding": [ + -10, + 1570, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2919.9998608196274, + 1435, + 120, + 140 + ] + }, + "inputs": [ + { + 
"id": "741854dd-bfb1-4700-ba8c-3b9dea59d021", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 2, + 11, + 13, + 30, + 32 + ], + "localized_name": "image", + "pos": [ + 90, + 1590 + ] + } + ], + "outputs": [ + { + "id": "0eaca6d4-679a-433e-9703-bfa6dceacb18", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 41 + ], + "localized_name": "IMAGE", + "label": "top_left", + "pos": [ + 2939.9998608196274, + 1455 + ] + }, + { + "id": "fff5a1ad-3a74-4c87-938c-ee0fff55f840", + "name": "IMAGE_1", + "type": "IMAGE", + "linkIds": [ + 42 + ], + "localized_name": "IMAGE_1", + "label": "bottom_left", + "pos": [ + 2939.9998608196274, + 1475 + ] + }, + { + "id": "08f40978-fb25-4d98-b716-b61e43b16043", + "name": "IMAGE_2", + "type": "IMAGE", + "linkIds": [ + 43 + ], + "localized_name": "IMAGE_2", + "label": "top_right", + "pos": [ + 2939.9998608196274, + 1495 + ] + }, + { + "id": "17b9416f-3369-43c1-b62f-3e31fc2a7e32", + "name": "IMAGE_3", + "type": "IMAGE", + "linkIds": [ + 44 + ], + "localized_name": "IMAGE_3", + "label": "bottom_right", + "pos": [ + 2939.9998608196274, + 1515 + ] + }, + { + "id": "430e2f3b-c617-4549-9daf-3ebf5be423a3", + "name": "IMAGE_4", + "type": "IMAGE", + "linkIds": [ + 240 + ], + "label": "images", + "pos": [ + 2939.9998608196274, + 1535 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 7, + "type": "ComfyMathExpression", + "pos": [ + 740, + 1390 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 3 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 4 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } 
+ ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 7, + 14, + 28, + 40, + 242 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 8, + "type": "GetImageSize", + "pos": [ + 390, + 1450 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 2 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 3, + 241 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 5, + 245 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 9, + "type": "PrimitiveInt", + "pos": [ + 390, + 1650 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 4, + 6 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 2, + "fixed" + ] + }, + { + "id": 10, + "type": "ImageCropV2", 
+ "pos": [ + 1710, + 430 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 11 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 9 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 41, + 236 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 12, + "type": "PrimitiveBoundingBox", + "pos": [ + 1370, + 570 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 7 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 13, + "type": "ComfyMathExpression", + "pos": [ + 750, + 1650 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + 
"order": 5, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 5 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 6 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 8, + 23, + 27, + 39, + 246 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 138, + "type": "ComfyMathExpression", + "pos": [ + 1170, + 1210 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 241 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 242 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 243, + 244 + ] + } + ], + "title": "Math Expression (Right 
Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 139, + "type": "ComfyMathExpression", + "pos": [ + 1170, + 1860 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 245 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 246 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 247, + 248 + ] + } + ], + "title": "Math Expression (Bottom Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 15, + "type": "ImageCropV2", + "pos": [ + 1740, + 1600 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 13 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ 
+ 42, + 238 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 16, + "type": "PrimitiveBoundingBox", + "pos": [ + 1350, + 1780 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 23 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 14 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 247 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 12 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 25, + "type": "PrimitiveBoundingBox", + "pos": [ + 1350, + 1200 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 28 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 243 + }, + { + "localized_name": "height", + "name": "height", + "type": 
"INT", + "widget": { + "name": "height" + }, + "link": 27 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 29 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 26, + "type": "ImageCropV2", + "pos": [ + 1720, + 1050 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 30 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 29 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 43, + 237 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 30, + "type": "ImageCropV2", + "pos": [ + 1740, + 2130 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 32 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 44, + 239 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": 
"comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 32, + "type": "PrimitiveBoundingBox", + "pos": [ + 1370, + 2280 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 40 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 39 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 244 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 248 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 35 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 137, + "type": "BatchImagesNode", + "pos": [ + 2520, + 1540 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 236 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "type": "IMAGE", + "link": 237 + }, + { + "label": "image2", + "localized_name": "images.image2", + "name": "images.image2", + "shape": 7, + "type": "IMAGE", + "link": 238 + }, + { + "label": "image3", + "localized_name": "images.image3", + "name": "images.image3", + "shape": 7, + "type": "IMAGE", + "link": 239 + }, + { + "label": "image4", + "localized_name": 
"images.image4", + "name": "images.image4", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 240 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "BatchImagesNode" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Crop Images 2x2", + "bounding": [ + 380, + 360, + 1710, + 2270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 3, + "origin_id": 8, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "INT" + }, + { + "id": 4, + "origin_id": 9, + "origin_slot": 0, + "target_id": 7, + "target_slot": 1, + "type": "INT" + }, + { + "id": 9, + "origin_id": 12, + "origin_slot": 0, + "target_id": 10, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 7, + "origin_id": 7, + "origin_slot": 1, + "target_id": 12, + "target_slot": 2, + "type": "INT" + }, + { + "id": 8, + "origin_id": 13, + "origin_slot": 1, + "target_id": 12, + "target_slot": 3, + "type": "INT" + }, + { + "id": 5, + "origin_id": 8, + "origin_slot": 1, + "target_id": 13, + "target_slot": 0, + "type": "INT" + }, + { + "id": 6, + "origin_id": 9, + "origin_slot": 0, + "target_id": 13, + "target_slot": 1, + "type": "INT" + }, + { + "id": 12, + "origin_id": 16, + "origin_slot": 0, + "target_id": 15, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 23, + "origin_id": 13, + "origin_slot": 1, + "target_id": 16, + "target_slot": 1, + "type": "INT" + }, + { + "id": 14, + "origin_id": 7, + "origin_slot": 1, + "target_id": 16, + "target_slot": 2, + "type": "INT" + }, + { + "id": 247, + "origin_id": 139, + "origin_slot": 1, + "target_id": 16, + "target_slot": 3, + "type": "INT" + }, + { + "id": 28, + "origin_id": 7, + "origin_slot": 1, + "target_id": 25, + "target_slot": 0, + 
"type": "INT" + }, + { + "id": 243, + "origin_id": 138, + "origin_slot": 1, + "target_id": 25, + "target_slot": 2, + "type": "INT" + }, + { + "id": 27, + "origin_id": 13, + "origin_slot": 1, + "target_id": 25, + "target_slot": 3, + "type": "INT" + }, + { + "id": 29, + "origin_id": 25, + "origin_slot": 0, + "target_id": 26, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 35, + "origin_id": 32, + "origin_slot": 0, + "target_id": 30, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 40, + "origin_id": 7, + "origin_slot": 1, + "target_id": 32, + "target_slot": 0, + "type": "INT" + }, + { + "id": 39, + "origin_id": 13, + "origin_slot": 1, + "target_id": 32, + "target_slot": 1, + "type": "INT" + }, + { + "id": 244, + "origin_id": 138, + "origin_slot": 1, + "target_id": 32, + "target_slot": 2, + "type": "INT" + }, + { + "id": 248, + "origin_id": 139, + "origin_slot": 1, + "target_id": 32, + "target_slot": 3, + "type": "INT" + }, + { + "id": 241, + "origin_id": 8, + "origin_slot": 0, + "target_id": 138, + "target_slot": 0, + "type": "INT" + }, + { + "id": 242, + "origin_id": 7, + "origin_slot": 1, + "target_id": 138, + "target_slot": 1, + "type": "INT" + }, + { + "id": 245, + "origin_id": 8, + "origin_slot": 1, + "target_id": 139, + "target_slot": 0, + "type": "INT" + }, + { + "id": 246, + "origin_id": 13, + "origin_slot": 1, + "target_id": 139, + "target_slot": 1, + "type": "INT" + }, + { + "id": 2, + "origin_id": -10, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 11, + "origin_id": -10, + "origin_slot": 0, + "target_id": 10, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 13, + "origin_id": -10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": -10, + "origin_slot": 0, + "target_id": 26, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 32, + "origin_id": -10, + "origin_slot": 0, + "target_id": 30, + "target_slot": 0, + 
"type": "IMAGE" + }, + { + "id": 41, + "origin_id": 10, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 42, + "origin_id": 15, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 43, + "origin_id": 26, + "origin_slot": 0, + "target_id": -20, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 44, + "origin_id": 30, + "origin_slot": 0, + "target_id": -20, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": 10, + "origin_slot": 0, + "target_id": 137, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 237, + "origin_id": 26, + "origin_slot": 0, + "target_id": 137, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 238, + "origin_id": 15, + "origin_slot": 0, + "target_id": 137, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 239, + "origin_id": 30, + "origin_slot": 0, + "target_id": 137, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 240, + "origin_id": 137, + "origin_slot": 0, + "target_id": -20, + "target_slot": 4, + "type": "IMAGE" + } + ], + "extra": {}, + "category": "Image Tools/Crop", + "description": "Splits an image into a 2×2 grid of four equal tiles." 
+ } + ] + }, + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } +} \ No newline at end of file diff --git a/blueprints/Crop Images 3x3.json b/blueprints/Crop Images 3x3.json new file mode 100644 index 000000000..6ac636da4 --- /dev/null +++ b/blueprints/Crop Images 3x3.json @@ -0,0 +1,2958 @@ +{ + "revision": 0, + "last_node_id": 141, + "last_link_id": 0, + "nodes": [ + { + "id": 134, + "type": "7fd47bca-ff89-476c-a98d-ca6f7cf756fe", + "pos": [ + -2620, + 1620 + ], + "size": [ + 230, + 290 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "top_left", + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_center", + "name": "IMAGE_1", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_right", + "name": "IMAGE_2", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_left", + "name": "IMAGE_3", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_center", + "name": "IMAGE_4", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_right", + "name": "IMAGE_5", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_left", + "name": "IMAGE_6", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_center", + "name": "IMAGE_7", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_right", + "name": "IMAGE_8", + "type": "IMAGE", + "links": [] + }, + { + "label": "images", + "name": "IMAGE_9", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Crop Images 3x3" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "7fd47bca-ff89-476c-a98d-ca6f7cf756fe", + "version": 1, + "state": { 
+ "lastGroupId": 3, + "lastNodeId": 142, + "lastLinkId": 245, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Crop Images 3x3", + "inputNode": { + "id": -10, + "bounding": [ + -710, + 5440, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3430, + 5270, + 121.720703125, + 240 + ] + }, + "inputs": [ + { + "id": "e54e8e8b-6ce6-4f80-a38f-87a77d990efc", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 74, + 75, + 82, + 91, + 94, + 117, + 129, + 137, + 148, + 157 + ], + "localized_name": "image", + "pos": [ + -610, + 5460 + ] + } + ], + "outputs": [ + { + "id": "3dd8abe2-a7da-4052-a556-9ae157ff3cf4", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 101 + ], + "localized_name": "IMAGE", + "label": "top_left", + "pos": [ + 3450, + 5290 + ] + }, + { + "id": "aa220733-759b-474e-9d29-634a3a23c5da", + "name": "IMAGE_1", + "type": "IMAGE", + "linkIds": [ + 192 + ], + "label": "top_center", + "pos": [ + 3450, + 5310 + ] + }, + { + "id": "f1911df1-d50c-4bf8-9623-5e581d2a8902", + "name": "IMAGE_2", + "type": "IMAGE", + "linkIds": [ + 193 + ], + "label": "top_right", + "pos": [ + 3450, + 5330 + ] + }, + { + "id": "71ebb807-e7e9-438f-990d-511e0745d10d", + "name": "IMAGE_3", + "type": "IMAGE", + "linkIds": [ + 194 + ], + "label": "middle_left", + "pos": [ + 3450, + 5350 + ] + }, + { + "id": "4fb9c99c-3340-4de5-ba2d-51a653aab0b3", + "name": "IMAGE_4", + "type": "IMAGE", + "linkIds": [ + 195 + ], + "label": "middle_center", + "pos": [ + 3450, + 5370 + ] + }, + { + "id": "398643e8-e349-4d59-9c68-6403b7a2772d", + "name": "IMAGE_5", + "type": "IMAGE", + "linkIds": [ + 196 + ], + "label": "middle_right", + "pos": [ + 3450, + 5390 + ] + }, + { + "id": "5b11949c-f4cc-4525-86ae-690e30d3dada", + "name": "IMAGE_6", + "type": "IMAGE", + "linkIds": [ + 197 + ], + "label": "bottom_left", + "pos": [ + 3450, + 5410 + ] + }, + { + "id": "82c69fd9-de36-4c8f-8311-a9e49159640b", + "name": "IMAGE_7", + "type": "IMAGE", + "linkIds": [ + 198 + ], + 
"label": "bottom_center", + "pos": [ + 3450, + 5430 + ] + }, + { + "id": "aef678db-20aa-47d4-be8a-978065f078c6", + "name": "IMAGE_8", + "type": "IMAGE", + "linkIds": [ + 199 + ], + "label": "bottom_right", + "pos": [ + 3450, + 5450 + ] + }, + { + "id": "77574277-edde-439c-8720-7daa849f4f27", + "name": "IMAGE_9", + "type": "IMAGE", + "linkIds": [ + 226 + ], + "label": "images", + "pos": [ + 3450, + 5470 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 50, + "type": "ComfyMathExpression", + "pos": [ + 770, + 5310 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 73 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 108 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 77, + 85, + 89, + 97, + 99, + 127, + 142, + 146, + 152, + 300 + ] + } + ], + "title": "Math Expression (Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 51, + "type": "GetImageSize", + "pos": [ + 440, + 5390 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 74 + } + ], + "outputs": [ + { + 
"localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 73, + 300 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 79, + 305 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 52, + "type": "PrimitiveInt", + "pos": [ + 440, + 5590 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 80, + 108 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 3, + "fixed" + ] + }, + { + "id": 53, + "type": "ImageCropV2", + "pos": [ + 2080, + 3020 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 75 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 101, + 227 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + 
"height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 54, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 3160 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 77 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 78 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 76 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 55, + "type": "ComfyMathExpression", + "pos": [ + 780, + 5570 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 79 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 80 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 78, + 84, 
+ 86, + 88, + 90, + 98, + 100, + 121, + 123, + 126, + 161 + ] + } + ], + "title": "Math Expression(Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 57, + "type": "ImageCropV2", + "pos": [ + 2080, + 4700 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 82 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 83 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 194, + 230 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 58, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 4830 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 84 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 85 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 86 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + 
"links": [ + 83 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 60, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 3700 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 88 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 89 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 90 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 92 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 61, + "type": "ImageCropV2", + "pos": [ + 2100, + 3570 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 91 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 92 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 192, + 228 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + 
"version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 63, + "type": "ImageCropV2", + "pos": [ + 2080, + 5310 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 94 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 95 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 195, + 231 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 65, + "type": "PrimitiveBoundingBox", + "pos": [ + 1750, + 5330 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 97 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 98 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 99 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 100 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 95 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + 
"cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 71, + "type": "ComfyMathExpression", + "pos": [ + 780, + 6090 + ], + "size": [ + 400, + 190 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 126 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 136, + 147, + 156, + 306 + ] + } + ], + "title": "Math Expression(height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "2 * a" + ] + }, + { + "id": 75, + "type": "ImageCropV2", + "pos": [ + 2100, + 5900 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 117 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 118 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 196, + 232 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ 
+ { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 77, + "type": "PrimitiveBoundingBox", + "pos": [ + 1750, + 5970 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 128 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 121 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 302 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 123 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 118 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 78, + "type": "ComfyMathExpression", + "pos": [ + 780, + 5820 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 127 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 128, + 132, + 163, + 301 + ] + } + ], + "title": "Math Expression(width)", + "properties": { 
+ "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "2 * a" + ] + }, + { + "id": 140, + "type": "ComfyMathExpression", + "pos": [ + 1240, + 5640 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 300 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 301 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 302, + 303, + 304 + ] + } + ], + "title": "Math Expression (Right Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 141, + "type": "ComfyMathExpression", + "pos": [ + 1230, + 6340 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 305 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 306 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + 
"link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 307, + 308, + 309 + ] + } + ], + "title": "Math Expression (Bottom Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 79, + "type": "ImageCropV2", + "pos": [ + 2120, + 7580 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 129 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 130 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 199, + 235 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 81, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 7620 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 132 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 136 + }, + { + "localized_name": "width", + "name": 
"width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 303 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 307 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 130 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 82, + "type": "ImageCropV2", + "pos": [ + 2120, + 7040 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 137 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 138 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 198, + 234 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 84, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 7080 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 146 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 147 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 
142 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 308 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 138 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 85, + "type": "ImageCropV2", + "pos": [ + 2110, + 6480 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 148 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 149 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 197, + 233 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 86, + "type": "PrimitiveBoundingBox", + "pos": [ + 1670, + 6570 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 156 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 152 + }, + { + "localized_name": "height", + "name": "height", + "type": 
"INT", + "widget": { + "name": "height" + }, + "link": 309 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 149 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 88, + "type": "ImageCropV2", + "pos": [ + 2060, + 4140 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 157 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 158 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 193, + 229 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 89, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 4150 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 163 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 304 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 161 + } + ], + 
"outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 158 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 136, + "type": "BatchImagesNode", + "pos": [ + 3170, + 5640 + ], + "size": [ + 230, + 290 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 227 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "type": "IMAGE", + "link": 228 + }, + { + "label": "image2", + "localized_name": "images.image2", + "name": "images.image2", + "shape": 7, + "type": "IMAGE", + "link": 229 + }, + { + "label": "image3", + "localized_name": "images.image3", + "name": "images.image3", + "shape": 7, + "type": "IMAGE", + "link": 230 + }, + { + "label": "image4", + "localized_name": "images.image4", + "name": "images.image4", + "shape": 7, + "type": "IMAGE", + "link": 231 + }, + { + "label": "image5", + "localized_name": "images.image5", + "name": "images.image5", + "shape": 7, + "type": "IMAGE", + "link": 232 + }, + { + "label": "image6", + "localized_name": "images.image6", + "name": "images.image6", + "shape": 7, + "type": "IMAGE", + "link": 233 + }, + { + "label": "image7", + "localized_name": "images.image7", + "name": "images.image7", + "shape": 7, + "type": "IMAGE", + "link": 234 + }, + { + "label": "image8", + "localized_name": "images.image8", + "name": "images.image8", + "shape": 7, + "type": "IMAGE", + "link": 235 + }, + { + "label": "image9", + "localized_name": "images.image9", + "name": "images.image9", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "BatchImagesNode" + } + } + ], + "groups": [ + { + "id": 3, + "title": "Crop Images 3x3", + "bounding": [ + 100, + 2700, + 2640, + 5480 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 73, + "origin_id": 51, + "origin_slot": 0, + "target_id": 50, + "target_slot": 0, + "type": "INT" + }, + { + "id": 108, + "origin_id": 52, + "origin_slot": 0, + "target_id": 50, + "target_slot": 1, + "type": "INT" + }, + { + "id": 76, + "origin_id": 54, + "origin_slot": 0, + "target_id": 53, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 77, + "origin_id": 50, + "origin_slot": 1, + "target_id": 54, + "target_slot": 2, + "type": "INT" + }, + { + "id": 78, + "origin_id": 55, + "origin_slot": 1, + "target_id": 54, + "target_slot": 3, + "type": "INT" + }, + { + "id": 79, + "origin_id": 51, + "origin_slot": 1, + "target_id": 55, + "target_slot": 0, + "type": "INT" + }, + { + "id": 80, + "origin_id": 52, + "origin_slot": 0, + "target_id": 55, + "target_slot": 1, + "type": "INT" + }, + { + "id": 83, + "origin_id": 58, + "origin_slot": 0, + "target_id": 57, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 84, + "origin_id": 55, + "origin_slot": 1, + "target_id": 58, + "target_slot": 1, + "type": "INT" + }, + { + "id": 85, + "origin_id": 50, + "origin_slot": 1, + "target_id": 58, + "target_slot": 2, + "type": "INT" + }, + { + "id": 86, + "origin_id": 55, + "origin_slot": 1, + "target_id": 58, + "target_slot": 3, + "type": "INT" + }, + { + "id": 88, + "origin_id": 50, + "origin_slot": 1, + "target_id": 60, + "target_slot": 0, + "type": "INT" + }, + { + "id": 89, + "origin_id": 50, + "origin_slot": 1, + "target_id": 60, + "target_slot": 
2, + "type": "INT" + }, + { + "id": 90, + "origin_id": 55, + "origin_slot": 1, + "target_id": 60, + "target_slot": 3, + "type": "INT" + }, + { + "id": 92, + "origin_id": 60, + "origin_slot": 0, + "target_id": 61, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 95, + "origin_id": 65, + "origin_slot": 0, + "target_id": 63, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 97, + "origin_id": 50, + "origin_slot": 1, + "target_id": 65, + "target_slot": 0, + "type": "INT" + }, + { + "id": 98, + "origin_id": 55, + "origin_slot": 1, + "target_id": 65, + "target_slot": 1, + "type": "INT" + }, + { + "id": 99, + "origin_id": 50, + "origin_slot": 1, + "target_id": 65, + "target_slot": 2, + "type": "INT" + }, + { + "id": 100, + "origin_id": 55, + "origin_slot": 1, + "target_id": 65, + "target_slot": 3, + "type": "INT" + }, + { + "id": 126, + "origin_id": 55, + "origin_slot": 1, + "target_id": 71, + "target_slot": 0, + "type": "INT" + }, + { + "id": 118, + "origin_id": 77, + "origin_slot": 0, + "target_id": 75, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 128, + "origin_id": 78, + "origin_slot": 1, + "target_id": 77, + "target_slot": 0, + "type": "INT" + }, + { + "id": 121, + "origin_id": 55, + "origin_slot": 1, + "target_id": 77, + "target_slot": 1, + "type": "INT" + }, + { + "id": 302, + "origin_id": 140, + "origin_slot": 1, + "target_id": 77, + "target_slot": 2, + "type": "INT" + }, + { + "id": 123, + "origin_id": 55, + "origin_slot": 1, + "target_id": 77, + "target_slot": 3, + "type": "INT" + }, + { + "id": 127, + "origin_id": 50, + "origin_slot": 1, + "target_id": 78, + "target_slot": 0, + "type": "INT" + }, + { + "id": 130, + "origin_id": 81, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 132, + "origin_id": 78, + "origin_slot": 1, + "target_id": 81, + "target_slot": 0, + "type": "INT" + }, + { + "id": 136, + "origin_id": 71, + "origin_slot": 1, + "target_id": 81, + 
"target_slot": 1, + "type": "INT" + }, + { + "id": 303, + "origin_id": 140, + "origin_slot": 1, + "target_id": 81, + "target_slot": 2, + "type": "INT" + }, + { + "id": 307, + "origin_id": 141, + "origin_slot": 1, + "target_id": 81, + "target_slot": 3, + "type": "INT" + }, + { + "id": 138, + "origin_id": 84, + "origin_slot": 0, + "target_id": 82, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 146, + "origin_id": 50, + "origin_slot": 1, + "target_id": 84, + "target_slot": 0, + "type": "INT" + }, + { + "id": 147, + "origin_id": 71, + "origin_slot": 1, + "target_id": 84, + "target_slot": 1, + "type": "INT" + }, + { + "id": 142, + "origin_id": 50, + "origin_slot": 1, + "target_id": 84, + "target_slot": 2, + "type": "INT" + }, + { + "id": 308, + "origin_id": 141, + "origin_slot": 1, + "target_id": 84, + "target_slot": 3, + "type": "INT" + }, + { + "id": 149, + "origin_id": 86, + "origin_slot": 0, + "target_id": 85, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 156, + "origin_id": 71, + "origin_slot": 1, + "target_id": 86, + "target_slot": 1, + "type": "INT" + }, + { + "id": 152, + "origin_id": 50, + "origin_slot": 1, + "target_id": 86, + "target_slot": 2, + "type": "INT" + }, + { + "id": 309, + "origin_id": 141, + "origin_slot": 1, + "target_id": 86, + "target_slot": 3, + "type": "INT" + }, + { + "id": 158, + "origin_id": 89, + "origin_slot": 0, + "target_id": 88, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 163, + "origin_id": 78, + "origin_slot": 1, + "target_id": 89, + "target_slot": 0, + "type": "INT" + }, + { + "id": 304, + "origin_id": 140, + "origin_slot": 1, + "target_id": 89, + "target_slot": 2, + "type": "INT" + }, + { + "id": 161, + "origin_id": 55, + "origin_slot": 1, + "target_id": 89, + "target_slot": 3, + "type": "INT" + }, + { + "id": 300, + "origin_id": 51, + "origin_slot": 0, + "target_id": 140, + "target_slot": 0, + "type": "INT" + }, + { + "id": 301, + "origin_id": 78, + "origin_slot": 1, + "target_id": 
140, + "target_slot": 1, + "type": "INT" + }, + { + "id": 305, + "origin_id": 51, + "origin_slot": 1, + "target_id": 141, + "target_slot": 0, + "type": "INT" + }, + { + "id": 306, + "origin_id": 71, + "origin_slot": 1, + "target_id": 141, + "target_slot": 1, + "type": "INT" + }, + { + "id": 74, + "origin_id": -10, + "origin_slot": 0, + "target_id": 51, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 75, + "origin_id": -10, + "origin_slot": 0, + "target_id": 53, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 82, + "origin_id": -10, + "origin_slot": 0, + "target_id": 57, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 91, + "origin_id": -10, + "origin_slot": 0, + "target_id": 61, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 94, + "origin_id": -10, + "origin_slot": 0, + "target_id": 63, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 117, + "origin_id": -10, + "origin_slot": 0, + "target_id": 75, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 129, + "origin_id": -10, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 137, + "origin_id": -10, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 148, + "origin_id": -10, + "origin_slot": 0, + "target_id": 85, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 157, + "origin_id": -10, + "origin_slot": 0, + "target_id": 88, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 101, + "origin_id": 53, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 192, + "origin_id": 61, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 193, + "origin_id": 88, + "origin_slot": 0, + "target_id": -20, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 194, + "origin_id": 57, + "origin_slot": 0, + "target_id": -20, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 195, + "origin_id": 63, + "origin_slot": 0, + 
"target_id": -20, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 196, + "origin_id": 75, + "origin_slot": 0, + "target_id": -20, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 197, + "origin_id": 85, + "origin_slot": 0, + "target_id": -20, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 198, + "origin_id": 82, + "origin_slot": 0, + "target_id": -20, + "target_slot": 7, + "type": "IMAGE" + }, + { + "id": 199, + "origin_id": 79, + "origin_slot": 0, + "target_id": -20, + "target_slot": 8, + "type": "IMAGE" + }, + { + "id": 226, + "origin_id": 136, + "origin_slot": 0, + "target_id": -20, + "target_slot": 9, + "type": "IMAGE" + }, + { + "id": 227, + "origin_id": 53, + "origin_slot": 0, + "target_id": 136, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 228, + "origin_id": 61, + "origin_slot": 0, + "target_id": 136, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 229, + "origin_id": 88, + "origin_slot": 0, + "target_id": 136, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 230, + "origin_id": 57, + "origin_slot": 0, + "target_id": 136, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 231, + "origin_id": 63, + "origin_slot": 0, + "target_id": 136, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 232, + "origin_id": 75, + "origin_slot": 0, + "target_id": 136, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 233, + "origin_id": 85, + "origin_slot": 0, + "target_id": 136, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 234, + "origin_id": 82, + "origin_slot": 0, + "target_id": 136, + "target_slot": 7, + "type": "IMAGE" + }, + { + "id": 235, + "origin_id": 79, + "origin_slot": 0, + "target_id": 136, + "target_slot": 8, + "type": "IMAGE" + } + ], + "extra": {}, + "category": "Image Tools/Crop", + "description": "Splits an image into a 3×3 grid of nine equal tiles." 
+ } + ] + }, + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } +} \ No newline at end of file diff --git a/blueprints/Depth to Image (Z-Image-Turbo).json b/blueprints/Depth to Image (Z-Image-Turbo).json index baffc4fc9..fe9ef0f72 100644 --- a/blueprints/Depth to Image (Z-Image-Turbo).json +++ b/blueprints/Depth to Image (Z-Image-Turbo).json @@ -1 +1,2487 @@ -{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 76, "last_link_id": 259, "nodes": [{"id": 13, "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "pos": [400, 3630], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "control image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": null}, {"label": "lotus_model", "name": "unet_name_1", "type": "COMBO", "widget": {"name": "unet_name_1"}, "link": null}, {"label": "sd15_vae", "name": "vae_name_1", "type": "COMBO", "widget": {"name": "vae_name_1"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"], ["-1", "unet_name_1"], ["-1", "vae_name_1"], ["7", "control_after_generate"], ["7", "seed"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": 
"d8492a46-9e6c-4917-b5ea-4273aabf5f51", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 76, "lastLinkId": 259, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Depth to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [27.60368520069494, 4936.043696127976, 120, 200]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [25], "label": "control image", "pos": [127.60368520069494, 4956.043696127976]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [16], "label": "prompt", "pos": [127.60368520069494, 4976.043696127976]}, {"id": "add4a703-1185-4848-9494-b27dd37ff434", "name": "unet_name", "type": "COMBO", "linkIds": [252], "pos": [127.60368520069494, 4996.043696127976]}, {"id": "03233f9e-df65-4e05-b5c5-34d83129e85e", "name": "clip_name", "type": "COMBO", "linkIds": [253], "pos": [127.60368520069494, 5016.043696127976]}, {"id": "0c643ffb-326d-40ca-8a89-ebc585cf5015", "name": "vae_name", "type": "COMBO", "linkIds": [254], "pos": [127.60368520069494, 5036.043696127976]}, {"id": "409cdebe-632b-410f-a66c-711c2a1527e1", "name": "name", "type": "COMBO", "linkIds": [255], "pos": [127.60368520069494, 5056.043696127976]}, {"id": "80e6915f-5d59-4d6b-a197-d8c565ad2922", "name": "unet_name_1", "type": "COMBO", "linkIds": [258], "label": "lotus_model", "pos": [127.60368520069494, 5076.043696127976]}, {"id": "4207ec84-4409-4816-8444-76062bf6310c", "name": "vae_name_1", "type": "COMBO", "linkIds": [259], "label": "sd15_vae", "pos": [127.60368520069494, 5096.043696127976]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [18], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 1, "type": "CLIPLoader", "pos": [228.60381716506714, 4700.188262345759], "size": [269.9479166666667, 106], 
"flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 253}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [14]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 2, "type": "UNETLoader", "pos": [228.60381716506714, 4550.188402733727], "size": [269.9479166666667, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 252}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", 
"default"]}, {"id": 3, "type": "VAELoader", "pos": [228.60381716506714, 4880.188283008492], "size": [269.9479166666667, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 254}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [2, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 4, "type": "ModelPatchLoader", "pos": [228.60381716506714, 5010.1883654774], "size": [269.9479166666667, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 255}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 6, "type": "ModelSamplingAuraFlow", "pos": [998.6041081931173, 4490.1880693746825], "size": [289.97395833333337, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", 
"link": 3}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 7, "type": "KSampler", "pos": [998.6041081931173, 4600.188363442829], "size": [300, 262], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 
65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 8, "type": "ConditioningZeroOut", "pos": [748.2706508086186, 5044.854997097082], "size": [204.134765625, 26], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 8}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 10, "type": "EmptySD3LatentImage", "pos": [1028.2702326451792, 5334.855683329977], "size": [259.9479166666667, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 12}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 13}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 5, "type": "VAEDecode", "pos": [1338.604012131086, 4500.188453282262], "size": [200, 46], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 1}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 2}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", 
"slot_index": 0, "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 9, "type": "QwenImageDiffsynthControlnet", "pos": [608.2704996459613, 5204.85528564724], "size": [289.97395833333337, 138], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 9}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 10}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 11}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 248}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [3]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 11, "type": "GetImageSize", "pos": [530, 5440], "size": [140, 66], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 247}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [12]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [13]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, 
"hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 12, "type": "CLIPTextEncode", "pos": [548.2706278500244, 4544.854827124228], "size": [400, 420], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 14}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 16}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [5, 8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 14, "type": "ImageScaleToTotalPixels", "pos": [90, 5180], "size": [270, 106], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 25}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [248, 250]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["lanczos", 1, 1]}, {"id": 15, "type": "PreviewImage", "pos": [90, 5530], "size": [380, 260], "flags": {}, "order": 13, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 251}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for 
S&R": "PreviewImage"}, "widgets_values": []}, {"id": 76, "type": "458bdf3c-4b58-421c-af50-c9c663a4d74c", "pos": [90, 5340], "size": [400, 150], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 250}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 258}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 259}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [247, 251]}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "groups": [{"id": 1, "title": "Prompt", "bounding": [530, 4470, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [210, 4470, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [530, 5120, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 1, "origin_id": 7, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "LATENT"}, {"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 5, "target_slot": 1, "type": "VAE"}, {"id": 3, "origin_id": 9, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "MODEL"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 12, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 7, "target_slot": 2, "type": "CONDITIONING"}, {"id": 7, "origin_id": 10, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 12, 
"origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "CONDITIONING"}, {"id": 9, "origin_id": 2, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "MODEL"}, {"id": 10, "origin_id": 4, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 11, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "VAE"}, {"id": 12, "origin_id": 11, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "INT"}, {"id": 13, "origin_id": 11, "origin_slot": 1, "target_id": 10, "target_slot": 1, "type": "INT"}, {"id": 14, "origin_id": 1, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 12, "target_slot": 1, "type": "STRING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 25, "origin_id": -10, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 247, "origin_id": 76, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 248, "origin_id": 14, "origin_slot": 0, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 250, "origin_id": 14, "origin_slot": 0, "target_id": 76, "target_slot": 0, "type": "IMAGE"}, {"id": 251, "origin_id": 76, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 252, "origin_id": -10, "origin_slot": 2, "target_id": 2, "target_slot": 0, "type": "COMBO"}, {"id": 253, "origin_id": -10, "origin_slot": 3, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 254, "origin_id": -10, "origin_slot": 4, "target_id": 3, "target_slot": 0, "type": "COMBO"}, {"id": 255, "origin_id": -10, "origin_slot": 5, "target_id": 4, "target_slot": 0, "type": "COMBO"}, {"id": 258, "origin_id": -10, "origin_slot": 6, "target_id": 76, "target_slot": 2, "type": "COMBO"}, {"id": 259, "origin_id": -10, "origin_slot": 7, "target_id": 76, "target_slot": 3, "type": "COMBO"}], "extra": {"ds": {"scale": 
1.3889423076923078, "offset": [22.056074766355096, -3503.3333333333335]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Depth to image"}, {"id": "458bdf3c-4b58-421c-af50-c9c663a4d74c", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 76, "lastLinkId": 259, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, "outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "d721b249-fd2a-441b-9a78-2805f04e2644", "name": "unet_name", "type": "COMBO", "linkIds": [256], "pos": [46.625, -112.61268043518066]}, {"id": "0430e2ea-f8b5-4191-9b72-b7d62176f97c", "name": "vae_name", "type": "COMBO", "linkIds": [257], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 8, "type": "VAEDecode", "pos": [1380.0000135211146, -240.0000135211144], "size": [210, 60], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": 
"VAEDecode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 10, "type": "UNETLoader", "pos": [135.34178335388546, -290.1947851765315], "size": [305.9244791666667, 97.7734375], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 256}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 14, "type": "VAELoader", "pos": [134.53144605616137, -165.18194011768782], "size": [305.9244791666667, 68.88020833333334], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 257}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [990.6585475753939, -319.91444852782104], "size": [355.1953125, 325.98958333333337], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", 
"link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 18, "type": "DisableNoise", "pos": [730.4769792883567, -320.00005408445816], "size": [210, 40], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 19, "type": "BasicGuider", "pos": [730.2630921572128, -251.22541185314978], "size": [210, 60], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [488.64457755981744, -147.67201223931278], "size": [210, 122.21354166666667], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": 
"scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}, {"id": 21, "type": "KSamplerSelect", "pos": [730.2630921572128, -161.22540847287118], "size": [210, 68.88020833333334], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 22, "type": "ImageInvert", "pos": [1373.3333333333335, -318.33333333333337], "size": [210, 40], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [730.2630921572128, 38.774608428522015], "size": [210, 60], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, 
"links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [730.2630921572128, -61.225357768691524], "size": [210, 66.66666666666667], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 68, "type": "LotusConditioning", "pos": [489.99998478874613, -229.99996619721344], "size": [210, 40], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "widget_ue_connectable": {}}, "widgets_values": []}], "groups": [{"id": 2, "title": "Models", "bounding": [123.33333333333334, -351.6666666666667, 323.4014831310574, 263.55972005884377], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, 
"target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 256, "origin_id": -10, "origin_slot": 2, "target_id": 10, "target_slot": 0, "type": "COMBO"}, {"id": 257, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"ds": {"scale": 1.2354281696404266, "offset": [-114.15605447786857, -754.3368938705543]}, "workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 0.7886233956111374, "offset": [741.6589462093539, -3278.0806447095165]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} +{ + "id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", + "revision": 0, + "last_node_id": 76, + "last_link_id": 259, + "nodes": [ + { + "id": 13, + "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", + "pos": [ + 400, + 3630 + ], + "size": [ + 400, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { 
+ "label": "control image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": null + }, + { + "label": "lotus_model", + "name": "unet_name_1", + "type": "COMBO", + "widget": { + "name": "unet_name_1" + }, + "link": null + }, + { + "label": "sd15_vae", + "name": "vae_name_1", + "type": "COMBO", + "widget": { + "name": "vae_name_1" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "name" + ], + [ + "-1", + "unet_name_1" + ], + [ + "-1", + "vae_name_1" + ], + [ + "7", + "control_after_generate" + ], + [ + "7", + "seed" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + "z_image_turbo_bf16.safetensors", + "qwen_3_4b.safetensors", + "ae.safetensors", + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "lotus-depth-d-v1-1.safetensors", + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 76, + "lastLinkId": 259, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Depth to Image (Z-Image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + 27.60368520069494, + 
4936.043696127976, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1598.6038576146689, + 4936.043696127976, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 25 + ], + "label": "control image", + "pos": [ + 127.60368520069494, + 4956.043696127976 + ] + }, + { + "id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", + "name": "text", + "type": "STRING", + "linkIds": [ + 16 + ], + "label": "prompt", + "pos": [ + 127.60368520069494, + 4976.043696127976 + ] + }, + { + "id": "add4a703-1185-4848-9494-b27dd37ff434", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 252 + ], + "pos": [ + 127.60368520069494, + 4996.043696127976 + ] + }, + { + "id": "03233f9e-df65-4e05-b5c5-34d83129e85e", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 253 + ], + "pos": [ + 127.60368520069494, + 5016.043696127976 + ] + }, + { + "id": "0c643ffb-326d-40ca-8a89-ebc585cf5015", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 254 + ], + "pos": [ + 127.60368520069494, + 5036.043696127976 + ] + }, + { + "id": "409cdebe-632b-410f-a66c-711c2a1527e1", + "name": "name", + "type": "COMBO", + "linkIds": [ + 255 + ], + "pos": [ + 127.60368520069494, + 5056.043696127976 + ] + }, + { + "id": "80e6915f-5d59-4d6b-a197-d8c565ad2922", + "name": "unet_name_1", + "type": "COMBO", + "linkIds": [ + 258 + ], + "label": "lotus_model", + "pos": [ + 127.60368520069494, + 5076.043696127976 + ] + }, + { + "id": "4207ec84-4409-4816-8444-76062bf6310c", + "name": "vae_name_1", + "type": "COMBO", + "linkIds": [ + 259 + ], + "label": "sd15_vae", + "pos": [ + 127.60368520069494, + 5096.043696127976 + ] + } + ], + "outputs": [ + { + "id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 18 + ], + "pos": [ + 1618.6038576146689, + 4956.043696127976 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "CLIPLoader", + "pos": [ + 
228.60381716506714, + 4700.188262345759 + ], + "size": [ + 269.9479166666667, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 253 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 14 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 2, + "type": "UNETLoader", + "pos": [ + 228.60381716506714, + 4550.188402733727 + ], + "size": [ + 269.9479166666667, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 252 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": 
"z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 3, + "type": "VAELoader", + "pos": [ + 228.60381716506714, + 4880.188283008492 + ], + "size": [ + 269.9479166666667, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 254 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 2, + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 4, + "type": "ModelPatchLoader", + "pos": [ + 228.60381716506714, + 5010.1883654774 + ], + "size": [ + 269.9479166666667, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "name", + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": 255 + } + ], + "outputs": [ + { + "localized_name": "MODEL_PATCH", + "name": "MODEL_PATCH", + "type": "MODEL_PATCH", + "links": [ + 10 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ModelPatchLoader", + "models": [ + { + 
"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "directory": "model_patches" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + }, + { + "id": 6, + "type": "ModelSamplingAuraFlow", + "pos": [ + 998.6041081931173, + 4490.1880693746825 + ], + "size": [ + 289.97395833333337, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 3 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 4 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 7, + "type": "KSampler", + "pos": [ + 998.6041081931173, + 4600.188363442829 + ], + "size": [ + 300, + 262 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 4 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 5 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 7 + }, + { + "localized_name": "seed", + "name": "seed", + 
"type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 1 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 9, + 1, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "ConditioningZeroOut", + "pos": [ + 748.2706508086186, + 5044.854997097082 + ], + "size": [ + 204.134765625, + 26 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 6 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": 
[] + }, + { + "id": 10, + "type": "EmptySD3LatentImage", + "pos": [ + 1028.2702326451792, + 5334.855683329977 + ], + "size": [ + 259.9479166666667, + 106 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 12 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 13 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 5, + "type": "VAEDecode", + "pos": [ + 1338.604012131086, + 4500.188453282262 + ], + "size": [ + 200, + 46 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 1 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 2 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 9, + "type": "QwenImageDiffsynthControlnet", + "pos": [ + 608.2704996459613, + 5204.85528564724 + ], + "size": [ + 
289.97395833333337, + 138 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 9 + }, + { + "localized_name": "model_patch", + "name": "model_patch", + "type": "MODEL_PATCH", + "link": 10 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 11 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 248 + }, + { + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 3 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for S&R": "QwenImageDiffsynthControlnet", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 11, + "type": "GetImageSize", + "pos": [ + 530, + 5440 + ], + "size": [ + 140, + 66 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 247 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 12 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 13 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, 
+ "widgets_values": [] + }, + { + "id": 12, + "type": "CLIPTextEncode", + "pos": [ + 548.2706278500244, + 4544.854827124228 + ], + "size": [ + 400, + 420 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 14 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 16 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 5, + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 14, + "type": "ImageScaleToTotalPixels", + "pos": [ + 90, + 5180 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 25 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 248, + 250 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "ImageScaleToTotalPixels" + }, + "widgets_values": [ + "lanczos", + 1, + 1 + ] + }, + { + "id": 15, + "type": "PreviewImage", + "pos": [ + 90, + 
5530 + ], + "size": [ + 380, + 260 + ], + "flags": {}, + "order": 13, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 251 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "PreviewImage" + }, + "widgets_values": [] + }, + { + "id": 76, + "type": "458bdf3c-4b58-421c-af50-c9c663a4d74c", + "pos": [ + 90, + 5340 + ], + "size": [ + 400, + 150 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 250 + }, + { + "label": "depth_intensity", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 258 + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 259 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 247, + 251 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "sigma" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.14.1" + }, + "widgets_values": [ + 999.0000000000002, + "lotus-depth-d-v1-1.safetensors", + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt", + "bounding": [ + 530, + 4470, + 440, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Models", + "bounding": [ + 210, + 4470, + 300, + 640 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Apple ControlNet", + "bounding": [ + 530, + 5120, + 440, + 260 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 1, + "origin_id": 7, + "origin_slot": 0, + "target_id": 5, + "target_slot": 0, + "type": "LATENT" + }, + { + 
"id": 2, + "origin_id": 3, + "origin_slot": 0, + "target_id": 5, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 3, + "origin_id": 9, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 4, + "origin_id": 6, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 5, + "origin_id": 12, + "origin_slot": 0, + "target_id": 7, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 6, + "origin_id": 8, + "origin_slot": 0, + "target_id": 7, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 7, + "origin_id": 10, + "origin_slot": 0, + "target_id": 7, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 8, + "origin_id": 12, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 9, + "origin_id": 2, + "origin_slot": 0, + "target_id": 9, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 10, + "origin_id": 4, + "origin_slot": 0, + "target_id": 9, + "target_slot": 1, + "type": "MODEL_PATCH" + }, + { + "id": 11, + "origin_id": 3, + "origin_slot": 0, + "target_id": 9, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 12, + "origin_id": 11, + "origin_slot": 0, + "target_id": 10, + "target_slot": 0, + "type": "INT" + }, + { + "id": 13, + "origin_id": 11, + "origin_slot": 1, + "target_id": 10, + "target_slot": 1, + "type": "INT" + }, + { + "id": 14, + "origin_id": 1, + "origin_slot": 0, + "target_id": 12, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": -10, + "origin_slot": 1, + "target_id": 12, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 18, + "origin_id": 5, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 25, + "origin_id": -10, + "origin_slot": 0, + "target_id": 14, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 247, + "origin_id": 76, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 248, + 
"origin_id": 14, + "origin_slot": 0, + "target_id": 9, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 250, + "origin_id": 14, + "origin_slot": 0, + "target_id": 76, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 251, + "origin_id": 76, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 252, + "origin_id": -10, + "origin_slot": 2, + "target_id": 2, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 253, + "origin_id": -10, + "origin_slot": 3, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 254, + "origin_id": -10, + "origin_slot": 4, + "target_id": 3, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 255, + "origin_id": -10, + "origin_slot": 5, + "target_id": 4, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 258, + "origin_id": -10, + "origin_slot": 6, + "target_id": 76, + "target_slot": 2, + "type": "COMBO" + }, + { + "id": 259, + "origin_id": -10, + "origin_slot": 7, + "target_id": 76, + "target_slot": 3, + "type": "COMBO" + } + ], + "extra": { + "ds": { + "scale": 1.3889423076923078, + "offset": [ + 22.056074766355096, + -3503.3333333333335 + ] + }, + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "category": "Image generation and editing/Depth to image", + "description": "Generates an image from a depth map using Z-Image-Turbo with text conditioning." 
+ }, + { + "id": "458bdf3c-4b58-421c-af50-c9c663a4d74c", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 76, + "lastLinkId": 259, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Depth Map (Lotus)", + "inputNode": { + "id": -10, + "bounding": [ + -60, + -172.61268043518066, + 126.625, + 120 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1650, + -172.61268043518066, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 37 + ], + "localized_name": "pixels", + "pos": [ + 46.625, + -152.61268043518066 + ] + }, + { + "id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", + "name": "sigma", + "type": "FLOAT", + "linkIds": [ + 243 + ], + "label": "depth_intensity", + "pos": [ + 46.625, + -132.61268043518066 + ] + }, + { + "id": "d721b249-fd2a-441b-9a78-2805f04e2644", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 256 + ], + "pos": [ + 46.625, + -112.61268043518066 + ] + }, + { + "id": "0430e2ea-f8b5-4191-9b72-b7d62176f97c", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 257 + ], + "pos": [ + 46.625, + -92.61268043518066 + ] + } + ], + "outputs": [ + { + "id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 242 + ], + "localized_name": "IMAGE", + "pos": [ + 1670, + -152.61268043518066 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1380.0000135211146, + -240.0000135211144 + ], + "size": [ + 210, + 60 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 232 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 240 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + 
"ver": "0.3.34", + "Node name for S&R": "VAEDecode", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 10, + "type": "UNETLoader", + "pos": [ + 135.34178335388546, + -290.1947851765315 + ], + "size": [ + 305.9244791666667, + 97.7734375 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 256 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 31, + 241 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "lotus-depth-d-v1-1.safetensors", + "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", + "directory": "diffusion_models" + } + ], + "widget_ue_connectable": {} + }, + "widgets_values": [ + "lotus-depth-d-v1-1.safetensors", + "default" + ] + }, + { + "id": 14, + "type": "VAELoader", + "pos": [ + 134.53144605616137, + -165.18194011768782 + ], + "size": [ + 305.9244791666667, + 68.88020833333334 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 257 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 38, + 240 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "vae-ft-mse-840000-ema-pruned.safetensors", + "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", + "directory": "vae" + } + ], + 
"widget_ue_connectable": {} + }, + "widgets_values": [ + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + }, + { + "id": 16, + "type": "SamplerCustomAdvanced", + "pos": [ + 990.6585475753939, + -319.91444852782104 + ], + "size": [ + 355.1953125, + 325.98958333333337 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 237 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 27 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 33 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 194 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 201 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 232 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SamplerCustomAdvanced", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 18, + "type": "DisableNoise", + "pos": [ + 730.4769792883567, + -320.00005408445816 + ], + "size": [ + 210, + 40 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [ + 237 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "DisableNoise", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 19, + "type": "BasicGuider", + "pos": [ + 730.2630921572128, + -251.22541185314978 + ], + "size": [ + 210, + 60 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + 
"link": 241 + }, + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 238 + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicGuider", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 20, + "type": "BasicScheduler", + "pos": [ + 488.64457755981744, + -147.67201223931278 + ], + "size": [ + 210, + 122.21354166666667 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 31 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 66 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicScheduler", + "widget_ue_connectable": {} + }, + "widgets_values": [ + "normal", + 1, + 1 + ] + }, + { + "id": 21, + "type": "KSamplerSelect", + "pos": [ + 730.2630921572128, + -161.22540847287118 + ], + "size": [ + 210, + 68.88020833333334 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.3.34", + "Node name for S&R": "KSamplerSelect", + "widget_ue_connectable": {} + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 22, + "type": "ImageInvert", + "pos": [ + 1373.3333333333335, + -318.33333333333337 + ], + "size": [ + 210, + 40 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 242 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "ImageInvert", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 23, + "type": "VAEEncode", + "pos": [ + 730.2630921572128, + 38.774608428522015 + ], + "size": [ + 210, + 60 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 37 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 38 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 201 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEEncode", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SetFirstSigma", + "pos": [ + 730.2630921572128, + -61.225357768691524 + ], + "size": [ + 210, + 66.66666666666667 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 66 + }, + { + "localized_name": "sigma", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": 243 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 194 + ] + } + ], + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SetFirstSigma", + "widget_ue_connectable": {} + }, + "widgets_values": [ + 999.0000000000002 + ] + }, + { + "id": 68, + "type": "LotusConditioning", + "pos": [ + 489.99998478874613, + -229.99996619721344 + ], + "size": [ + 210, + 40 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 238 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "LotusConditioning", + "widget_ue_connectable": {} + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 2, + "title": "Models", + "bounding": [ + 123.33333333333334, + -351.6666666666667, + 323.4014831310574, + 263.55972005884377 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 232, + "origin_id": 16, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 14, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 237, + "origin_id": 18, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 27, + "origin_id": 19, + "origin_slot": 0, + "target_id": 16, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 33, + "origin_id": 21, + "origin_slot": 0, + "target_id": 16, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 194, + "origin_id": 28, + "origin_slot": 0, + "target_id": 16, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 201, + "origin_id": 23, + "origin_slot": 0, + "target_id": 16, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 241, + "origin_id": 10, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 238, + "origin_id": 68, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 
31, + "origin_id": 10, + "origin_slot": 0, + "target_id": 20, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 35, + "origin_id": 8, + "origin_slot": 0, + "target_id": 22, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 38, + "origin_id": 14, + "origin_slot": 0, + "target_id": 23, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 66, + "origin_id": 20, + "origin_slot": 0, + "target_id": 28, + "target_slot": 0, + "type": "SIGMAS" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 242, + "origin_id": 22, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 243, + "origin_id": -10, + "origin_slot": 1, + "target_id": 28, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 256, + "origin_id": -10, + "origin_slot": 2, + "target_id": 10, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 257, + "origin_id": -10, + "origin_slot": 3, + "target_id": 14, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "ds": { + "scale": 1.2354281696404266, + "offset": [ + -114.15605447786857, + -754.3368938705543 + ] + }, + "workflowRendererVersion": "LG" + }, + "description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model." 
+ } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 0.7886233956111374, + "offset": [ + 741.6589462093539, + -3278.0806447095165 + ] + }, + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Depth to Video (ltx 2.0).json b/blueprints/Depth to Video (ltx 2.0).json index 9656b6253..bd51e4476 100644 --- a/blueprints/Depth to Video (ltx 2.0).json +++ b/blueprints/Depth to Video (ltx 2.0).json @@ -1 +1,5213 @@ -{"id": "ec176c82-4db5-4ab9-b5a0-8aa8e5684a81", "revision": 0, "last_node_id": 191, "last_link_id": 433, "nodes": [{"id": 143, "type": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "pos": [289.99998661973035, 3960.0002084505168], "size": [400, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "image_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "depth reference video", "name": "video", "type": "VIDEO", "link": null}, {"label": "first frame", "name": "image_2", "type": "IMAGE", "link": null}, {"label": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": null}, {"label": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, 
"link": null}, {"label": "distill_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"label": "lotus_depth_model", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"label": "sd15_vae", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "bypass"], ["-1", "strength"], ["-1", "resize_type.width"], ["-1", "resize_type.height"], ["-1", "length"], ["126", "noise_seed"], ["143", "control_after_generate"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "text_encoder"], ["-1", "lora_name_1"], ["-1", "model_name"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", false, 1, 1280, 720, 121, null, null, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-depth-control.safetensors", "gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-distilled-lora-384.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors", "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 191, "lastLinkId": 433, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Depth to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2730, 4020, 165.30859375, 340]}, "outputNode": {"id": -20, "bounding": [1750, 4090, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "label": "prompt", "pos": 
[-2584.69140625, 4040]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "image_strength", "pos": [-2584.69140625, 4060]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2584.69140625, 4080]}, {"id": "de434962-832a-485c-a016-869b3f2176ca", "name": "video", "type": "VIDEO", "linkIds": [419], "label": "depth reference video", "pos": [-2584.69140625, 4100]}, {"id": "a1189d3d-bbff-4933-875d-cffa58dd4cb0", "name": "image_2", "type": "IMAGE", "linkIds": [410], "label": "first frame", "pos": [-2584.69140625, 4120]}, {"id": "577dae4c-447b-4c84-9973-56381fdbc6a9", "name": "resize_type.width", "type": "INT", "linkIds": [420], "label": "width", "pos": [-2584.69140625, 4140]}, {"id": "fb30c570-128c-46b8-a140-054aff294edc", "name": "resize_type.height", "type": "INT", "linkIds": [421], "label": "height", "pos": [-2584.69140625, 4160]}, {"id": "33d5f598-00ae-4e2d-8eb2-2da23ae5ba46", "name": "length", "type": "INT", "linkIds": [422], "pos": [-2584.69140625, 4180]}, {"id": "68cc58b0-2013-4c3a-81ff-3d1e86232d76", "name": "ckpt_name", "type": "COMBO", "linkIds": [425, 433], "pos": [-2584.69140625, 4200]}, {"id": "0c65a06b-e12a-4298-8d81-69e57a123188", "name": "lora_name", "type": "COMBO", "linkIds": [426], "pos": [-2584.69140625, 4220]}, {"id": "eba96545-b8c6-4fba-b086-ddeeb4a9130d", "name": "text_encoder", "type": "COMBO", "linkIds": [427], "pos": [-2584.69140625, 4240]}, {"id": "848f9d82-3fde-4b95-b226-4b0db7082112", "name": "lora_name_1", "type": "COMBO", "linkIds": [429], "label": "distill_lora", "pos": [-2584.69140625, 4260]}, {"id": "32ace7dd-4da8-416b-b1e3-00652b3e6838", "name": "model_name", "type": "COMBO", "linkIds": [430], "pos": [-2584.69140625, 4280]}, {"id": "d6ad1978-71b6-425b-be13-c8f1e1d798d9", "name": "unet_name", "type": "COMBO", "linkIds": [431], "label": "lotus_depth_model", "pos": 
[-2584.69140625, 4300]}, {"id": "b0545a5d-65e8-4baa-a7be-d5f3d2b8b6e3", "name": "vae_name", "type": "COMBO", "linkIds": [432], "label": "sd15_vae", "pos": [-2584.69140625, 4320]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770, 4110]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-697.9999467324425, 3670.0001318308678], "size": [270, 106.66666666666667], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-697.9999467324425, 3840.0000630985346], "size": [270, 68.88020833333334], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [409.9999946478922, 
3850.0001667604133], "size": [270, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [409.9999946478922, 4100.000194929402], "size": [270, 110], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [409.9999946478922, 3700.00007661965], "size": [270, 106.66666666666667], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.71", "Node name for S&R": 
"CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1100.000003380279, 4810.000230985708], "size": [270, 120], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 285}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": 329}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-387.99998321128277, 3520.0000416901034], "size": [213.125, 120], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": 
"0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1134.000099492868, 4140.000243380063], "size": [270, 86.66666666666667], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1164.0000442816504, 3880.0001115491955], "size": [400, 200], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": 
"#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-519.9999874648, 4700.000189295605], "size": [187.5, 60], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-393.9999813239605, 3800.0000146478747], "size": [240, 60], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.9999569014471, 3514.0002724504593], "size": [410, 320], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": 
"CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [409.9999946478922, 3980.00004957742], "size": [270, 68.88020833333334], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gradient_estimation"]}, {"id": 95, "type": "LTXVScheduler", "pos": [-699.9999766197394, 3980.00004957742], "size": [270, 170], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, 
"secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-697.9999467324425, 3520.0000416901034], "size": [270, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [709.9999918309934, 3570.000193802643], "size": [212.3828125, 120], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [409.9999946478922, 3570.000193802643], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", 
"name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1129.9999512676497, 3840.0000630985346], "size": [240, 60], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 340}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1650.0000287323687, 3760.0003323940673], "size": [420, 95.546875], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 426}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-depth-control.safetensors", "url": 
"https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Depth-Control/resolve/main/ltx-2-19b-ic-lora-depth-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-depth-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730.0000160563236, 3730.0000214084316], "size": [193.2916015625, 60], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1119.9999391549845, 3640.000187042085], "size": [270, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, {"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": 
"IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1129.9999512676497, 3530.000145351982], "size": [240, 60], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1630.0000045070383, 4620.0000923942835], "size": [270, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1630.0000045070383, 4749.99997521129], "size": [270, 66.66666666666667], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": 
{"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1100.000003380279, 4609.999988732406], "size": [270, 146.66666666666669], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 330}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1089.9999912676137, 4400.000009014077], "size": [270, 151.9921875], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 417}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": 
"comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 118, "type": "Reroute", "pos": [-229.99999095071237, 4210.000236619506], "size": [75, 26], "flags": {}, "order": 27, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-19.999999788732577, 4070.0002501406198], "size": [270, 181.9921875], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 410}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 366}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-9.999999119719098, 3840.0000630985346], "size": [240, 80], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], 
"outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-9.999999119719098, 3960.0002084505168], "size": [260, 80], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 289}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 132, "type": "LTXVAddGuide", "pos": [-599.9999928169079, 4420.000216337834], "size": [270, 209.16666666666669], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 418}, {"localized_name": "frame_idx", "name": "frame_idx", 
"type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 96, "type": "LTXVAudioVAELoader", "pos": [-1650.0000287323687, 3910.000056337978], "size": [420, 68.88020833333334], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 377}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [285, 340]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVAudioVAELoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1650.0000287323687, 3590.0000349295465], "size": [420, 108.88020833333334], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 425}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": 
[325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-69.99999741197416, 3570.000193802643], "size": [390, 95.546875], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 429}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-69.99999741197416, 3700.00007661965], "size": [390, 68.88020833333334], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": 
"COMBO", "widget": {"name": "model_name"}, "link": 430}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": "LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 110, "type": "GetImageSize", "pos": [-1630.0000045070383, 4450.000161126616], "size": [260, 80], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 416}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [329, 330]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1419.9999363380857, 3760.0003323940673], "size": [270, 86.66666666666667], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", 
"links": [304]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 187, "type": "ImageFromBatch", "pos": [-2310.000095774562, 3689.999972957771], "size": [260, 93.33333333333334], "flags": {}, "order": 39, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 412}, {"localized_name": "batch_index", "name": "batch_index", "type": "INT", "widget": {"name": "batch_index"}, "link": null}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 422}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [415]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ImageFromBatch", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 121]}, {"id": 191, "type": "ResizeImageMaskNode", "pos": [-2320.0000163380137, 3850.0001667604133], "size": [284.375, 154], "flags": {}, "order": 43, "mode": 0, "inputs": [{"localized_name": "input", "name": "input", "type": "IMAGE,MASK", "link": 415}, {"localized_name": "resize_type", "name": "resize_type", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "resize_type"}, "link": null}, {"localized_name": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": 420}, {"localized_name": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": 421}, {"localized_name": "crop", "name": "resize_type.crop", "type": "COMBO", "widget": {"name": "resize_type.crop"}, "link": null}, {"localized_name": "scale_method", "name": "scale_method", "type": "COMBO", "widget": {"name": 
"scale_method"}, "link": null}], "outputs": [{"localized_name": "resized", "name": "resized", "type": "IMAGE", "links": [413]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ResizeImageMaskNode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["scale dimensions", 1280, 720, "center", "lanczos"]}, {"id": 188, "type": "GetVideoComponents", "pos": [-2320.0000163380137, 3520.0000416901034], "size": [280, 80], "flags": {"collapsed": false}, "order": 40, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 419}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [412]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": []}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GetVideoComponents", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 189, "type": "ImageScaleBy", "pos": [-1990.0000743661303, 3670.0001318308678], "size": [280, 125.546875], "flags": {}, "order": 41, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 413}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "scale_by", "name": "scale_by", "type": "FLOAT", "widget": {"name": "scale_by"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [414]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ImageScaleBy", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send 
Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["lanczos", 0.5]}, {"id": 154, "type": "MarkdownNote", "pos": [-1659.9999492958204, 4870.000120563272], "size": [350, 170], "flags": {"collapsed": false}, "order": 7, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make sure the frame rate value is the same in both boxes"], "color": "#222", "bgcolor": "#000"}, {"id": 190, "type": "38b60539-50a7-42f9-a5fe-bdeca26272e2", "pos": [-1999.9999949295823, 3910.000056337978], "size": [310, 106], "flags": {}, "order": 42, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 414}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 431}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 432}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [416, 417, 418]}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"], "color": "#322", "bgcolor": "#533"}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1650.0000287323687, 4040.0003053518376], "size": [420, 124.44010416666667], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 427}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 433}, {"localized_name": "device", "name": "device", "type": 
"COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1640, 4550, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 16, "title": "Video Preprocess", "bounding": [-2330, 3450, 650, 567.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 
15, "title": "video length", "bounding": [-2320, 3620, 290, 180], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 285, "origin_id": 96, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 329, "origin_id": 110, "origin_slot": 2, "target_id": 111, "target_slot": 1, "type": "INT"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 330, "origin_id": 110, "origin_slot": 2, "target_id": 115, "target_slot": 2, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, 
"origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 340, "origin_id": 96, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, 
"target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": "LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 
3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, "type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 1, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 1, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 377, "origin_id": -10, "origin_slot": 6, "target_id": 96, "target_slot": 0, "type": "COMBO"}, {"id": 410, "origin_id": -10, "origin_slot": 4, "target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 412, "origin_id": 188, "origin_slot": 0, "target_id": 187, "target_slot": 0, "type": "IMAGE"}, {"id": 413, "origin_id": 191, "origin_slot": 0, "target_id": 189, "target_slot": 0, "type": "IMAGE"}, {"id": 414, "origin_id": 189, "origin_slot": 0, "target_id": 190, "target_slot": 0, "type": "IMAGE"}, {"id": 415, "origin_id": 187, "origin_slot": 0, "target_id": 191, "target_slot": 0, "type": "IMAGE"}, {"id": 416, "origin_id": 190, "origin_slot": 0, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 417, "origin_id": 190, "origin_slot": 0, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 418, "origin_id": 190, "origin_slot": 0, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 419, "origin_id": -10, "origin_slot": 3, "target_id": 188, "target_slot": 0, "type": "VIDEO"}, {"id": 420, "origin_id": -10, 
"origin_slot": 5, "target_id": 191, "target_slot": 2, "type": "INT"}, {"id": 421, "origin_id": -10, "origin_slot": 6, "target_id": 191, "target_slot": 3, "type": "INT"}, {"id": 422, "origin_id": -10, "origin_slot": 7, "target_id": 187, "target_slot": 2, "type": "INT"}, {"id": 425, "origin_id": -10, "origin_slot": 8, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 426, "origin_id": -10, "origin_slot": 9, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 427, "origin_id": -10, "origin_slot": 10, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 429, "origin_id": -10, "origin_slot": 11, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 430, "origin_id": -10, "origin_slot": 12, "target_id": 100, "target_slot": 0, "type": "COMBO"}, {"id": 431, "origin_id": -10, "origin_slot": 13, "target_id": 190, "target_slot": 2, "type": "COMBO"}, {"id": 432, "origin_id": -10, "origin_slot": 14, "target_id": 190, "target_slot": 3, "type": "COMBO"}, {"id": 433, "origin_id": -10, "origin_slot": 8, "target_id": 97, "target_slot": 1, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Depth to video"}, {"id": "38b60539-50a7-42f9-a5fe-bdeca26272e2", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 191, "lastLinkId": 433, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, "outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "374bfecc-34bb-47f9-82b6-cbe9383f8756", "name": "unet_name", "type": "COMBO", 
"linkIds": [423], "pos": [46.625, -112.61268043518066]}, {"id": "bb8707a1-46c3-44be-a15a-0adc908d871d", "name": "vae_name", "type": "COMBO", "linkIds": [424], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 8, "type": "VAEDecode", "pos": [1380, -240], "size": [210, 46], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 10, "type": "UNETLoader", "pos": [135.34181213378906, -290.1947937011719], "size": [305.93701171875, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 423}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, 
"secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 14, "type": "VAELoader", "pos": [134.531494140625, -165.18197631835938], "size": [305.93701171875, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 424}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [990.6585693359375, -319.9144287109375], "size": [355.20001220703125, 326], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, 
"tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 18, "type": "DisableNoise", "pos": [730.47705078125, -320], "size": [210, 26], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 19, "type": "BasicGuider", "pos": [730.2631225585938, -251.22537231445312], "size": [210, 46], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [488.64459228515625, -147.67201232910156], "size": [210, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, 
"link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}, {"id": 21, "type": "KSamplerSelect", "pos": [730.2631225585938, -161.22537231445312], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 22, "type": "ImageInvert", "pos": [1380, -310], "size": [210, 26], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [730.2631225585938, 38.77463912963867], "size": [210, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", 
"name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [730.2631225585938, -61.22536087036133], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 68, "type": "LotusConditioning", "pos": [490, -230], "size": [210, 26], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}], "groups": [{"id": 1, "title": "Load Models", "bounding": [120, -370, 335, 281.6000061035156], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, 
"target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, "target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 423, "origin_id": -10, "origin_slot": 2, "target_id": 10, "target_slot": 0, "type": "COMBO"}, {"id": 424, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 1.313181818181818, "offset": [271.9196871428176, -3845.0123774536323]}, "workflowRendererVersion": "LG"}, "version": 0.4} +{ + "id": 
"ec176c82-4db5-4ab9-b5a0-8aa8e5684a81", + "revision": 0, + "last_node_id": 191, + "last_link_id": 433, + "nodes": [ + { + "id": 143, + "type": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", + "pos": [ + 289.99998661973035, + 3960.0002084505168 + ], + "size": [ + 400, + 500 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "image_strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "label": "disable_first_frame", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": null + }, + { + "label": "depth reference video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "first frame", + "name": "image_2", + "type": "IMAGE", + "link": null + }, + { + "label": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": null + }, + { + "label": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": null + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "distill_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "label": "lotus_depth_model", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": 
"unet_name" + }, + "link": null + }, + { + "label": "sd15_vae", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "bypass" + ], + [ + "-1", + "strength" + ], + [ + "-1", + "resize_type.width" + ], + [ + "-1", + "resize_type.height" + ], + [ + "-1", + "length" + ], + [ + "126", + "noise_seed" + ], + [ + "143", + "control_after_generate" + ], + [ + "-1", + "ckpt_name" + ], + [ + "-1", + "lora_name" + ], + [ + "-1", + "text_encoder" + ], + [ + "-1", + "lora_name_1" + ], + [ + "-1", + "model_name" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.7.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + false, + 1, + 1280, + 720, + 121, + null, + null, + "ltx-2-19b-dev-fp8.safetensors", + "ltx-2-19b-ic-lora-depth-control.safetensors", + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2-19b-distilled-lora-384.safetensors", + "ltx-2-spatial-upscaler-x2-1.0.safetensors", + "lotus-depth-d-v1-1.safetensors", + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", + "version": 1, + "state": { + "lastGroupId": 16, + "lastNodeId": 191, + "lastLinkId": 433, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Depth to Video (LTX 2.0)", + "inputNode": { + "id": -10, + "bounding": [ + -2730, + 4020, + 165.30859375, + 340 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1750, + 4090, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", + "name": "text", + "type": 
"STRING", + "linkIds": [ + 345 + ], + "label": "prompt", + "pos": [ + -2584.69140625, + 4040 + ] + }, + { + "id": "59430efe-1090-4e36-8afe-b21ce7f4268b", + "name": "strength", + "type": "FLOAT", + "linkIds": [ + 370, + 371 + ], + "label": "image_strength", + "pos": [ + -2584.69140625, + 4060 + ] + }, + { + "id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", + "name": "bypass", + "type": "BOOLEAN", + "linkIds": [ + 363, + 368 + ], + "label": "disable_first_frame", + "pos": [ + -2584.69140625, + 4080 + ] + }, + { + "id": "de434962-832a-485c-a016-869b3f2176ca", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 419 + ], + "label": "depth reference video", + "pos": [ + -2584.69140625, + 4100 + ] + }, + { + "id": "a1189d3d-bbff-4933-875d-cffa58dd4cb0", + "name": "image_2", + "type": "IMAGE", + "linkIds": [ + 410 + ], + "label": "first frame", + "pos": [ + -2584.69140625, + 4120 + ] + }, + { + "id": "577dae4c-447b-4c84-9973-56381fdbc6a9", + "name": "resize_type.width", + "type": "INT", + "linkIds": [ + 420 + ], + "label": "width", + "pos": [ + -2584.69140625, + 4140 + ] + }, + { + "id": "fb30c570-128c-46b8-a140-054aff294edc", + "name": "resize_type.height", + "type": "INT", + "linkIds": [ + 421 + ], + "label": "height", + "pos": [ + -2584.69140625, + 4160 + ] + }, + { + "id": "33d5f598-00ae-4e2d-8eb2-2da23ae5ba46", + "name": "length", + "type": "INT", + "linkIds": [ + 422 + ], + "pos": [ + -2584.69140625, + 4180 + ] + }, + { + "id": "68cc58b0-2013-4c3a-81ff-3d1e86232d76", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 425, + 433 + ], + "pos": [ + -2584.69140625, + 4200 + ] + }, + { + "id": "0c65a06b-e12a-4298-8d81-69e57a123188", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 426 + ], + "pos": [ + -2584.69140625, + 4220 + ] + }, + { + "id": "eba96545-b8c6-4fba-b086-ddeeb4a9130d", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 427 + ], + "pos": [ + -2584.69140625, + 4240 + ] + }, + { + "id": "848f9d82-3fde-4b95-b226-4b0db7082112", + 
"name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 429 + ], + "label": "distill_lora", + "pos": [ + -2584.69140625, + 4260 + ] + }, + { + "id": "32ace7dd-4da8-416b-b1e3-00652b3e6838", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 430 + ], + "pos": [ + -2584.69140625, + 4280 + ] + }, + { + "id": "d6ad1978-71b6-425b-be13-c8f1e1d798d9", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 431 + ], + "label": "lotus_depth_model", + "pos": [ + -2584.69140625, + 4300 + ] + }, + { + "id": "b0545a5d-65e8-4baa-a7be-d5f3d2b8b6e3", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 432 + ], + "label": "sd15_vae", + "pos": [ + -2584.69140625, + 4320 + ] + } + ], + "outputs": [ + { + "id": "4e837941-de2d-4df8-8f94-686e24036897", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 304 + ], + "localized_name": "VIDEO", + "pos": [ + 1770, + 4110 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 93, + "type": "CFGGuider", + "pos": [ + -697.9999467324425, + 3670.0001318308678 + ], + "size": [ + 270, + 106.66666666666667 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 326 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 309 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 311 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 261 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 94, + "type": 
"KSamplerSelect", + "pos": [ + -697.9999467324425, + 3840.0000630985346 + ], + "size": [ + 270, + 68.88020833333334 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 262 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 99, + "type": "ManualSigmas", + "pos": [ + 409.9999946478922, + 3850.0001667604133 + ], + "size": [ + 270, + 70 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 278 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 101, + "type": "LTXVConcatAVLatent", + "pos": [ + 409.9999946478922, + 4100.000194929402 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 365 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 266 + } + ], + "outputs": [ + { + "localized_name": 
"latent", + "name": "latent", + "type": "LATENT", + "links": [ + 279 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 108, + "type": "CFGGuider", + "pos": [ + 409.9999946478922, + 3700.00007661965 + ], + "size": [ + 270, + 106.66666666666667 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 280 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 281 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 282 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 276 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 111, + "type": "LTXVEmptyLatentAudio", + "pos": [ + -1100.000003380279, + 4810.000230985708 + ], + "size": [ + 270, + 120 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 285 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 329 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + 
"link": 354 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 300 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 123, + "type": "SamplerCustomAdvanced", + "pos": [ + -387.99998321128277, + 3520.0000416901034 + ], + "size": [ + 213.125, + 120 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 260 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 261 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 262 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 263 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 272 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 114, + "type": "LTXVConditioning", + "pos": [ + -1134.000099492868, + 4140.000243380063 + ], + "size": [ + 270, + 86.66666666666667 + ], + "flags": 
{}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 292 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 293 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 355 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 313 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 314 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 119, + "type": "CLIPTextEncode", + "pos": [ + -1164.0000442816504, + 3880.0001115491955 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 294 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 293 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 116, + "type": "LTXVConcatAVLatent", + 
"pos": [ + -519.9999874648, + 4700.000189295605 + ], + "size": [ + 187.5, + 60 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 324 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 300 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 322, + 323 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 122, + "type": "LTXVSeparateAVLatent", + "pos": [ + -393.9999813239605, + 3800.0000146478747 + ], + "size": [ + 240, + 60 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 270 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 266 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 124, + "type": "CLIPTextEncode", + "pos": [ + -1174.9999569014471, + 3514.0002724504593 + ], + "size": [ + 410, + 320 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 295 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + 
"widget": { + "name": "text" + }, + "link": 345 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 292 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 98, + "type": "KSamplerSelect", + "pos": [ + 409.9999946478922, + 3980.00004957742 + ], + "size": [ + 270, + 68.88020833333334 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 277 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "gradient_estimation" + ] + }, + { + "id": 95, + "type": "LTXVScheduler", + "pos": [ + -699.9999766197394, + 3980.00004957742 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 322 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "max_shift", + "name": "max_shift", + "type": "FLOAT", + "widget": { + "name": "max_shift" + }, + "link": null + }, + { + "localized_name": "base_shift", + "name": "base_shift", + "type": "FLOAT", + 
"widget": { + "name": "base_shift" + }, + "link": null + }, + { + "localized_name": "stretch", + "name": "stretch", + "type": "BOOLEAN", + "widget": { + "name": "stretch" + }, + "link": null + }, + { + "localized_name": "terminal", + "name": "terminal", + "type": "FLOAT", + "widget": { + "name": "terminal" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 263 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVScheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 126, + "type": "RandomNoise", + "pos": [ + -697.9999467324425, + 3520.0000416901034 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 260 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 107, + "type": "SamplerCustomAdvanced", + "pos": [ + 709.9999918309934, + 3570.000193802643 + ], + "size": [ + 212.3828125, + 120 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 347 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 276 + }, + { + "localized_name": "sampler", + "name": 
"sampler", + "type": "SAMPLER", + "link": 277 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 278 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 279 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 336 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 143, + "type": "RandomNoise", + "pos": [ + 409.9999946478922, + 3570.000193802643 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 347 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize" + ] + }, + { + "id": 139, + "type": "LTXVAudioVAEDecode", + "pos": [ + 1129.9999512676497, + 3840.0000630985346 + ], + "size": [ + 240, + 60 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 338 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 340 + } + ], + "outputs": [ + { + 
"localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 339 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 134, + "type": "LoraLoaderModelOnly", + "pos": [ + -1650.0000287323687, + 3760.0003323940673 + ], + "size": [ + 420, + 95.546875 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 325 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 426 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 326, + 327 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "ltx-2-19b-ic-lora-depth-control.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Depth-Control/resolve/main/ltx-2-19b-ic-lora-depth-control.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-ic-lora-depth-control.safetensors", + 1 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 138, + "type": "LTXVSeparateAVLatent", + "pos": [ + 730.0000160563236, + 3730.0000214084316 + ], + "size": [ + 193.2916015625, + 60 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": 
"av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 337, + 351 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 338 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 144, + "type": "VAEDecodeTiled", + "pos": [ + 1119.9999391549845, + 3640.000187042085 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 351 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 353 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 352 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + 512, + 64, + 4096, + 8 + ] + }, + { + "id": 113, + "type": "VAEDecode", + "pos": [ + 1129.9999512676497, + 3530.000145351982 + ], + "size": [ + 240, + 60 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 337 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 291 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 145, + "type": "PrimitiveInt", + "pos": [ + -1630.0000045070383, + 4620.0000923942835 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 354 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24, + "fixed" + ] + }, + { + "id": 148, + "type": "PrimitiveFloat", + "pos": [ + -1630.0000045070383, + 4749.99997521129 + ], + "size": [ + 270, + 66.66666666666667 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + 
"links": [ + 355, + 356 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 115, + "type": "EmptyLTXVLatentVideo", + "pos": [ + -1100.000003380279, + 4609.999988732406 + ], + "size": [ + 270, + 146.66666666666669 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 296 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 297 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 330 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 360 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 149, + "type": "LTXVImgToVideoInplace", + "pos": [ + -1089.9999912676137, + 4400.000009014077 + ], + "size": [ + 270, + 151.9921875 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 359 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 417 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + 
"link": 360 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 370 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 363 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 357 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 118, + "type": "Reroute", + "pos": [ + -229.99999095071237, + 4210.000236619506 + ], + "size": [ + 75, + 26 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 303 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 289, + 291, + 367 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false + } + }, + { + "id": 151, + "type": "LTXVImgToVideoInplace", + "pos": [ + -19.999999788732577, + 4070.0002501406198 + ], + "size": [ + 270, + 181.9921875 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 367 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 410 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 366 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 371 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 368 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 365 
+ ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 104, + "type": "LTXVCropGuides", + "pos": [ + -9.999999119719098, + 3840.0000630985346 + ], + "size": [ + 240, + 80 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 310 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 312 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 281 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 282 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 287 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 112, + "type": "LTXVLatentUpsampler", + "pos": [ + -9.999999119719098, + 3960.0002084505168 + ], + "size": [ + 260, + 80 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 287 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 288 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 289 + } + ], + 
"outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 366 + ] + } + ], + "title": "spatial", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 132, + "type": "LTXVAddGuide", + "pos": [ + -599.9999928169079, + 4420.000216337834 + ], + "size": [ + 270, + 209.16666666666669 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 313 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 314 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 328 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 357 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 418 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 309, + 310 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 311, + 312 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 324 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 96, + "type": "LTXVAudioVAELoader", + "pos": [ + -1650.0000287323687, + 3910.000056337978 + ], + "size": [ + 420, + 68.88020833333334 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 377 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 285, + 340 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVAudioVAELoader", + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + }, + { + "id": 103, + "type": "CheckpointLoaderSimple", + "pos": [ + -1650.0000287323687, + 3590.0000349295465 + ], + "size": [ + 420, + 108.88020833333334 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 425 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 303, + 328, + 353, + 359 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CheckpointLoaderSimple", + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": 
"https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + }, + { + "id": 105, + "type": "LoraLoaderModelOnly", + "pos": [ + -69.99999741197416, + 3570.000193802643 + ], + "size": [ + 390, + 95.546875 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 327 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 429 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 280 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "ltx-2-19b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-19b-distilled-lora-384.safetensors", + 1 + ] + }, + { + "id": 100, + "type": "LatentUpscaleModelLoader", + "pos": [ + -69.99999741197416, + 3700.00007661965 + ], + "size": [ + 390, + 68.88020833333334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 430 + } + ], + 
"outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 288 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "models": [ + { + "name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", + "directory": "latent_upscale_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ltx-2-spatial-upscaler-x2-1.0.safetensors" + ] + }, + { + "id": 110, + "type": "GetImageSize", + "pos": [ + -1630.0000045070383, + 4450.000161126616 + ], + "size": [ + 260, + 80 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 416 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 296 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 297 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 329, + 330 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 106, + "type": "CreateVideo", + "pos": [ + 1419.9999363380857, + 3760.0003323940673 + ], + "size": [ + 270, + 86.66666666666667 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 352 + }, + { + "localized_name": "audio", + "name": 
"audio", + "shape": 7, + "type": "AUDIO", + "link": 339 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 356 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 304 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 187, + "type": "ImageFromBatch", + "pos": [ + -2310.000095774562, + 3689.999972957771 + ], + "size": [ + 260, + 93.33333333333334 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 412 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": null + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 422 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 415 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ImageFromBatch", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 121 + ] + }, + { + "id": 191, + "type": "ResizeImageMaskNode", + "pos": [ + -2320.0000163380137, + 3850.0001667604133 + ], + "size": [ + 284.375, + 154 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 415 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": 
"COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 420 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 421 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "IMAGE", + "links": [ + 413 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 1280, + 720, + "center", + "lanczos" + ] + }, + { + "id": 188, + "type": "GetVideoComponents", + "pos": [ + -2320.0000163380137, + 3520.0000416901034 + ], + "size": [ + 280, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 419 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 412 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "GetVideoComponents", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 189, + "type": "ImageScaleBy", + "pos": [ + -1990.0000743661303, + 3670.0001318308678 + ], + "size": [ + 280, + 125.546875 + ], + "flags": {}, + "order": 41, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 413 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "scale_by", + "name": "scale_by", + "type": "FLOAT", + "widget": { + "name": "scale_by" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 414 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ImageScaleBy", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lanczos", + 0.5 + ] + }, + { + "id": 154, + "type": "MarkdownNote", + "pos": [ + -1659.9999492958204, + 4870.000120563272 + ], + "size": [ + 350, + 170 + ], + "flags": { + "collapsed": false + }, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Frame Rate Note", + "properties": {}, + "widgets_values": [ + "Please make sure the frame rate value is the same in both boxes" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 190, + "type": "38b60539-50a7-42f9-a5fe-bdeca26272e2", + "pos": [ + -1999.9999949295823, + 3910.000056337978 + ], + "size": [ + 310, + 106 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 414 + }, + { + "label": "depth_intensity", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": null + }, + { + 
"name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 431 + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 432 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 416, + 417, + 418 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "sigma" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.5.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 999.0000000000002, + "lotus-depth-d-v1-1.safetensors", + "vae-ft-mse-840000-ema-pruned.safetensors" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 97, + "type": "LTXAVTextEncoderLoader", + "pos": [ + -1650.0000287323687, + 4040.0003053518376 + ], + "size": [ + 420, + 124.44010416666667 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 427 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 433 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 294, + 295 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2-19b-dev-fp8.safetensors", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -1660, + 3440, + 440, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Basic Sampling", + "bounding": [ + -700, + 3440, + 570, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -1180, + 3440, + 440, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Latent", + "bounding": [ + -1180, + 4290, + 1050, + 680 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 9, + "title": "Upscale Sampling(2x)", + "bounding": [ + -100, + 3440, + 1090, + 820 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Sampler", + "bounding": [ + 350, + 3480, + 620, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Model", + "bounding": [ + -90, + 3480, + 430, + 310 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Frame rate", + "bounding": [ + -1640, + 4550, + 290, + 271.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 16, + "title": "Video Preprocess", + "bounding": [ + -2330, + 3450, + 650, + 567.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 15, + "title": "video length", + "bounding": [ + -2320, + 3620, + 290, + 180 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + 
{ + "id": 326, + "origin_id": 134, + "origin_slot": 0, + "target_id": 93, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 309, + "origin_id": 132, + "origin_slot": 0, + "target_id": 93, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 311, + "origin_id": 132, + "origin_slot": 1, + "target_id": 93, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 266, + "origin_id": 122, + "origin_slot": 1, + "target_id": 101, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 280, + "origin_id": 105, + "origin_slot": 0, + "target_id": 108, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 281, + "origin_id": 104, + "origin_slot": 0, + "target_id": 108, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 282, + "origin_id": 104, + "origin_slot": 1, + "target_id": 108, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 285, + "origin_id": 96, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 329, + "origin_id": 110, + "origin_slot": 2, + "target_id": 111, + "target_slot": 1, + "type": "INT" + }, + { + "id": 260, + "origin_id": 126, + "origin_slot": 0, + "target_id": 123, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 261, + "origin_id": 93, + "origin_slot": 0, + "target_id": 123, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 262, + "origin_id": 94, + "origin_slot": 0, + "target_id": 123, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 263, + "origin_id": 95, + "origin_slot": 0, + "target_id": 123, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 323, + "origin_id": 116, + "origin_slot": 0, + "target_id": 123, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 296, + "origin_id": 110, + "origin_slot": 0, + "target_id": 115, + "target_slot": 0, + "type": "INT" + }, + { + "id": 297, + "origin_id": 110, + "origin_slot": 1, + "target_id": 115, + "target_slot": 1, + "type": "INT" + }, + { + "id": 330, + "origin_id": 110, + "origin_slot": 2, + 
"target_id": 115, + "target_slot": 2, + "type": "INT" + }, + { + "id": 325, + "origin_id": 103, + "origin_slot": 0, + "target_id": 134, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 292, + "origin_id": 124, + "origin_slot": 0, + "target_id": 114, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 293, + "origin_id": 119, + "origin_slot": 0, + "target_id": 114, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 294, + "origin_id": 97, + "origin_slot": 0, + "target_id": 119, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 324, + "origin_id": 132, + "origin_slot": 2, + "target_id": 116, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 300, + "origin_id": 111, + "origin_slot": 0, + "target_id": 116, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 313, + "origin_id": 114, + "origin_slot": 0, + "target_id": 132, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 314, + "origin_id": 114, + "origin_slot": 1, + "target_id": 132, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 328, + "origin_id": 103, + "origin_slot": 2, + "target_id": 132, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 272, + "origin_id": 123, + "origin_slot": 0, + "target_id": 122, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 336, + "origin_id": 107, + "origin_slot": 1, + "target_id": 138, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 339, + "origin_id": 139, + "origin_slot": 0, + "target_id": 106, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 295, + "origin_id": 97, + "origin_slot": 0, + "target_id": 124, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 303, + "origin_id": 103, + "origin_slot": 2, + "target_id": 118, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 338, + "origin_id": 138, + "origin_slot": 1, + "target_id": 139, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 340, + "origin_id": 96, + "origin_slot": 0, + "target_id": 139, + "target_slot": 1, + "type": "VAE" + 
}, + { + "id": 337, + "origin_id": 138, + "origin_slot": 0, + "target_id": 113, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 291, + "origin_id": 118, + "origin_slot": 0, + "target_id": 113, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 276, + "origin_id": 108, + "origin_slot": 0, + "target_id": 107, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 277, + "origin_id": 98, + "origin_slot": 0, + "target_id": 107, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 278, + "origin_id": 99, + "origin_slot": 0, + "target_id": 107, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 279, + "origin_id": 101, + "origin_slot": 0, + "target_id": 107, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 327, + "origin_id": 134, + "origin_slot": 0, + "target_id": 105, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 310, + "origin_id": 132, + "origin_slot": 0, + "target_id": 104, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 312, + "origin_id": 132, + "origin_slot": 1, + "target_id": 104, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 270, + "origin_id": 122, + "origin_slot": 0, + "target_id": 104, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 287, + "origin_id": 104, + "origin_slot": 2, + "target_id": 112, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 288, + "origin_id": 100, + "origin_slot": 0, + "target_id": 112, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 289, + "origin_id": 118, + "origin_slot": 0, + "target_id": 112, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 322, + "origin_id": 116, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 304, + "origin_id": 106, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 345, + "origin_id": -10, + "origin_slot": 0, + "target_id": 124, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 347, + "origin_id": 143, + 
"origin_slot": 0, + "target_id": 107, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 351, + "origin_id": 138, + "origin_slot": 0, + "target_id": 144, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 352, + "origin_id": 144, + "origin_slot": 0, + "target_id": 106, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 353, + "origin_id": 103, + "origin_slot": 2, + "target_id": 144, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 354, + "origin_id": 145, + "origin_slot": 0, + "target_id": 111, + "target_slot": 2, + "type": "INT" + }, + { + "id": 355, + "origin_id": 148, + "origin_slot": 0, + "target_id": 114, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 356, + "origin_id": 148, + "origin_slot": 0, + "target_id": 106, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 357, + "origin_id": 149, + "origin_slot": 0, + "target_id": 132, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 359, + "origin_id": 103, + "origin_slot": 2, + "target_id": 149, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 360, + "origin_id": 115, + "origin_slot": 0, + "target_id": 149, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 363, + "origin_id": -10, + "origin_slot": 2, + "target_id": 149, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 365, + "origin_id": 151, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 366, + "origin_id": 112, + "origin_slot": 0, + "target_id": 151, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 367, + "origin_id": 118, + "origin_slot": 0, + "target_id": 151, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 368, + "origin_id": -10, + "origin_slot": 2, + "target_id": 151, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 370, + "origin_id": -10, + "origin_slot": 1, + "target_id": 149, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 1, + "target_id": 151, + "target_slot": 3, + "type": "FLOAT" + 
}, + { + "id": 377, + "origin_id": -10, + "origin_slot": 6, + "target_id": 96, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 410, + "origin_id": -10, + "origin_slot": 4, + "target_id": 151, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 412, + "origin_id": 188, + "origin_slot": 0, + "target_id": 187, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 413, + "origin_id": 191, + "origin_slot": 0, + "target_id": 189, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 414, + "origin_id": 189, + "origin_slot": 0, + "target_id": 190, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 415, + "origin_id": 187, + "origin_slot": 0, + "target_id": 191, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 416, + "origin_id": 190, + "origin_slot": 0, + "target_id": 110, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 417, + "origin_id": 190, + "origin_slot": 0, + "target_id": 149, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 418, + "origin_id": 190, + "origin_slot": 0, + "target_id": 132, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 419, + "origin_id": -10, + "origin_slot": 3, + "target_id": 188, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 420, + "origin_id": -10, + "origin_slot": 5, + "target_id": 191, + "target_slot": 2, + "type": "INT" + }, + { + "id": 421, + "origin_id": -10, + "origin_slot": 6, + "target_id": 191, + "target_slot": 3, + "type": "INT" + }, + { + "id": 422, + "origin_id": -10, + "origin_slot": 7, + "target_id": 187, + "target_slot": 2, + "type": "INT" + }, + { + "id": 425, + "origin_id": -10, + "origin_slot": 8, + "target_id": 103, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 426, + "origin_id": -10, + "origin_slot": 9, + "target_id": 134, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 427, + "origin_id": -10, + "origin_slot": 10, + "target_id": 97, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 429, + "origin_id": -10, + "origin_slot": 11, + "target_id": 105, + 
"target_slot": 1, + "type": "COMBO" + }, + { + "id": 430, + "origin_id": -10, + "origin_slot": 12, + "target_id": 100, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 431, + "origin_id": -10, + "origin_slot": 13, + "target_id": 190, + "target_slot": 2, + "type": "COMBO" + }, + { + "id": 432, + "origin_id": -10, + "origin_slot": 14, + "target_id": 190, + "target_slot": 3, + "type": "COMBO" + }, + { + "id": 433, + "origin_id": -10, + "origin_slot": 8, + "target_id": 97, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Depth to video", + "description": "Generates depth-controlled video with LTX-2: motion and structure follow a depth-reference video alongside text prompting, optional first-frame image conditioning, with optional synchronized audio." + }, + { + "id": "38b60539-50a7-42f9-a5fe-bdeca26272e2", + "version": 1, + "state": { + "lastGroupId": 16, + "lastNodeId": 191, + "lastLinkId": 433, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Depth Map (Lotus)", + "inputNode": { + "id": -10, + "bounding": [ + -60, + -172.61268043518066, + 126.625, + 120 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1650, + -172.61268043518066, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 37 + ], + "localized_name": "pixels", + "pos": [ + 46.625, + -152.61268043518066 + ] + }, + { + "id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", + "name": "sigma", + "type": "FLOAT", + "linkIds": [ + 243 + ], + "label": "depth_intensity", + "pos": [ + 46.625, + -132.61268043518066 + ] + }, + { + "id": "374bfecc-34bb-47f9-82b6-cbe9383f8756", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 423 + ], + "pos": [ + 46.625, + -112.61268043518066 + ] + }, + { + "id": "bb8707a1-46c3-44be-a15a-0adc908d871d", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 
424 + ], + "pos": [ + 46.625, + -92.61268043518066 + ] + } + ], + "outputs": [ + { + "id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 242 + ], + "localized_name": "IMAGE", + "pos": [ + 1670, + -152.61268043518066 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1380, + -240 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 232 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 240 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 10, + "type": "UNETLoader", + "pos": [ + 135.34181213378906, + -290.1947937011719 + ], + "size": [ + 305.93701171875, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 423 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 31, + 241 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "lotus-depth-d-v1-1.safetensors", + "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", + 
"directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "lotus-depth-d-v1-1.safetensors", + "default" + ] + }, + { + "id": 14, + "type": "VAELoader", + "pos": [ + 134.531494140625, + -165.18197631835938 + ], + "size": [ + 305.93701171875, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 424 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 38, + 240 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "vae-ft-mse-840000-ema-pruned.safetensors", + "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + }, + { + "id": 16, + "type": "SamplerCustomAdvanced", + "pos": [ + 990.6585693359375, + -319.9144287109375 + ], + "size": [ + 355.20001220703125, + 326 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 237 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 27 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 33 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 194 + }, + { + "localized_name": 
"latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 201 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 232 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 18, + "type": "DisableNoise", + "pos": [ + 730.47705078125, + -320 + ], + "size": [ + 210, + 26 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [ + 237 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "DisableNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 19, + "type": "BasicGuider", + "pos": [ + 730.2631225585938, + -251.22537231445312 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 241 + }, + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 238 + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicGuider", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 20, + "type": "BasicScheduler", + "pos": [ + 488.64459228515625, + -147.67201232910156 + ], + "size": [ + 210, + 106 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 31 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 66 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicScheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "normal", + 1, + 1 + ] + }, + { + "id": 21, + "type": "KSamplerSelect", + "pos": [ + 730.2631225585938, + -161.22537231445312 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 22, + "type": "ImageInvert", + "pos": [ + 1380, + -310 + ], + "size": [ + 210, + 26 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 242 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "ImageInvert", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 23, + "type": "VAEEncode", + "pos": [ + 730.2631225585938, + 38.77463912963867 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 37 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 38 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 201 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SetFirstSigma", + "pos": [ + 730.2631225585938, + -61.22536087036133 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": 
"sigmas", + "type": "SIGMAS", + "link": 66 + }, + { + "localized_name": "sigma", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": 243 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 194 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SetFirstSigma", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 999.0000000000002 + ] + }, + { + "id": 68, + "type": "LotusConditioning", + "pos": [ + 490, + -230 + ], + "size": [ + 210, + 26 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 238 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "LotusConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 1, + "title": "Load Models", + "bounding": [ + 120, + -370, + 335, + 281.6000061035156 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 232, + "origin_id": 16, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 14, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 237, + "origin_id": 18, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 27, + "origin_id": 19, + "origin_slot": 0, + "target_id": 16, + "target_slot": 1, + "type": "GUIDER" + 
}, + { + "id": 33, + "origin_id": 21, + "origin_slot": 0, + "target_id": 16, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 194, + "origin_id": 28, + "origin_slot": 0, + "target_id": 16, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 201, + "origin_id": 23, + "origin_slot": 0, + "target_id": 16, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 241, + "origin_id": 10, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 238, + "origin_id": 68, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 31, + "origin_id": 10, + "origin_slot": 0, + "target_id": 20, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 35, + "origin_id": 8, + "origin_slot": 0, + "target_id": 22, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 38, + "origin_id": 14, + "origin_slot": 0, + "target_id": 23, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 66, + "origin_id": 20, + "origin_slot": 0, + "target_id": 28, + "target_slot": 0, + "type": "SIGMAS" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 242, + "origin_id": 22, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 243, + "origin_id": -10, + "origin_slot": 1, + "target_id": 28, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 423, + "origin_id": -10, + "origin_slot": 2, + "target_id": 10, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 424, + "origin_id": -10, + "origin_slot": 3, + "target_id": 14, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model." 
+ } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 1.313181818181818, + "offset": [ + 271.9196871428176, + -3845.0123774536323 + ] + }, + "workflowRendererVersion": "LG" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Edge-Preserving Blur.json b/blueprints/Edge-Preserving Blur.json index 4f2416e9b..fbda9f126 100644 --- a/blueprints/Edge-Preserving Blur.json +++ b/blueprints/Edge-Preserving Blur.json @@ -1 +1,459 @@ -{"revision": 0, "last_node_id": 136, "last_link_id": 0, "nodes": [{"id": 136, "type": "c6dc0f88-416b-4db1-bed1-442d793de5ad", "pos": [669.0822222222221, 835.5507407407408], "size": [210, 106], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["130", "value"], ["131", "value"], ["133", "value"]]}, "widgets_values": [], "title": "Edge-Preserving Blur"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "c6dc0f88-416b-4db1-bed1-442d793de5ad", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 138, "lastLinkId": 109, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Edge-Preserving Blur", "inputNode": {"id": -10, "bounding": [1750, -620, 120, 60]}, "outputNode": {"id": -20, "bounding": [2700, -620, 120, 60]}, "inputs": [{"id": "06a6d0ad-25d7-4784-8c72-7fc8e7110a22", "name": "images.image0", "type": "IMAGE", "linkIds": [106], "localized_name": "images.image0", "label": "image", "pos": [1850, -600]}], "outputs": [{"id": "3ae9f5d7-be63-4c9f-9893-6f848defa377", "name": "IMAGE0", "type": "IMAGE", "linkIds": [99], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [2720, -600]}], "widgets": [], "nodes": [{"id": 128, "type": "GLSLShader", "pos": [2220, -860], "size": [420, 252], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image0", 
"localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 106}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 100}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 101}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 107}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 103}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [99]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Blur radius (0–20, default ~5)\nuniform float u_float1; // Edge threshold (0–100, default ~30)\nuniform int u_int0; // Step size (0/1 = every pixel, 2+ = skip pixels)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MAX_RADIUS = 20;\nconst float EPSILON = 
0.0001;\n\n// Perceptual luminance\nfloat getLuminance(vec3 rgb) {\n return dot(rgb, vec3(0.299, 0.587, 0.114));\n}\n\nvec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,\n float sigmaSpatial, float sigmaColor)\n{\n vec4 center = texture(u_image0, uv);\n vec3 centerRGB = center.rgb;\n\n float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);\n float invColor2 = -0.5 / (sigmaColor * sigmaColor + EPSILON);\n\n vec3 sumRGB = vec3(0.0);\n float sumWeight = 0.0;\n\n int step = max(u_int0, 1);\n float radius2 = float(radius * radius);\n\n for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {\n if (dy < -radius || dy > radius) continue;\n if (abs(dy) % step != 0) continue;\n\n for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {\n if (dx < -radius || dx > radius) continue;\n if (abs(dx) % step != 0) continue;\n\n vec2 offset = vec2(float(dx), float(dy));\n float dist2 = dot(offset, offset);\n if (dist2 > radius2) continue;\n\n vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;\n\n // Spatial Gaussian\n float spatialWeight = exp(dist2 * invSpatial2);\n\n // Perceptual color distance (weighted RGB)\n vec3 diff = sampleRGB - centerRGB;\n float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));\n float colorWeight = exp(colorDist * invColor2);\n\n float w = spatialWeight * colorWeight;\n sumRGB += sampleRGB * w;\n sumWeight += w;\n }\n }\n\n vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);\n return vec4(resultRGB, center.a); // preserve center alpha\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n\n float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));\n int radius = int(radiusF + 0.5);\n\n if (radius == 0) {\n fragColor = texture(u_image0, v_texCoord);\n return;\n }\n\n // Edge threshold → color sigma\n // Squared curve for better low-end control\n float t = clamp(u_float1, 0.0, 100.0) / 100.0;\n t *= t;\n float sigmaColor = mix(0.01, 0.5, t);\n\n // Spatial sigma tied to radius\n float sigmaSpatial = max(radiusF 
* 0.75, 0.5);\n\n fragColor = bilateralFilter(\n v_texCoord,\n texelSize,\n radius,\n sigmaSpatial,\n sigmaColor\n );\n}", "from_input"]}, {"id": 130, "type": "PrimitiveFloat", "pos": [1930, -860], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "blur_radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [100]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 20, "step": 0.5, "precision": 1}, "widgets_values": [20]}, {"id": 131, "type": "PrimitiveFloat", "pos": [1930, -760], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "edge_threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [101]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "step": 1}, "widgets_values": [50]}, {"id": 133, "type": "PrimitiveInt", "pos": [1930, -660], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "step_size", "localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [103, 107]}], "properties": {"Node name for S&R": "PrimitiveInt", "min": 0}, "widgets_values": [1, "fixed"]}], "groups": [], "links": [{"id": 100, "origin_id": 130, "origin_slot": 0, "target_id": 128, "target_slot": 2, "type": "FLOAT"}, {"id": 101, "origin_id": 131, "origin_slot": 0, "target_id": 128, "target_slot": 3, "type": "FLOAT"}, {"id": 107, "origin_id": 133, "origin_slot": 0, "target_id": 128, "target_slot": 5, "type": "INT"}, {"id": 103, "origin_id": 133, "origin_slot": 0, "target_id": 128, "target_slot": 6, "type": "INT"}, {"id": 106, "origin_id": -10, "origin_slot": 0, 
"target_id": 128, "target_slot": 0, "type": "IMAGE"}, {"id": 99, "origin_id": 128, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Blur"}]}, "extra": {}} +{ + "revision": 0, + "last_node_id": 136, + "last_link_id": 0, + "nodes": [ + { + "id": 136, + "type": "c6dc0f88-416b-4db1-bed1-442d793de5ad", + "pos": [ + 669.0822222222221, + 835.5507407407408 + ], + "size": [ + 210, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "130", + "value" + ], + [ + "131", + "value" + ], + [ + "133", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Edge-Preserving Blur" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "c6dc0f88-416b-4db1-bed1-442d793de5ad", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 138, + "lastLinkId": 109, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Edge-Preserving Blur", + "inputNode": { + "id": -10, + "bounding": [ + 1750, + -620, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2700, + -620, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "06a6d0ad-25d7-4784-8c72-7fc8e7110a22", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 106 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 1850, + -600 + ] + } + ], + "outputs": [ + { + "id": "3ae9f5d7-be63-4c9f-9893-6f848defa377", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 99 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 2720, + -600 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 128, + "type": "GLSLShader", + "pos": 
[ + 2220, + -860 + ], + "size": [ + 420, + 252 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 106 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 100 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 101 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 107 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": 103 + }, + { + "label": "u_int2", + "localized_name": "ints.u_int2", + "name": "ints.u_int2", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 99 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" 
+ }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Blur radius (0–20, default ~5)\nuniform float u_float1; // Edge threshold (0–100, default ~30)\nuniform int u_int0; // Step size (0/1 = every pixel, 2+ = skip pixels)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MAX_RADIUS = 20;\nconst float EPSILON = 0.0001;\n\n// Perceptual luminance\nfloat getLuminance(vec3 rgb) {\n return dot(rgb, vec3(0.299, 0.587, 0.114));\n}\n\nvec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,\n float sigmaSpatial, float sigmaColor)\n{\n vec4 center = texture(u_image0, uv);\n vec3 centerRGB = center.rgb;\n\n float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);\n float invColor2 = -0.5 / (sigmaColor * sigmaColor + EPSILON);\n\n vec3 sumRGB = vec3(0.0);\n float sumWeight = 0.0;\n\n int step = max(u_int0, 1);\n float radius2 = float(radius * radius);\n\n for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {\n if (dy < -radius || dy > radius) continue;\n if (abs(dy) % step != 0) continue;\n\n for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {\n if (dx < -radius || dx > radius) continue;\n if (abs(dx) % step != 0) continue;\n\n vec2 offset = vec2(float(dx), float(dy));\n float dist2 = dot(offset, offset);\n if (dist2 > radius2) continue;\n\n vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;\n\n // Spatial Gaussian\n float spatialWeight = exp(dist2 * invSpatial2);\n\n // Perceptual color distance (weighted RGB)\n vec3 diff = sampleRGB - centerRGB;\n float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));\n float colorWeight = exp(colorDist * invColor2);\n\n float w = spatialWeight * colorWeight;\n sumRGB += sampleRGB * w;\n sumWeight += w;\n }\n }\n\n vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);\n return vec4(resultRGB, center.a); // preserve center alpha\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n\n float radiusF = clamp(u_float0, 
0.0, float(MAX_RADIUS));\n int radius = int(radiusF + 0.5);\n\n if (radius == 0) {\n fragColor = texture(u_image0, v_texCoord);\n return;\n }\n\n // Edge threshold → color sigma\n // Squared curve for better low-end control\n float t = clamp(u_float1, 0.0, 100.0) / 100.0;\n t *= t;\n float sigmaColor = mix(0.01, 0.5, t);\n\n // Spatial sigma tied to radius\n float sigmaSpatial = max(radiusF * 0.75, 0.5);\n\n fragColor = bilateralFilter(\n v_texCoord,\n texelSize,\n radius,\n sigmaSpatial,\n sigmaColor\n );\n}", + "from_input" + ] + }, + { + "id": 130, + "type": "PrimitiveFloat", + "pos": [ + 1930, + -860 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "blur_radius", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 100 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 20, + "step": 0.5, + "precision": 1 + }, + "widgets_values": [ + 20 + ] + }, + { + "id": 131, + "type": "PrimitiveFloat", + "pos": [ + 1930, + -760 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "edge_threshold", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 101 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "step": 1 + }, + "widgets_values": [ + 50 + ] + }, + { + "id": 133, + "type": "PrimitiveInt", + "pos": [ + 1930, + -660 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "step_size", + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + 
}, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 103, + 107 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveInt", + "min": 0 + }, + "widgets_values": [ + 1, + "fixed" + ] + } + ], + "groups": [], + "links": [ + { + "id": 100, + "origin_id": 130, + "origin_slot": 0, + "target_id": 128, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 101, + "origin_id": 131, + "origin_slot": 0, + "target_id": 128, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 107, + "origin_id": 133, + "origin_slot": 0, + "target_id": 128, + "target_slot": 5, + "type": "INT" + }, + { + "id": 103, + "origin_id": 133, + "origin_slot": 0, + "target_id": 128, + "target_slot": 6, + "type": "INT" + }, + { + "id": 106, + "origin_id": -10, + "origin_slot": 0, + "target_id": 128, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 99, + "origin_id": 128, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Blur", + "description": "Applies bilateral (edge-preserving) blur to soften images while retaining detail." 
+ } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Film Grain.json b/blueprints/Film Grain.json index b7ebe2a36..3226ea9aa 100644 --- a/blueprints/Film Grain.json +++ b/blueprints/Film Grain.json @@ -1 +1,588 @@ -{"revision": 0, "last_node_id": 22, "last_link_id": 0, "nodes": [{"id": 22, "type": "3324cf54-bcff-405f-a4bf-c5122c72fe56", "pos": [4800, -1180], "size": [250, 154], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Film Grain", "properties": {"proxyWidgets": [["17", "value"], ["18", "value"], ["19", "value"], ["20", "value"], ["21", "choice"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "3324cf54-bcff-405f-a4bf-c5122c72fe56", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 21, "lastLinkId": 30, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Film Grain", "inputNode": {"id": -10, "bounding": [4096.671470760602, -948.2184031393472, 120, 60]}, "outputNode": {"id": -20, "bounding": [4900, -948.2184031393472, 120, 60]}, "inputs": [{"id": "062968ea-da25-47e7-a180-d913c267f148", "name": "images.image0", "type": "IMAGE", "linkIds": [22], "localized_name": "images.image0", "label": "image", "pos": [4196.671470760602, -928.2184031393472]}], "outputs": [{"id": "43247d06-a39f-4733-9828-c39400fe02a4", "name": "IMAGE0", "type": "IMAGE", "linkIds": [23], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4920, -928.2184031393472]}], "widgets": [], "nodes": [{"id": 15, "type": "GLSLShader", "pos": [4510, -1180], "size": [330, 272], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 22}, {"label": "image1", "localized_name": "images.image1", "name": 
"images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 26}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 27}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 28}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 29}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 30}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [23]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // grain amount [0.0 – 1.0] typical: 0.2–0.8\nuniform float u_float1; // grain size [0.3 – 3.0] lower = finer grain\nuniform float u_float2; // color amount [0.0 – 1.0] 0 = monochrome, 1 = RGB grain\nuniform float u_float3; // luminance bias [0.0 – 1.0] 0 = 
uniform, 1 = shadows only\nuniform int u_int0; // noise mode [0 or 1] 0 = smooth, 1 = grainy\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\n// High-quality integer hash (pcg-like)\nuint pcg(uint v) {\n uint state = v * 747796405u + 2891336453u;\n uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;\n return (word >> 22u) ^ word;\n}\n\n// 2D -> 1D hash input\nuint hash2d(uvec2 p) {\n return pcg(p.x + pcg(p.y));\n}\n\n// Hash to float [0, 1]\nfloat hashf(uvec2 p) {\n return float(hash2d(p)) / float(0xffffffffu);\n}\n\n// Hash to float with offset (for RGB channels)\nfloat hashf(uvec2 p, uint offset) {\n return float(pcg(hash2d(p) + offset)) / float(0xffffffffu);\n}\n\n// Convert uniform [0,1] to roughly Gaussian distribution\n// Using simple approximation: average of multiple samples\nfloat toGaussian(uvec2 p) {\n float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u);\n return (sum - 2.0) * 0.7; // Centered, scaled\n}\n\nfloat toGaussian(uvec2 p, uint offset) {\n float sum = hashf(p, offset) + hashf(p, offset + 1u) \n + hashf(p, offset + 2u) + hashf(p, offset + 3u);\n return (sum - 2.0) * 0.7;\n}\n\n// Smooth noise with better interpolation\nfloat smoothNoise(vec2 p) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n // Quintic interpolation (less banding than cubic)\n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui);\n float b = toGaussian(ui + uvec2(1u, 0u));\n float c = toGaussian(ui + uvec2(0u, 1u));\n float d = toGaussian(ui + uvec2(1u, 1u));\n \n return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nfloat smoothNoise(vec2 p, uint offset) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui, offset);\n float b = toGaussian(ui + uvec2(1u, 0u), offset);\n float c = toGaussian(ui + uvec2(0u, 1u), offset);\n float d = toGaussian(ui + uvec2(1u, 1u), offset);\n \n return 
mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n \n // Luminance (Rec.709)\n float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));\n \n // Grain UV (resolution-independent)\n vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);\n uvec2 grainPixel = uvec2(grainUV);\n \n float g;\n vec3 grainRGB;\n \n if (u_int0 == 1) {\n // Grainy mode: pure hash noise (no interpolation = no banding)\n g = toGaussian(grainPixel);\n grainRGB = vec3(\n toGaussian(grainPixel, 100u),\n toGaussian(grainPixel, 200u),\n toGaussian(grainPixel, 300u)\n );\n } else {\n // Smooth mode: interpolated with quintic curve\n g = smoothNoise(grainUV);\n grainRGB = vec3(\n smoothNoise(grainUV, 100u),\n smoothNoise(grainUV, 200u),\n smoothNoise(grainUV, 300u)\n );\n }\n \n // Luminance weighting (less grain in highlights)\n float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));\n \n // Strength\n float strength = u_float0 * 0.15;\n \n // Color vs monochrome grain\n vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0));\n \n color.rgb += grainColor * strength * lumWeight;\n fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);\n}\n", "from_input"]}, {"id": 21, "type": "CustomCombo", "pos": [4280, -780], "size": [210, 153.8888931274414], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "grain_mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [30]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Smooth", 0, "Smooth", "Grainy", ""]}, {"id": 17, "type": "PrimitiveFloat", "pos": [4276.671470760602, -1180.3256994061358], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "grain_amount", "localized_name": "value", "name": 
"value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [26]}], "title": "Grain amount", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "step": 0.05, "precision": 2}, "widgets_values": [0.25]}, {"id": 18, "type": "PrimitiveFloat", "pos": [4280, -1080], "size": [210, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "grain_size", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [27]}], "title": "Grain size", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0.05, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.1]}, {"id": 19, "type": "PrimitiveFloat", "pos": [4280, -980], "size": [210, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "color_amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [28]}], "title": "Color amount", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}, {"id": 20, "type": "PrimitiveFloat", "pos": [4280, -880], "size": [210, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "shadow_focus", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [29]}], "title": "Luminance bias", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}], "groups": [], "links": [{"id": 26, "origin_id": 17, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "FLOAT"}, {"id": 27, "origin_id": 18, "origin_slot": 0, 
"target_id": 15, "target_slot": 3, "type": "FLOAT"}, {"id": 28, "origin_id": 19, "origin_slot": 0, "target_id": 15, "target_slot": 4, "type": "FLOAT"}, {"id": 29, "origin_id": 20, "origin_slot": 0, "target_id": 15, "target_slot": 5, "type": "FLOAT"}, {"id": 30, "origin_id": 21, "origin_slot": 1, "target_id": 15, "target_slot": 7, "type": "INT"}, {"id": 22, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 23, "origin_id": 15, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 22, + "last_link_id": 0, + "nodes": [ + { + "id": 22, + "type": "3324cf54-bcff-405f-a4bf-c5122c72fe56", + "pos": [ + 4800, + -1180 + ], + "size": [ + 250, + 154 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "title": "Film Grain", + "properties": { + "proxyWidgets": [ + [ + "17", + "value" + ], + [ + "18", + "value" + ], + [ + "19", + "value" + ], + [ + "20", + "value" + ], + [ + "21", + "choice" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "3324cf54-bcff-405f-a4bf-c5122c72fe56", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 21, + "lastLinkId": 30, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Film Grain", + "inputNode": { + "id": -10, + "bounding": [ + 4096.671470760602, + -948.2184031393472, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4900, + -948.2184031393472, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "062968ea-da25-47e7-a180-d913c267f148", + "name": "images.image0", + 
"type": "IMAGE", + "linkIds": [ + 22 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 4196.671470760602, + -928.2184031393472 + ] + } + ], + "outputs": [ + { + "id": "43247d06-a39f-4733-9828-c39400fe02a4", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 23 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4920, + -928.2184031393472 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 15, + "type": "GLSLShader", + "pos": [ + 4510, + -1180 + ], + "size": [ + 330, + 272 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 22 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 26 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 27 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 28 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": 29 + }, + { + "label": "u_float4", + "localized_name": "floats.u_float4", + "name": "floats.u_float4", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 30 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + 
"link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 23 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // grain amount [0.0 – 1.0] typical: 0.2–0.8\nuniform float u_float1; // grain size [0.3 – 3.0] lower = finer grain\nuniform float u_float2; // color amount [0.0 – 1.0] 0 = monochrome, 1 = RGB grain\nuniform float u_float3; // luminance bias [0.0 – 1.0] 0 = uniform, 1 = shadows only\nuniform int u_int0; // noise mode [0 or 1] 0 = smooth, 1 = grainy\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\n// High-quality integer hash (pcg-like)\nuint pcg(uint v) {\n uint state = v * 747796405u + 2891336453u;\n uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;\n return (word >> 22u) ^ word;\n}\n\n// 2D -> 1D hash input\nuint hash2d(uvec2 p) {\n return pcg(p.x + pcg(p.y));\n}\n\n// Hash to float [0, 1]\nfloat hashf(uvec2 p) {\n return float(hash2d(p)) / float(0xffffffffu);\n}\n\n// Hash to float with offset (for RGB channels)\nfloat hashf(uvec2 p, uint offset) {\n return float(pcg(hash2d(p) + offset)) / float(0xffffffffu);\n}\n\n// Convert uniform [0,1] to roughly Gaussian distribution\n// Using simple approximation: average of multiple samples\nfloat toGaussian(uvec2 p) {\n float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u);\n return (sum - 2.0) 
* 0.7; // Centered, scaled\n}\n\nfloat toGaussian(uvec2 p, uint offset) {\n float sum = hashf(p, offset) + hashf(p, offset + 1u) \n + hashf(p, offset + 2u) + hashf(p, offset + 3u);\n return (sum - 2.0) * 0.7;\n}\n\n// Smooth noise with better interpolation\nfloat smoothNoise(vec2 p) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n // Quintic interpolation (less banding than cubic)\n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui);\n float b = toGaussian(ui + uvec2(1u, 0u));\n float c = toGaussian(ui + uvec2(0u, 1u));\n float d = toGaussian(ui + uvec2(1u, 1u));\n \n return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nfloat smoothNoise(vec2 p, uint offset) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui, offset);\n float b = toGaussian(ui + uvec2(1u, 0u), offset);\n float c = toGaussian(ui + uvec2(0u, 1u), offset);\n float d = toGaussian(ui + uvec2(1u, 1u), offset);\n \n return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n \n // Luminance (Rec.709)\n float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));\n \n // Grain UV (resolution-independent)\n vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);\n uvec2 grainPixel = uvec2(grainUV);\n \n float g;\n vec3 grainRGB;\n \n if (u_int0 == 1) {\n // Grainy mode: pure hash noise (no interpolation = no banding)\n g = toGaussian(grainPixel);\n grainRGB = vec3(\n toGaussian(grainPixel, 100u),\n toGaussian(grainPixel, 200u),\n toGaussian(grainPixel, 300u)\n );\n } else {\n // Smooth mode: interpolated with quintic curve\n g = smoothNoise(grainUV);\n grainRGB = vec3(\n smoothNoise(grainUV, 100u),\n smoothNoise(grainUV, 200u),\n smoothNoise(grainUV, 300u)\n );\n }\n \n // Luminance weighting (less grain in highlights)\n float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));\n \n // 
Strength\n float strength = u_float0 * 0.15;\n \n // Color vs monochrome grain\n vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0));\n \n color.rgb += grainColor * strength * lumWeight;\n fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);\n}\n", + "from_input" + ] + }, + { + "id": 21, + "type": "CustomCombo", + "pos": [ + 4280, + -780 + ], + "size": [ + 210, + 153.8888931274414 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "grain_mode", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 30 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "Smooth", + 0, + "Smooth", + "Grainy", + "" + ] + }, + { + "id": 17, + "type": "PrimitiveFloat", + "pos": [ + 4276.671470760602, + -1180.3256994061358 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "grain_amount", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 26 + ] + } + ], + "title": "Grain amount", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 1, + "step": 0.05, + "precision": 2 + }, + "widgets_values": [ + 0.25 + ] + }, + { + "id": 18, + "type": "PrimitiveFloat", + "pos": [ + 4280, + -1080 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "grain_size", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": 
"FLOAT", + "type": "FLOAT", + "links": [ + 27 + ] + } + ], + "title": "Grain size", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0.05, + "max": 3, + "precision": 2, + "step": 0.05 + }, + "widgets_values": [ + 0.1 + ] + }, + { + "id": 19, + "type": "PrimitiveFloat", + "pos": [ + 4280, + -980 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "color_amount", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 28 + ] + } + ], + "title": "Color amount", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 1, + "precision": 2, + "step": 0.05 + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 20, + "type": "PrimitiveFloat", + "pos": [ + 4280, + -880 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "shadow_focus", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 29 + ] + } + ], + "title": "Luminance bias", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 1, + "precision": 2, + "step": 0.05 + }, + "widgets_values": [ + 0 + ] + } + ], + "groups": [], + "links": [ + { + "id": 26, + "origin_id": 17, + "origin_slot": 0, + "target_id": 15, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 27, + "origin_id": 18, + "origin_slot": 0, + "target_id": 15, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 28, + "origin_id": 19, + "origin_slot": 0, + "target_id": 15, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 29, + "origin_id": 20, + "origin_slot": 0, + "target_id": 15, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 
30, + "origin_id": 21, + "origin_slot": 1, + "target_id": 15, + "target_slot": 7, + "type": "INT" + }, + { + "id": 22, + "origin_id": -10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 23, + "origin_id": 15, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adds procedural film grain texture for a cinematic look via GPU fragment shader." + } + ] + } +} \ No newline at end of file diff --git a/blueprints/First-Last-Frame to Video (LTX-2.3).json b/blueprints/First-Last-Frame to Video (LTX-2.3).json new file mode 100644 index 000000000..f509aefe0 --- /dev/null +++ b/blueprints/First-Last-Frame to Video (LTX-2.3).json @@ -0,0 +1,3361 @@ +{ + "revision": 0, + "last_node_id": 228, + "last_link_id": 0, + "nodes": [ + { + "id": 228, + "type": "a5982aee-8136-4819-86a0-cf9d9e510ad6", + "pos": [ + 1490, + 4730 + ], + "size": [ + 274.8169921875, + 276 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "first_frame", + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": null + }, + { + "label": "last_frame", + "localized_name": "input_1", + "name": "input_1", + "type": "IMAGE,MASK", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "width", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "height", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "label": "duration", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "fps", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + 
"name": "noise_seed" + }, + "link": null + }, + { + "label": "ckpt_name", + "name": "ckpt_name_1", + "type": "COMBO", + "widget": { + "name": "ckpt_name_1" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "222", + "text" + ], + [ + "215", + "value" + ], + [ + "216", + "value" + ], + [ + "198", + "value" + ], + [ + "205", + "value" + ], + [ + "196", + "noise_seed" + ], + [ + "224", + "ckpt_name" + ], + [ + "225", + "text_encoder" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "First-Last-Frame to Video (LTX-2.3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "a5982aee-8136-4819-86a0-cf9d9e510ad6", + "version": 1, + "state": { + "lastGroupId": 22, + "lastNodeId": 228, + "lastLinkId": 276, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "First-Last-Frame to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 270, + 3100, + 120, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3620, + 3120, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "6fe179c4-d96f-4383-b202-844f6de4922e", + "name": "input", + "type": "IMAGE,MASK", + "linkIds": [ + 251 + ], + "localized_name": "input", + "label": "first_frame", + "pos": [ + 370, + 3120 + ] + }, + { + "id": "e80df1ae-5f39-4f86-91bd-0467635e2f2d", + "name": "input_1", + "type": "IMAGE,MASK", + "linkIds": [ + 253 + ], + "localized_name": "input_1", + "label": "last_frame", + "pos": [ + 370, + 3140 + ] + }, + { + "id": "433148fa-bf73-4ab1-81d9-09e2e38ed861", + "name": "text", + "type": "STRING", + "linkIds": [ + 265 + ], + "pos": [ + 370, + 3160 + ] + 
}, + { + "id": "36915bc8-a6ed-4d48-8619-e0e8723228e9", + "name": "value", + "type": "INT", + "linkIds": [ + 266 + ], + "label": "width", + "pos": [ + 370, + 3180 + ] + }, + { + "id": "425a36b8-91ab-41b7-81e9-496eba064ec8", + "name": "value_1", + "type": "INT", + "linkIds": [ + 267 + ], + "label": "height", + "pos": [ + 370, + 3200 + ] + }, + { + "id": "0c9e003b-bd07-4b7d-aa6d-789e138ed161", + "name": "value_2", + "type": "INT", + "linkIds": [ + 268 + ], + "label": "duration", + "pos": [ + 370, + 3220 + ] + }, + { + "id": "581b52ff-21c5-4774-ac2a-8f69a7e09e2e", + "name": "value_3", + "type": "INT", + "linkIds": [ + 269 + ], + "label": "fps", + "pos": [ + 370, + 3240 + ] + }, + { + "id": "d03cc171-45da-4658-99aa-77252bbcf522", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 270 + ], + "pos": [ + 370, + 3260 + ] + }, + { + "id": "e68e61c8-905e-43ac-8c76-65ac52270a08", + "name": "ckpt_name_1", + "type": "COMBO", + "linkIds": [ + 272, + 275, + 276 + ], + "label": "ckpt_name", + "pos": [ + 370, + 3280 + ] + }, + { + "id": "5d065f3b-891b-499f-950b-c2df0be24536", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 273 + ], + "pos": [ + 370, + 3300 + ] + } + ], + "outputs": [ + { + "id": "0c8c2dc0-c67c-4bc2-9e57-6aa00db2e3a9", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 252 + ], + "localized_name": "VIDEO", + "pos": [ + 3640, + 3140 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 195, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3780 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 203 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 229 + ] + } + ], + 
"properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 196, + "type": "RandomNoise", + "pos": [ + 1990, + 2320 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 246 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "noise_seed": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 315253765879496, + "randomize" + ] + }, + { + "id": 197, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2090, + 3820 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 205 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 262 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 207 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + 
], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 245 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 198, + "type": "PrimitiveInt", + "pos": [ + 760, + 3650 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 268 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 260 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 199, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 210 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 240 + ] + } + ], + "properties": { 
+ "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 200, + "type": "LTXVCropGuides", + "pos": [ + 2820, + 2450 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 214 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 215 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 211 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 201, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2090, + 3580 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 218 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 219 + 
}, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 263 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 239 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 202, + "type": "LTXVConditioning", + "pos": [ + 2090, + 3400 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 221 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 222 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 223 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 236 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 237 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + 25 + ] + }, + { + "id": 203, + "type": "GetImageSize", + "pos": [ + 1480, + 3500 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 224 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 218 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 219 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 204, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3700 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 225 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 226 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 227 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 228 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 229 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", 
+ "links": [ + 213, + 242 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 214, + 243 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 244 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + -1, + 0.7 + ] + }, + { + "id": 205, + "type": "PrimitiveInt", + "pos": [ + 760, + 3800 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 269 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 207, + 235, + 261 + ] + } + ], + "title": "Frame Rate(int)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 206, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3430 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 236 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 237 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 
238 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 239 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 240 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 226 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 228 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 0.7 + ] + }, + { + "id": 207, + "type": "CFGGuider", + "pos": [ + 1990, + 2500 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 241 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 242 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 243 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 247 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 208, + "type": "SamplerEulerAncestral", + "pos": [ + 1990, + 2720 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "eta", + "name": "eta", + "type": "FLOAT", + "widget": { + "name": "eta" + }, + "link": null + }, + { + "localized_name": "s_noise", + "name": "s_noise", + "type": "FLOAT", + "widget": { + "name": "s_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 248 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "SamplerEulerAncestral", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 209, + "type": "ManualSigmas", + "pos": [ + 1990, + 2910 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 249 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": 
false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1., 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 210, + "type": "LTXVConcatAVLatent", + "pos": [ + 1990, + 3090 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 244 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 245 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 250 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 211, + "type": "SamplerCustomAdvanced", + "pos": [ + 2460, + 2330 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 246 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 247 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 248 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 249 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 250 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": 
"LATENT", + "links": [ + 204 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 212, + "type": "ComfyMathExpression", + "pos": [ + 760, + 3970 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 17, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 235 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 223, + 234 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.17.0", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 213, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3340 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 251 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": 
"COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 208 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 209 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 210, + 224 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 214, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3780 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 253 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 201 + }, + { + "localized_name": "height", + "name": 
"resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 202 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 203 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 215, + "type": "PrimitiveInt", + "pos": [ + 760, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 201, + 208 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 216, + "type": "PrimitiveInt", + "pos": [ + 760, + 
3490 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 267 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 202, + 209 + ] + } + ], + "title": "height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 217, + "type": "CLIPTextEncode", + "pos": [ + 1320, + 2870 + ], + "size": [ + 590, + 200 + ], + "flags": { + "collapsed": false + }, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 230 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 222 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, 
missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, unreadable text on shirt or hat, incorrect lettering on cap (“PNTR”), incorrect t-shirt slogan (“JUST DO IT”), missing microphone, misplaced microphone, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, smiling, laughing, exaggerated sadness, wrong gaze direction, eyes looking at camera, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, missing sniff sounds, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, missing door or shelves, missing shallow depth of field, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts." 
+ ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 218, + "type": "CreateVideo", + "pos": [ + 3280, + 2320 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 232 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 233 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 234 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 252 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 219, + "type": "VAEDecodeTiled", + "pos": [ + 2820, + 2630 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 211 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 212 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + 
"outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 232 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 64 + ] + }, + { + "id": 220, + "type": "LTXVAudioVAEDecode", + "pos": [ + 2820, + 2920 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 216 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 217 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 233 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 221, + "type": "LTXVSeparateAVLatent", + "pos": [ + 2460, + 2580 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 204 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 215 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + 
"ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 222, + "type": "CLIPTextEncode", + "pos": [ + 1310, + 2380 + ], + "size": [ + 620, + 420 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 231 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 265 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 221 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 223, + "type": "CheckpointLoaderSimple", + "pos": [ + 770, + 2380 + ], + "size": [ + 420, + 160 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 276 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 241 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 212, + 227, + 238 + ] + } + ], + "properties": { + 
"ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { + "id": 224, + "type": "LTXVAudioVAELoader", + "pos": [ + 770, + 2660 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 205, + 217 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { + "id": 225, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 770, + 2890 + ], + "size": [ + 410, + 160 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": 
"text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 273 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 275 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 230, + 231 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + }, + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-distilled-fp8.safetensors", + "default" + ] + }, + { + "id": 226, + "type": "ComfyMathExpression", + "pos": [ + 760, + 4020 + ], + "size": [ + 400, + 200 + ], + "flags": { + "collapsed": true + }, + "order": 31, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 260 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 261 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + 
"link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 262, + 263 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Conditioning", + "bounding": [ + 1850, + 3250, + 1370, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Settings", + "bounding": [ + 730, + 3250, + 290, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "First Frame", + "bounding": [ + 1050, + 3250, + 770, + 400 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Last Frame", + "bounding": [ + 1050, + 3680, + 770, + 370 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Model", + "bounding": [ + 730, + 2240, + 500, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Prompt", + "bounding": [ + 1260, + 2240, + 680, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Sampling", + "bounding": [ + 1970, + 2240, + 770, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Decoding", + "bounding": [ + 2770, + 2240, + 450, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 203, + "origin_id": 214, + "origin_slot": 0, + "target_id": 195, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 
205, + "origin_id": 224, + "origin_slot": 0, + "target_id": 197, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 207, + "origin_id": 205, + "origin_slot": 0, + "target_id": 197, + "target_slot": 2, + "type": "INT" + }, + { + "id": 210, + "origin_id": 213, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 213, + "origin_id": 204, + "origin_slot": 0, + "target_id": 200, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 214, + "origin_id": 204, + "origin_slot": 1, + "target_id": 200, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 215, + "origin_id": 221, + "origin_slot": 0, + "target_id": 200, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 218, + "origin_id": 203, + "origin_slot": 0, + "target_id": 201, + "target_slot": 0, + "type": "INT" + }, + { + "id": 219, + "origin_id": 203, + "origin_slot": 1, + "target_id": 201, + "target_slot": 1, + "type": "INT" + }, + { + "id": 221, + "origin_id": 222, + "origin_slot": 0, + "target_id": 202, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 222, + "origin_id": 217, + "origin_slot": 0, + "target_id": 202, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 223, + "origin_id": 212, + "origin_slot": 0, + "target_id": 202, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 224, + "origin_id": 213, + "origin_slot": 0, + "target_id": 203, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 225, + "origin_id": 206, + "origin_slot": 0, + "target_id": 204, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 226, + "origin_id": 206, + "origin_slot": 1, + "target_id": 204, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 227, + "origin_id": 223, + "origin_slot": 2, + "target_id": 204, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 228, + "origin_id": 206, + "origin_slot": 2, + "target_id": 204, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 229, + "origin_id": 195, + "origin_slot": 0, 
+ "target_id": 204, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": 202, + "origin_slot": 0, + "target_id": 206, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 237, + "origin_id": 202, + "origin_slot": 1, + "target_id": 206, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 238, + "origin_id": 223, + "origin_slot": 2, + "target_id": 206, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 239, + "origin_id": 201, + "origin_slot": 0, + "target_id": 206, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 199, + "origin_slot": 0, + "target_id": 206, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 241, + "origin_id": 223, + "origin_slot": 0, + "target_id": 207, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 242, + "origin_id": 204, + "origin_slot": 0, + "target_id": 207, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 243, + "origin_id": 204, + "origin_slot": 1, + "target_id": 207, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 244, + "origin_id": 204, + "origin_slot": 2, + "target_id": 210, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 245, + "origin_id": 197, + "origin_slot": 0, + "target_id": 210, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 246, + "origin_id": 196, + "origin_slot": 0, + "target_id": 211, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 247, + "origin_id": 207, + "origin_slot": 0, + "target_id": 211, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 248, + "origin_id": 208, + "origin_slot": 0, + "target_id": 211, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 249, + "origin_id": 209, + "origin_slot": 0, + "target_id": 211, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 250, + "origin_id": 210, + "origin_slot": 0, + "target_id": 211, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 235, + "origin_id": 205, + "origin_slot": 0, + "target_id": 212, + "target_slot": 0, + 
"type": "INT" + }, + { + "id": 208, + "origin_id": 215, + "origin_slot": 0, + "target_id": 213, + "target_slot": 2, + "type": "INT" + }, + { + "id": 209, + "origin_id": 216, + "origin_slot": 0, + "target_id": 213, + "target_slot": 3, + "type": "INT" + }, + { + "id": 201, + "origin_id": 215, + "origin_slot": 0, + "target_id": 214, + "target_slot": 2, + "type": "INT" + }, + { + "id": 202, + "origin_id": 216, + "origin_slot": 0, + "target_id": 214, + "target_slot": 3, + "type": "INT" + }, + { + "id": 230, + "origin_id": 225, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 232, + "origin_id": 219, + "origin_slot": 0, + "target_id": 218, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 233, + "origin_id": 220, + "origin_slot": 0, + "target_id": 218, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 234, + "origin_id": 212, + "origin_slot": 0, + "target_id": 218, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 211, + "origin_id": 200, + "origin_slot": 2, + "target_id": 219, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 212, + "origin_id": 223, + "origin_slot": 2, + "target_id": 219, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 216, + "origin_id": 221, + "origin_slot": 1, + "target_id": 220, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 217, + "origin_id": 224, + "origin_slot": 0, + "target_id": 220, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 204, + "origin_id": 211, + "origin_slot": 1, + "target_id": 221, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 231, + "origin_id": 225, + "origin_slot": 0, + "target_id": 222, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 251, + "origin_id": -10, + "origin_slot": 0, + "target_id": 213, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 253, + "origin_id": -10, + "origin_slot": 1, + "target_id": 214, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 252, + "origin_id": 218, + "origin_slot": 0, + 
"target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 260, + "origin_id": 198, + "origin_slot": 0, + "target_id": 226, + "target_slot": 0, + "type": "INT" + }, + { + "id": 261, + "origin_id": 205, + "origin_slot": 0, + "target_id": 226, + "target_slot": 1, + "type": "INT" + }, + { + "id": 262, + "origin_id": 226, + "origin_slot": 1, + "target_id": 197, + "target_slot": 1, + "type": "INT" + }, + { + "id": 263, + "origin_id": 226, + "origin_slot": 1, + "target_id": 201, + "target_slot": 2, + "type": "INT" + }, + { + "id": 265, + "origin_id": -10, + "origin_slot": 2, + "target_id": 222, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 266, + "origin_id": -10, + "origin_slot": 3, + "target_id": 215, + "target_slot": 0, + "type": "INT" + }, + { + "id": 267, + "origin_id": -10, + "origin_slot": 4, + "target_id": 216, + "target_slot": 0, + "type": "INT" + }, + { + "id": 268, + "origin_id": -10, + "origin_slot": 5, + "target_id": 198, + "target_slot": 0, + "type": "INT" + }, + { + "id": 269, + "origin_id": -10, + "origin_slot": 6, + "target_id": 205, + "target_slot": 0, + "type": "INT" + }, + { + "id": 270, + "origin_id": -10, + "origin_slot": 7, + "target_id": 196, + "target_slot": 0, + "type": "INT" + }, + { + "id": 272, + "origin_id": -10, + "origin_slot": 8, + "target_id": 224, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 273, + "origin_id": -10, + "origin_slot": 9, + "target_id": 225, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 275, + "origin_id": -10, + "origin_slot": 8, + "target_id": 225, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 276, + "origin_id": -10, + "origin_slot": 8, + "target_id": 223, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/First-Last-Frame to Video", + "description": "Generates a video interpolating between first and last keyframes using LTX-2.3." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/First-Last-Frame to Video.json b/blueprints/First-Last-Frame to Video.json new file mode 100644 index 000000000..84dfafbcd --- /dev/null +++ b/blueprints/First-Last-Frame to Video.json @@ -0,0 +1,3361 @@ +{ + "revision": 0, + "last_node_id": 227, + "last_link_id": 0, + "nodes": [ + { + "id": 227, + "type": "283e4561-61a2-4538-b960-265736eb041f", + "pos": [ + 620, + 3140 + ], + "size": [ + 540, + 0 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "first_frame", + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": null + }, + { + "label": "last_frame", + "localized_name": "input_1", + "name": "input_1", + "type": "IMAGE,MASK", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "width", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "height", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "label": "duration", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "fps", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "label": "ckpt_name", + "name": "ckpt_name_1", + "type": "COMBO", + "widget": { + "name": "ckpt_name_1" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "title": "First-Last-Frame to Video", + "properties": { + "proxyWidgets": [ + [ + "222", + "text" + ], + [ + "215", + "value" + ], + [ + 
"216", + "value" + ], + [ + "198", + "value" + ], + [ + "205", + "value" + ], + [ + "196", + "noise_seed" + ], + [ + "224", + "ckpt_name" + ], + [ + "225", + "text_encoder" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + } + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "283e4561-61a2-4538-b960-265736eb041f", + "version": 1, + "state": { + "lastGroupId": 22, + "lastNodeId": 227, + "lastLinkId": 276, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "First-Last-Frame to Video", + "inputNode": { + "id": -10, + "bounding": [ + 270, + 3100, + 120, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3620, + 3120, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "6fe179c4-d96f-4383-b202-844f6de4922e", + "name": "input", + "type": "IMAGE,MASK", + "linkIds": [ + 251 + ], + "localized_name": "input", + "label": "first_frame", + "pos": [ + 370, + 3120 + ] + }, + { + "id": "e80df1ae-5f39-4f86-91bd-0467635e2f2d", + "name": "input_1", + "type": "IMAGE,MASK", + "linkIds": [ + 253 + ], + "localized_name": "input_1", + "label": "last_frame", + "pos": [ + 370, + 3140 + ] + }, + { + "id": "433148fa-bf73-4ab1-81d9-09e2e38ed861", + "name": "text", + "type": "STRING", + "linkIds": [ + 265 + ], + "pos": [ + 370, + 3160 + ] + }, + { + "id": "36915bc8-a6ed-4d48-8619-e0e8723228e9", + "name": "value", + "type": "INT", + "linkIds": [ + 266 + ], + "label": "width", + "pos": [ + 370, + 3180 + ] + }, + { + "id": "425a36b8-91ab-41b7-81e9-496eba064ec8", + "name": "value_1", + "type": "INT", + "linkIds": [ + 267 + ], + "label": "height", + "pos": [ + 370, + 3200 + ] + }, + { + "id": "0c9e003b-bd07-4b7d-aa6d-789e138ed161", + "name": "value_2", + "type": "INT", + "linkIds": [ + 268 + ], + "label": "duration", + "pos": [ + 370, + 3220 + ] + }, + { + "id": "581b52ff-21c5-4774-ac2a-8f69a7e09e2e", 
+ "name": "value_3", + "type": "INT", + "linkIds": [ + 269 + ], + "label": "fps", + "pos": [ + 370, + 3240 + ] + }, + { + "id": "d03cc171-45da-4658-99aa-77252bbcf522", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 270 + ], + "pos": [ + 370, + 3260 + ] + }, + { + "id": "e68e61c8-905e-43ac-8c76-65ac52270a08", + "name": "ckpt_name_1", + "type": "COMBO", + "linkIds": [ + 272, + 275, + 276 + ], + "label": "ckpt_name", + "pos": [ + 370, + 3280 + ] + }, + { + "id": "5d065f3b-891b-499f-950b-c2df0be24536", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 273 + ], + "pos": [ + 370, + 3300 + ] + } + ], + "outputs": [ + { + "id": "0c8c2dc0-c67c-4bc2-9e57-6aa00db2e3a9", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 252 + ], + "localized_name": "VIDEO", + "pos": [ + 3640, + 3140 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 195, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3780 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 203 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 229 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 196, + "type": "RandomNoise", + "pos": [ + 1990, + 2320 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + 
{ + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 246 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": { + "noise_seed": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 315253765879496, + "randomize" + ] + }, + { + "id": 197, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2090, + 3820 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 205 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 262 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 207 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 245 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 
198, + "type": "PrimitiveInt", + "pos": [ + 760, + 3650 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 268 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 260 + ] + } + ], + "title": "Duration", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 199, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 210 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 240 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 200, + "type": "LTXVCropGuides", + "pos": [ + 2820, + 2450 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + 
"localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 214 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 215 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 211 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 201, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2090, + 3580 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 218 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 219 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 263 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 239 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, 
+ "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 202, + "type": "LTXVConditioning", + "pos": [ + 2090, + 3400 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 221 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 222 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 223 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 236 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 237 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 203, + "type": "GetImageSize", + "pos": [ + 1480, + 3500 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 224 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 218 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 219 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", 
+ "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 204, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3700 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 225 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 226 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 227 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 228 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 229 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 213, + 242 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 214, + 243 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 244 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.12.3", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": 
"Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + -1, + 0.7 + ] + }, + { + "id": 205, + "type": "PrimitiveInt", + "pos": [ + 760, + 3800 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 269 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 207, + 235, + 261 + ] + } + ], + "title": "Frame Rate(int)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 206, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3430 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 236 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 237 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 238 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 239 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 240 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + 
"links": [ + 225 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 226 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 228 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.12.3", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 0.7 + ] + }, + { + "id": 207, + "type": "CFGGuider", + "pos": [ + 1990, + 2500 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 241 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 242 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 243 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 247 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 208, + "type": "SamplerEulerAncestral", + "pos": [ + 1990, + 2720 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "eta", + "name": "eta", + 
"type": "FLOAT", + "widget": { + "name": "eta" + }, + "link": null + }, + { + "localized_name": "s_noise", + "name": "s_noise", + "type": "FLOAT", + "widget": { + "name": "s_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 248 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerEulerAncestral", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 209, + "type": "ManualSigmas", + "pos": [ + 1990, + 2910 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 249 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1., 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 210, + "type": "LTXVConcatAVLatent", + "pos": [ + 1990, + 3090 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 244 + }, + { + "localized_name": "audio_latent", + "name": 
"audio_latent", + "type": "LATENT", + "link": 245 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 250 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 211, + "type": "SamplerCustomAdvanced", + "pos": [ + 2460, + 2330 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 246 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 247 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 248 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 249 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 250 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 204 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 212, + "type": "ComfyMathExpression", + "pos": [ + 760, + 3970 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + 
"order": 17, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 235 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 223, + 234 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.17.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 213, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3340 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 251 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 208 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 209 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": 
"scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 210, + 224 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 214, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3780 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 253 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 201 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 202 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 203 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + 
"ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 215, + "type": "PrimitiveInt", + "pos": [ + 760, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 201, + 208 + ] + } + ], + "title": "Width", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 216, + "type": "PrimitiveInt", + "pos": [ + 760, + 3490 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 267 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 202, + 209 + ] + } + ], + "title": "height", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": 
false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 217, + "type": "CLIPTextEncode", + "pos": [ + 1320, + 2870 + ], + "size": [ + 590, + 200 + ], + "flags": { + "collapsed": false + }, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 230 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 222 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, unreadable text on shirt or hat, incorrect lettering on cap (“PNTR”), incorrect t-shirt slogan (“JUST DO IT”), missing microphone, misplaced microphone, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, smiling, 
laughing, exaggerated sadness, wrong gaze direction, eyes looking at camera, mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, missing sniff sounds, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, missing door or shelves, missing shallow depth of field, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts." + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 218, + "type": "CreateVideo", + "pos": [ + 3280, + 2320 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 232 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 233 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 234 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 252 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 219, + "type": "VAEDecodeTiled", + "pos": [ + 2820, + 2630 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 211 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 212 + }, + { + "localized_name": "tile_size", + "name": 
"tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 232 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 64 + ] + }, + { + "id": 220, + "type": "LTXVAudioVAEDecode", + "pos": [ + 2820, + 2920 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 216 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 217 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 233 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + 
"id": 221, + "type": "LTXVSeparateAVLatent", + "pos": [ + 2460, + 2580 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 204 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 215 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 222, + "type": "CLIPTextEncode", + "pos": [ + 1310, + 2380 + ], + "size": [ + 620, + 420 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 231 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 265 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 221 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 223, + "type": "CheckpointLoaderSimple", + "pos": [ + 770, + 2380 + ], + "size": [ + 420, + 160 + ], 
+ "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 276 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 241 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 212, + 227, + 238 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { + "id": 224, + "type": "LTXVAudioVAELoader", + "pos": [ + 770, + 2660 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 205, + 217 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { + "id": 225, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 770, + 2890 + ], + "size": [ + 410, + 160 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 273 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 275 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 230, + 231 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + }, + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-distilled-fp8.safetensors", + "default" + ] + }, + { + "id": 226, 
+ "type": "ComfyMathExpression", + "pos": [ + 760, + 4020 + ], + "size": [ + 400, + 200 + ], + "flags": { + "collapsed": true + }, + "order": 31, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 260 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 261 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 262, + 263 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Conditioning", + "bounding": [ + 1850, + 3250, + 1370, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Settings", + "bounding": [ + 730, + 3250, + 290, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "First Frame", + "bounding": [ + 1050, + 3250, + 770, + 400 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Last Frame", + "bounding": [ + 1050, + 3680, + 770, + 370 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Model", + "bounding": [ + 730, + 2240, + 500, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Prompt", + "bounding": [ + 1260, +
2240, + 680, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Sampling", + "bounding": [ + 1970, + 2240, + 770, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Decoding", + "bounding": [ + 2770, + 2240, + 450, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 203, + "origin_id": 214, + "origin_slot": 0, + "target_id": 195, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 205, + "origin_id": 224, + "origin_slot": 0, + "target_id": 197, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 207, + "origin_id": 205, + "origin_slot": 0, + "target_id": 197, + "target_slot": 2, + "type": "INT" + }, + { + "id": 210, + "origin_id": 213, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 213, + "origin_id": 204, + "origin_slot": 0, + "target_id": 200, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 214, + "origin_id": 204, + "origin_slot": 1, + "target_id": 200, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 215, + "origin_id": 221, + "origin_slot": 0, + "target_id": 200, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 218, + "origin_id": 203, + "origin_slot": 0, + "target_id": 201, + "target_slot": 0, + "type": "INT" + }, + { + "id": 219, + "origin_id": 203, + "origin_slot": 1, + "target_id": 201, + "target_slot": 1, + "type": "INT" + }, + { + "id": 221, + "origin_id": 222, + "origin_slot": 0, + "target_id": 202, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 222, + "origin_id": 217, + "origin_slot": 0, + "target_id": 202, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 223, + "origin_id": 212, + "origin_slot": 0, + "target_id": 202, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 224, + "origin_id": 213, + "origin_slot": 0, + "target_id": 203, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 225, + 
"origin_id": 206, + "origin_slot": 0, + "target_id": 204, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 226, + "origin_id": 206, + "origin_slot": 1, + "target_id": 204, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 227, + "origin_id": 223, + "origin_slot": 2, + "target_id": 204, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 228, + "origin_id": 206, + "origin_slot": 2, + "target_id": 204, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 229, + "origin_id": 195, + "origin_slot": 0, + "target_id": 204, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": 202, + "origin_slot": 0, + "target_id": 206, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 237, + "origin_id": 202, + "origin_slot": 1, + "target_id": 206, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 238, + "origin_id": 223, + "origin_slot": 2, + "target_id": 206, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 239, + "origin_id": 201, + "origin_slot": 0, + "target_id": 206, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 199, + "origin_slot": 0, + "target_id": 206, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 241, + "origin_id": 223, + "origin_slot": 0, + "target_id": 207, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 242, + "origin_id": 204, + "origin_slot": 0, + "target_id": 207, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 243, + "origin_id": 204, + "origin_slot": 1, + "target_id": 207, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 244, + "origin_id": 204, + "origin_slot": 2, + "target_id": 210, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 245, + "origin_id": 197, + "origin_slot": 0, + "target_id": 210, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 246, + "origin_id": 196, + "origin_slot": 0, + "target_id": 211, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 247, + "origin_id": 207, + "origin_slot": 0, 
+ "target_id": 211, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 248, + "origin_id": 208, + "origin_slot": 0, + "target_id": 211, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 249, + "origin_id": 209, + "origin_slot": 0, + "target_id": 211, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 250, + "origin_id": 210, + "origin_slot": 0, + "target_id": 211, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 235, + "origin_id": 205, + "origin_slot": 0, + "target_id": 212, + "target_slot": 0, + "type": "INT" + }, + { + "id": 208, + "origin_id": 215, + "origin_slot": 0, + "target_id": 213, + "target_slot": 2, + "type": "INT" + }, + { + "id": 209, + "origin_id": 216, + "origin_slot": 0, + "target_id": 213, + "target_slot": 3, + "type": "INT" + }, + { + "id": 201, + "origin_id": 215, + "origin_slot": 0, + "target_id": 214, + "target_slot": 2, + "type": "INT" + }, + { + "id": 202, + "origin_id": 216, + "origin_slot": 0, + "target_id": 214, + "target_slot": 3, + "type": "INT" + }, + { + "id": 230, + "origin_id": 225, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 232, + "origin_id": 219, + "origin_slot": 0, + "target_id": 218, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 233, + "origin_id": 220, + "origin_slot": 0, + "target_id": 218, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 234, + "origin_id": 212, + "origin_slot": 0, + "target_id": 218, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 211, + "origin_id": 200, + "origin_slot": 2, + "target_id": 219, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 212, + "origin_id": 223, + "origin_slot": 2, + "target_id": 219, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 216, + "origin_id": 221, + "origin_slot": 1, + "target_id": 220, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 217, + "origin_id": 224, + "origin_slot": 0, + "target_id": 220, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 204, + 
"origin_id": 211, + "origin_slot": 1, + "target_id": 221, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 231, + "origin_id": 225, + "origin_slot": 0, + "target_id": 222, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 251, + "origin_id": -10, + "origin_slot": 0, + "target_id": 213, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 253, + "origin_id": -10, + "origin_slot": 1, + "target_id": 214, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 252, + "origin_id": 218, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 260, + "origin_id": 198, + "origin_slot": 0, + "target_id": 226, + "target_slot": 0, + "type": "INT" + }, + { + "id": 261, + "origin_id": 205, + "origin_slot": 0, + "target_id": 226, + "target_slot": 1, + "type": "INT" + }, + { + "id": 262, + "origin_id": 226, + "origin_slot": 1, + "target_id": 197, + "target_slot": 1, + "type": "INT" + }, + { + "id": 263, + "origin_id": 226, + "origin_slot": 1, + "target_id": 201, + "target_slot": 2, + "type": "INT" + }, + { + "id": 265, + "origin_id": -10, + "origin_slot": 2, + "target_id": 222, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 266, + "origin_id": -10, + "origin_slot": 3, + "target_id": 215, + "target_slot": 0, + "type": "INT" + }, + { + "id": 267, + "origin_id": -10, + "origin_slot": 4, + "target_id": 216, + "target_slot": 0, + "type": "INT" + }, + { + "id": 268, + "origin_id": -10, + "origin_slot": 5, + "target_id": 198, + "target_slot": 0, + "type": "INT" + }, + { + "id": 269, + "origin_id": -10, + "origin_slot": 6, + "target_id": 205, + "target_slot": 0, + "type": "INT" + }, + { + "id": 270, + "origin_id": -10, + "origin_slot": 7, + "target_id": 196, + "target_slot": 0, + "type": "INT" + }, + { + "id": 272, + "origin_id": -10, + "origin_slot": 8, + "target_id": 224, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 273, + "origin_id": -10, + "origin_slot": 9, + "target_id": 225, + "target_slot": 0, + 
"type": "COMBO" + }, + { + "id": 275, + "origin_id": -10, + "origin_slot": 8, + "target_id": 225, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 276, + "origin_id": -10, + "origin_slot": 8, + "target_id": 223, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/First-Last-Frame to Video", + "description": "Generates a video that interpolates between the first and last keyframes using LTX-2.3, including optional audio." + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Frame Interpolation.json b/blueprints/Frame Interpolation.json new file mode 100644 index 000000000..8e183de7e --- /dev/null +++ b/blueprints/Frame Interpolation.json @@ -0,0 +1,858 @@ +{ + "revision": 0, + "last_node_id": 16, + "last_link_id": 0, + "nodes": [ + { + "id": 16, + "type": "022693be-2baa-4009-870a-28921508a7ef", + "pos": [ + -2990, + -3240 + ], + "size": [ + 410, + 200 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "multiplier", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "enable_fps_multiplier", + "name": "value_1", + "type": "BOOLEAN", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "label": "VIDEO", + "name": "VIDEO_1", + "type": "VIDEO", + "links": [] + }, + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "9", + "value" + ], + [ + "13", + "value" + ], + [ + "1", + "model_name" + ] + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": 
"0.19.3" + }, + "widgets_values": [], + "title": "Frame Interpolation" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "022693be-2baa-4009-870a-28921508a7ef", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 17, + "lastLinkId": 28, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Frame Interpolation", + "inputNode": { + "id": -10, + "bounding": [ + -2810, + -3070, + 159.7421875, + 120 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -1270, + -3075, + 120, + 80 + ] + }, + "inputs": [ + { + "id": "05e31c51-dcb6-4a1e-9651-1b9ad4f7a287", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 2 + ], + "localized_name": "video", + "pos": [ + -2670.2578125, + -3050 + ] + }, + { + "id": "feecb409-7d1c-4a99-9c63-50c5fecdd3c9", + "name": "value", + "type": "INT", + "linkIds": [ + 22 + ], + "label": "multiplier", + "pos": [ + -2670.2578125, + -3030 + ] + }, + { + "id": "0b8a861b-b581-4068-9e8c-f8d15daf1ca6", + "name": "value_1", + "type": "BOOLEAN", + "linkIds": [ + 23 + ], + "label": "enable_fps_multiplier", + "pos": [ + -2670.2578125, + -3010 + ] + }, + { + "id": "a22b101e-8773-4e17-a297-7ee3aae09162", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 24 + ], + "pos": [ + -2670.2578125, + -2990 + ] + } + ], + "outputs": [ + { + "id": "ef2ada05-d5aa-492a-9394-6c3e71e39ebb", + "name": "VIDEO_1", + "type": "VIDEO", + "linkIds": [ + 26 + ], + "label": "VIDEO", + "pos": [ + -1250, + -3055 + ] + }, + { + "id": "5aacc622-2a07-4983-b31c-e04461f7f953", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 28 + ], + "pos": [ + -1250, + -3035 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "FrameInterpolationModelLoader", + "pos": [ + -2510, + -3370 + ], + "size": [ + 370, + 90 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 
24 + } + ], + "outputs": [ + { + "localized_name": "INTERP_MODEL", + "name": "INTERP_MODEL", + "type": "INTERP_MODEL", + "links": [ + 1 + ] + } + ], + "properties": { + "Node name for S&R": "FrameInterpolationModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3", + "models": [ + { + "name": "film_net_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/frame_interpolation/resolve/main/frame_interpolation/film_net_fp16.safetensors", + "directory": "frame_interpolation" + } + ] + }, + "widgets_values": [ + "film_net_fp16.safetensors" + ] + }, + { + "id": 2, + "type": "FrameInterpolate", + "pos": [ + -2040, + -3370 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "interp_model", + "name": "interp_model", + "type": "INTERP_MODEL", + "link": 1 + }, + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 3 + }, + { + "localized_name": "multiplier", + "name": "multiplier", + "type": "INT", + "widget": { + "name": "multiplier" + }, + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 4, + 28 + ] + } + ], + "properties": { + "Node name for S&R": "FrameInterpolate", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + 2 + ] + }, + { + "id": 5, + "type": "CreateVideo", + "pos": [ + -1600, + -3370 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 4 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", 
+ "link": 5 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 26 + ] + } + ], + "properties": { + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 9, + "type": "PrimitiveInt", + "pos": [ + -2500, + -2970 + ], + "size": [ + 270, + 90 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 22 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 8, + 19 + ] + } + ], + "title": "Int (Multiplier)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + 2, + "fixed" + ] + }, + { + "id": 10, + "type": "ComfySwitchNode", + "pos": [ + -1610, + -3120 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 11 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 13 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 15 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 12 + ] + } + ], + "properties": { + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + true + ] + }, + { + "id": 13, + "type": "PrimitiveBoolean", + "pos": [ + -2500, + -2770 + ], + "size": [ + 310, + 90 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 23 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 15 + ] + } + ], + "title": "Boolean (Apply multiplier to FPS?)", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + true + ] + }, + { + "id": 3, + "type": "GetVideoComponents", + "pos": [ + -2500, + -3170 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 2 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 3 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 5 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 11, + 18 + ] + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + } + }, + { + "id": 11, + "type": "ComfyMathExpression", + "pos": [ + -2090, + -3070 + ], + "size": [ + 400, + 210 + ], + "flags": { + 
"collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 18 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 19 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 13 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "cnr_id": "comfy-core", + "ver": "0.19.3" + }, + "widgets_values": [ + "min(abs(b), 16) * a" + ] + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": 1, + "origin_slot": 0, + "target_id": 2, + "target_slot": 0, + "type": "INTERP_MODEL" + }, + { + "id": 3, + "origin_id": 3, + "origin_slot": 0, + "target_id": 2, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 8, + "origin_id": 9, + "origin_slot": 0, + "target_id": 2, + "target_slot": 2, + "type": "INT" + }, + { + "id": 4, + "origin_id": 2, + "origin_slot": 0, + "target_id": 5, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 5, + "origin_id": 3, + "origin_slot": 1, + "target_id": 5, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 12, + "origin_id": 10, + "origin_slot": 0, + "target_id": 5, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 11, + "origin_id": 3, + "origin_slot": 2, + "target_id": 10, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 13, + "origin_id": 
11, + "origin_slot": 0, + "target_id": 10, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 15, + "origin_id": 13, + "origin_slot": 0, + "target_id": 10, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 18, + "origin_id": 3, + "origin_slot": 2, + "target_id": 11, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 19, + "origin_id": 9, + "origin_slot": 0, + "target_id": 11, + "target_slot": 1, + "type": "INT" + }, + { + "id": 2, + "origin_id": -10, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 22, + "origin_id": -10, + "origin_slot": 1, + "target_id": 9, + "target_slot": 0, + "type": "INT" + }, + { + "id": 23, + "origin_id": -10, + "origin_slot": 2, + "target_id": 13, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 24, + "origin_id": -10, + "origin_slot": 3, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 26, + "origin_id": 5, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 28, + "origin_id": 2, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + } + ], + "extra": {}, + "category": "Video Tools", + "description": "Increases video frame rate by synthesizing intermediate frames with a frame interpolation model." 
+ } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Get Any Video Frame.json b/blueprints/Get Any Video Frame.json new file mode 100644 index 000000000..9ff0f8e6e --- /dev/null +++ b/blueprints/Get Any Video Frame.json @@ -0,0 +1,485 @@ +{ + "revision": 0, + "last_node_id": 98, + "last_link_id": 0, + "nodes": [ + { + "id": 98, + "type": "dca6e78d-fb06-421e-97f7-6ce17a665260", + "pos": [ + -410, + -2230 + ], + "size": [ + 270, + 104 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "frame_index", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "title": "Get Any Video Frame", + "properties": { + "proxyWidgets": [ + [ + "100", + "value" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "dca6e78d-fb06-421e-97f7-6ce17a665260", + "version": 1, + "state": { + "lastGroupId": 1, + "lastNodeId": 136, + "lastLinkId": 302, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Get Any Video Frame", + "inputNode": { + "id": -10, + "bounding": [ + 380, + -57, + 120, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1460, + -57, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "2ceec378-8dcf-4340-8570-155967f59a93", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 4 + ], + "pos": [ + 480, + -37 + ] + }, + { + "id": "819955f6-c686-4896-8032-ff2d0059109a", + "name": "value", + "type": "INT", + "linkIds": [ + 283 + ], + "label": "frame_index", + "pos": [ + 480, + -17 + ] + } + ], + "outputs": [ + { + "id": "1ab0684d-6a44-45b6-8aa4-a0b971a1d41e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 5 + ], + "pos": [ + 1480, + -37 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "GetVideoComponents", + "pos": [ + 
560, + -150 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 4 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 1, + 2 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents" + } + }, + { + "id": 2, + "type": "GetImageSize", + "pos": [ + 560, + 50 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 1 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": null + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": null + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 285 + ] + } + ], + "properties": { + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 3, + "type": "ImageFromBatch", + "pos": [ + 1130, + -150 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 2 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": 286 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 5 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch" + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 99, + "type": "ComfyMathExpression", + "pos": [ + 910, 
+ 100 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 284 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 285 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 286 + ] + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "min(max(int(a if a >= 0 else b + a), 0), b - 1)" + ] + }, + { + "id": 100, + "type": "PrimitiveInt", + "pos": [ + 560, + 250 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 283 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 284 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 0, + "fixed" + ] + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": 1, + "origin_slot": 0, + "target_id": 2, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 2, + "origin_id": 1, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 4, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 5, + "origin_id": 3, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 283, + 
"origin_id": -10, + "origin_slot": 1, + "target_id": 100, + "target_slot": 0, + "type": "INT" + }, + { + "id": 284, + "origin_id": 100, + "origin_slot": 0, + "target_id": 99, + "target_slot": 0, + "type": "INT" + }, + { + "id": 285, + "origin_id": 2, + "origin_slot": 2, + "target_id": 99, + "target_slot": 1, + "type": "INT" + }, + { + "id": 286, + "origin_id": 99, + "origin_slot": 1, + "target_id": 3, + "target_slot": 1, + "type": "INT" + } + ], + "extra": {}, + "category": "Video Tools", + "description": "Extracts one image frame from a video at a chosen index; negative indices count from the end, and out-of-range values are clamped to the valid range." + } + ] + }, + "extra": { + "ds": { + "scale": 1.197015527856339, + "offset": [ + -168.76833554248222, + 540.6638955283997 + ] + }, + "frontendVersion": "1.42.8" + } +} \ No newline at end of file diff --git a/blueprints/Glow.json b/blueprints/Glow.json index 590445c06..2bbfdee51 100644 --- a/blueprints/Glow.json +++ b/blueprints/Glow.json @@ -1 +1,583 @@ -{"revision": 0, "last_node_id": 37, "last_link_id": 0, "nodes": [{"id": 37, "type": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", "pos": [2160, -360], "size": [260, 154], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["34", "value"], ["35", "value"], ["33", "value"], ["31", "choice"], ["32", "color"]]}, "widgets_values": [], "title": "Glow"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 36, "lastLinkId": 53, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Glow", "inputNode": {"id": -10, "bounding": [2110, -165, 120, 60]}, "outputNode": {"id": -20, "bounding": [3170, -165, 120, 60]}, "inputs": [{"id": "ffc7cf94-be90-4d56-a3b8-d0514d61c015", "name":
"images.image0", "type": "IMAGE", "linkIds": [45], "localized_name": "images.image0", "label": "image", "pos": [2210, -145]}], "outputs": [{"id": "04986101-50be-4762-8957-8e2a5e460bbb", "name": "IMAGE0", "type": "IMAGE", "linkIds": [53], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [3190, -145]}], "widgets": [], "nodes": [{"id": 30, "type": "GLSLShader", "pos": [2590, -520], "size": [520, 272], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 45}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 51}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 50}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 52}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 46}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 47}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [53]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, 
{"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blend mode\nuniform int u_int1; // Color tint\nuniform float u_float0; // Intensity\nuniform float u_float1; // Radius\nuniform float u_float2; // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD = 0;\nconst int BLEND_SCREEN = 1;\nconst int BLEND_SOFT = 2;\nconst int BLEND_OVERLAY = 3;\nconst int BLEND_LIGHTEN = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n p = fract(p * vec2(123.34, 456.21));\n p += dot(p, p + 45.32);\n return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n return vec3(\n float((h >> 16) & 255),\n float((h >> 8) & 255),\n float(h & 255)\n ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n if (mode == BLEND_SCREEN) {\n return 1.0 - (1.0 - base) * (1.0 - glow);\n }\n if (mode == BLEND_SOFT) {\n return mix(\n base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n step(0.5, glow)\n );\n }\n if (mode == BLEND_OVERLAY) {\n return mix(\n 2.0 * base * glow,\n 1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n step(0.5, base)\n );\n }\n if (mode == BLEND_LIGHTEN) {\n return max(base, glow);\n }\n return base + glow;\n}\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n \n float intensity = u_float0 * 0.05;\n float radius = u_float1 * u_float1 * 0.012;\n \n if (intensity < 0.001 || radius < 0.1) {\n fragColor = original;\n return;\n }\n \n float threshold = 1.0 - u_float2 * 0.01;\n float t0 = threshold - 0.15;\n float t1 = threshold + 0.15;\n \n vec2 texelSize = 1.0 / u_resolution;\n float radius2 = 
radius * radius;\n \n float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n int samples = int(float(MAX_SAMPLES) * sampleScale);\n \n float noise = hash(gl_FragCoord.xy);\n float angleOffset = noise * GOLDEN_ANGLE;\n float radiusJitter = 0.85 + noise * 0.3;\n \n float ca = cos(GOLDEN_ANGLE);\n float sa = sin(GOLDEN_ANGLE);\n vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n \n vec3 glow = vec3(0.0);\n float totalWeight = 0.0;\n \n // Center tap\n float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n glow += original.rgb * centerMask * 2.0;\n totalWeight += 2.0;\n \n for (int i = 1; i < MAX_SAMPLES; i++) {\n if (i >= samples) break;\n \n float fi = float(i);\n float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n \n vec2 offset = dir * dist * texelSize;\n vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n float mask = smoothstep(t0, t1, dot(c, LUMA));\n \n float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n w = max(w, 0.0);\n w *= w;\n \n glow += c * mask * w;\n totalWeight += w;\n \n dir = vec2(\n dir.x * ca - dir.y * sa,\n dir.x * sa + dir.y * ca\n );\n }\n \n glow *= intensity / max(totalWeight, 0.001);\n \n if (u_int1 > 0) {\n glow *= hexToRgb(u_int1);\n }\n \n vec3 result = blend(original.rgb, glow, u_int0);\n result += (noise - 0.5) * (1.0 / 255.0);\n \n fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}", "from_input"]}, {"id": 34, "type": "PrimitiveFloat", "pos": [2290, -510], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "intensity", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [51]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [30]}, {"id": 35, "type": "PrimitiveFloat", "pos": [2290, -410], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": 
"radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [50]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [25]}, {"id": 33, "type": "PrimitiveFloat", "pos": [2290, -310], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [52]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [100]}, {"id": 32, "type": "ColorToRGBInt", "pos": [2290, -210], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "color_tint", "localized_name": "color", "name": "color", "type": "COLOR", "widget": {"name": "color"}, "link": null}], "outputs": [{"localized_name": "rgb_int", "name": "rgb_int", "type": "INT", "links": [47]}], "properties": {"Node name for S&R": "ColorToRGBInt"}, "widgets_values": ["#45edf5"]}, {"id": 31, "type": "CustomCombo", "pos": [2290, -110], "size": [270, 222], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "blend_mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [46]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["add", 0, "add", "screen", "soft", "overlay", "lighten", ""]}], "groups": [], "links": [{"id": 51, "origin_id": 34, "origin_slot": 0, "target_id": 30, "target_slot": 2, "type": "FLOAT"}, {"id": 50, "origin_id": 35, "origin_slot": 0, "target_id": 30, 
"target_slot": 3, "type": "FLOAT"}, {"id": 52, "origin_id": 33, "origin_slot": 0, "target_id": 30, "target_slot": 4, "type": "FLOAT"}, {"id": 46, "origin_id": 31, "origin_slot": 1, "target_id": 30, "target_slot": 6, "type": "INT"}, {"id": 47, "origin_id": 32, "origin_slot": 0, "target_id": 30, "target_slot": 7, "type": "INT"}, {"id": 45, "origin_id": -10, "origin_slot": 0, "target_id": 30, "target_slot": 0, "type": "IMAGE"}, {"id": 53, "origin_id": 30, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 37, + "last_link_id": 0, + "nodes": [ + { + "id": 37, + "type": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", + "pos": [ + 2160, + -360 + ], + "size": [ + 260, + 154 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "34", + "value" + ], + [ + "35", + "value" + ], + [ + "33", + "value" + ], + [ + "31", + "choice" + ], + [ + "32", + "color" + ] + ] + }, + "widgets_values": [], + "title": "Glow" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 36, + "lastLinkId": 53, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Glow", + "inputNode": { + "id": -10, + "bounding": [ + 2110, + -165, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3170, + -165, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "ffc7cf94-be90-4d56-a3b8-d0514d61c015", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 45 + ], + "localized_name": "images.image0", 
+ "label": "image", + "pos": [ + 2210, + -145 + ] + } + ], + "outputs": [ + { + "id": "04986101-50be-4762-8957-8e2a5e460bbb", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 53 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 3190, + -145 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 30, + "type": "GLSLShader", + "pos": [ + 2590, + -520 + ], + "size": [ + 520, + 272 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 45 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 51 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 50 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 52 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 46 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": 47 + }, + { + "label": "u_int2", + "localized_name": "ints.u_int2", + "name": "ints.u_int2", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + 
"name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 53 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Blend mode\nuniform int u_int1; // Color tint\nuniform float u_float0; // Intensity\nuniform float u_float1; // Radius\nuniform float u_float2; // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD = 0;\nconst int BLEND_SCREEN = 1;\nconst int BLEND_SOFT = 2;\nconst int BLEND_OVERLAY = 3;\nconst int BLEND_LIGHTEN = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n p = fract(p * vec2(123.34, 456.21));\n p += dot(p, p + 45.32);\n return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n return vec3(\n float((h >> 16) & 255),\n float((h >> 8) & 255),\n float(h & 255)\n ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n if (mode == BLEND_SCREEN) {\n return 1.0 - (1.0 - base) * (1.0 - glow);\n }\n if (mode == BLEND_SOFT) {\n return mix(\n base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n step(0.5, glow)\n );\n }\n if (mode == BLEND_OVERLAY) {\n return mix(\n 2.0 * base * glow,\n 1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n step(0.5, base)\n );\n }\n if (mode == BLEND_LIGHTEN) {\n return max(base, glow);\n }\n return base + glow;\n}\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n \n float intensity = u_float0 * 0.05;\n float radius = u_float1 * 
u_float1 * 0.012;\n \n if (intensity < 0.001 || radius < 0.1) {\n fragColor = original;\n return;\n }\n \n float threshold = 1.0 - u_float2 * 0.01;\n float t0 = threshold - 0.15;\n float t1 = threshold + 0.15;\n \n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n float radius2 = radius * radius;\n \n float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n int samples = int(float(MAX_SAMPLES) * sampleScale);\n \n float noise = hash(gl_FragCoord.xy);\n float angleOffset = noise * GOLDEN_ANGLE;\n float radiusJitter = 0.85 + noise * 0.3;\n \n float ca = cos(GOLDEN_ANGLE);\n float sa = sin(GOLDEN_ANGLE);\n vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n \n vec3 glow = vec3(0.0);\n float totalWeight = 0.0;\n \n // Center tap\n float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n glow += original.rgb * centerMask * 2.0;\n totalWeight += 2.0;\n \n for (int i = 1; i < MAX_SAMPLES; i++) {\n if (i >= samples) break;\n \n float fi = float(i);\n float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n \n vec2 offset = dir * dist * texelSize;\n vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n float mask = smoothstep(t0, t1, dot(c, LUMA));\n \n float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n w = max(w, 0.0);\n w *= w;\n \n glow += c * mask * w;\n totalWeight += w;\n \n dir = vec2(\n dir.x * ca - dir.y * sa,\n dir.x * sa + dir.y * ca\n );\n }\n \n glow *= intensity / max(totalWeight, 0.001);\n \n if (u_int1 > 0) {\n glow *= hexToRgb(u_int1);\n }\n \n vec3 result = blend(original.rgb, glow, u_int0);\n result += (noise - 0.5) * (1.0 / 255.0);\n \n fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}", + "from_input" + ] + }, + { + "id": 34, + "type": "PrimitiveFloat", + "pos": [ + 2290, + -510 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "intensity", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + 
], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 51 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1 + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 35, + "type": "PrimitiveFloat", + "pos": [ + 2290, + -410 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "radius", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 50 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 33, + "type": "PrimitiveFloat", + "pos": [ + 2290, + -310 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "threshold", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 52 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1 + }, + "widgets_values": [ + 100 + ] + }, + { + "id": 32, + "type": "ColorToRGBInt", + "pos": [ + 2290, + -210 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "color_tint", + "localized_name": "color", + "name": "color", + "type": "COLOR", + "widget": { + "name": "color" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "rgb_int", + "name": "rgb_int", + "type": "INT", + "links": [ + 47 + ] + } + ], + "properties": { + "Node name for S&R": "ColorToRGBInt" + }, + "widgets_values": [ + "#45edf5" + ] + }, + { + 
"id": 31, + "type": "CustomCombo", + "pos": [ + 2290, + -110 + ], + "size": [ + 270, + 222 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "blend_mode", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 46 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "add", + 0, + "add", + "screen", + "soft", + "overlay", + "lighten", + "" + ] + } + ], + "groups": [], + "links": [ + { + "id": 51, + "origin_id": 34, + "origin_slot": 0, + "target_id": 30, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 50, + "origin_id": 35, + "origin_slot": 0, + "target_id": 30, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 52, + "origin_id": 33, + "origin_slot": 0, + "target_id": 30, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 46, + "origin_id": 31, + "origin_slot": 1, + "target_id": 30, + "target_slot": 6, + "type": "INT" + }, + { + "id": 47, + "origin_id": 32, + "origin_slot": 0, + "target_id": 30, + "target_slot": 7, + "type": "INT" + }, + { + "id": 45, + "origin_id": -10, + "origin_slot": 0, + "target_id": 30, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 53, + "origin_id": 30, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adds a glow/bloom effect around bright image areas via GPU fragment shader." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/Hue and Saturation.json b/blueprints/Hue and Saturation.json index 04846c51d..cddf0154a 100644 --- a/blueprints/Hue and Saturation.json +++ b/blueprints/Hue and Saturation.json @@ -1 +1,760 @@ -{"revision": 0, "last_node_id": 11, "last_link_id": 0, "nodes": [{"id": 11, "type": "c64f83e9-aa5d-4031-89f1-0704e39299fe", "pos": [870, -220], "size": [250, 178], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Hue and Saturation", "properties": {"proxyWidgets": [["2", "choice"], ["4", "value"], ["5", "value"], ["6", "value"], ["7", "value"], ["3", "choice"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "c64f83e9-aa5d-4031-89f1-0704e39299fe", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 10, "lastLinkId": 11, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Hue and Saturation", "inputNode": {"id": -10, "bounding": [360, -176, 120, 60]}, "outputNode": {"id": -20, "bounding": [1410, -176, 120, 60]}, "inputs": [{"id": "a5aae7ea-b511-4045-b5da-94101e269cd7", "name": "images.image0", "type": "IMAGE", "linkIds": [10], "localized_name": "images.image0", "label": "image", "pos": [460, -156]}], "outputs": [{"id": "30b72604-69b3-4944-b253-a9099bbd73a9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [8], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [1430, -156]}], "widgets": [], "nodes": [{"id": 3, "type": "CustomCombo", "pos": [540, -240], "size": [270, 150], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "color_space", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", 
"links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [2]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["HSL", 0, "HSL", "HSB/HSV", ""]}, {"id": 2, "type": "CustomCombo", "pos": [540, -580], "size": [270, 294], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [1]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Master", 0, "Master", "Reds", "Yellows", "Greens", "Cyans", "Blues", "Magentas", "Colorize", ""]}, {"id": 7, "type": "PrimitiveFloat", "pos": [540, 260], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "overlap", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [6]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [50]}, {"id": 6, "type": "PrimitiveFloat", "pos": [540, 160], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "brightness", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [5]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -100, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 5, "type": "PrimitiveFloat", "pos": [540, 60], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "saturation", 
"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [4]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -100, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 4, "type": "PrimitiveFloat", "pos": [540, -40], "size": [270, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "hue", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [3]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -180, "max": 180, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 0, 0]}, {"offset": 0.16666666666666666, "color": [255, 255, 0]}, {"offset": 0.3333333333333333, "color": [0, 255, 0]}, {"offset": 0.5, "color": [0, 255, 255]}, {"offset": 0.6666666666666666, "color": [0, 0, 255]}, {"offset": 0.8333333333333334, "color": [255, 0, 255]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 1, "type": "GLSLShader", "pos": [880, -300], "size": [470, 292], "flags": {}, "order": 6, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 10}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 3}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 4}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", 
"shape": 7, "type": "FLOAT", "link": 5}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 6}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 1}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 2}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [8]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize\nuniform int u_int1; // Color Space: 0=HSL, 1=HSB/HSV\nuniform float u_float0; // Hue (-180 to 180)\nuniform float u_float1; // Saturation (-100 to 100)\nuniform float u_float2; // Lightness/Brightness (-100 to 100)\nuniform float u_float3; // Overlap (0 to 100) - feathering between adjacent color ranges\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\n// Color range modes\nconst int MODE_MASTER = 0;\nconst int MODE_RED = 1;\nconst int MODE_YELLOW = 2;\nconst int MODE_GREEN = 3;\nconst int 
MODE_CYAN = 4;\nconst int MODE_BLUE = 5;\nconst int MODE_MAGENTA = 6;\nconst int MODE_COLORIZE = 7;\n\n// Color space modes\nconst int COLORSPACE_HSL = 0;\nconst int COLORSPACE_HSB = 1;\n\nconst float EPSILON = 0.0001;\n\n//=============================================================================\n// RGB <-> HSL Conversions\n//=============================================================================\n\nvec3 rgb2hsl(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = 0.0;\n float l = (maxC + minC) * 0.5;\n\n if (delta > EPSILON) {\n s = l < 0.5\n ? delta / (maxC + minC)\n : delta / (2.0 - maxC - minC);\n\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, l);\n}\n\nfloat hue2rgb(float p, float q, float t) {\n t = fract(t);\n if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;\n if (t < 0.5) return q;\n if (t < 2.0/3.0) return p + (q - p) * (2.0/3.0 - t) * 6.0;\n return p;\n}\n\nvec3 hsl2rgb(vec3 hsl) {\n if (hsl.y < EPSILON) return vec3(hsl.z);\n\n float q = hsl.z < 0.5\n ? hsl.z * (1.0 + hsl.y)\n : hsl.z + hsl.y - hsl.z * hsl.y;\n float p = 2.0 * hsl.z - q;\n\n return vec3(\n hue2rgb(p, q, hsl.x + 1.0/3.0),\n hue2rgb(p, q, hsl.x),\n hue2rgb(p, q, hsl.x - 1.0/3.0)\n );\n}\n\nvec3 rgb2hsb(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = (maxC > EPSILON) ? delta / maxC : 0.0;\n float b = maxC;\n\n if (delta > EPSILON) {\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 
6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, b);\n}\n\nvec3 hsb2rgb(vec3 hsb) {\n vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);\n return hsb.z * mix(vec3(1.0), rgb, hsb.y);\n}\n\n//=============================================================================\n// Color Range Weight Calculation\n//=============================================================================\n\nfloat hueDistance(float a, float b) {\n float d = abs(a - b);\n return min(d, 1.0 - d);\n}\n\nfloat getHueWeight(float hue, float center, float overlap) {\n float baseWidth = 1.0 / 6.0;\n float feather = baseWidth * overlap;\n\n float d = hueDistance(hue, center);\n\n float inner = baseWidth * 0.5;\n float outer = inner + feather;\n\n return 1.0 - smoothstep(inner, outer, d);\n}\n\nfloat getModeWeight(float hue, int mode, float overlap) {\n if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;\n\n if (mode == MODE_RED) {\n return max(\n getHueWeight(hue, 0.0, overlap),\n getHueWeight(hue, 1.0, overlap)\n );\n }\n\n float center = float(mode - 1) / 6.0;\n return getHueWeight(hue, center, overlap);\n}\n\n//=============================================================================\n// Adjustment Functions\n//=============================================================================\n\nfloat adjustLightness(float l, float amount) {\n return amount > 0.0\n ? l + (1.0 - l) * amount\n : l + l * amount;\n}\n\nfloat adjustBrightness(float b, float amount) {\n return clamp(b + amount, 0.0, 1.0);\n}\n\nfloat adjustSaturation(float s, float amount) {\n return amount > 0.0\n ? 
s + (1.0 - s) * amount\n : s + s * amount;\n}\n\nvec3 colorize(vec3 rgb, float hue, float sat, float light) {\n float lum = dot(rgb, vec3(0.299, 0.587, 0.114));\n float l = adjustLightness(lum, light);\n\n vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));\n return hsl2rgb(hsl);\n}\n\n//=============================================================================\n// Main\n//=============================================================================\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n\n float hueShift = u_float0 / 360.0; // -180..180 -> -0.5..0.5\n float satAmount = u_float1 / 100.0; // -100..100 -> -1..1\n float lightAmount= u_float2 / 100.0; // -100..100 -> -1..1\n float overlap = u_float3 / 100.0; // 0..100 -> 0..1\n\n vec3 result;\n\n if (u_int0 == MODE_COLORIZE) {\n result = colorize(original.rgb, hueShift, satAmount, lightAmount);\n fragColor = vec4(result, original.a);\n return;\n }\n\n vec3 hsx = (u_int1 == COLORSPACE_HSL)\n ? rgb2hsl(original.rgb)\n : rgb2hsb(original.rgb);\n\n float weight = getModeWeight(hsx.x, u_int0, overlap);\n\n if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {\n weight = 0.0;\n }\n\n if (weight > EPSILON) {\n float h = fract(hsx.x + hueShift * weight);\n float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);\n float v = (u_int1 == COLORSPACE_HSL)\n ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)\n : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);\n\n vec3 adjusted = vec3(h, s, v);\n result = (u_int1 == COLORSPACE_HSL)\n ? 
hsl2rgb(adjusted)\n : hsb2rgb(adjusted);\n } else {\n result = original.rgb;\n }\n\n fragColor = vec4(result, original.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 3, "origin_id": 4, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 4, "origin_id": 5, "origin_slot": 0, "target_id": 1, "target_slot": 3, "type": "FLOAT"}, {"id": 5, "origin_id": 6, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "FLOAT"}, {"id": 6, "origin_id": 7, "origin_slot": 0, "target_id": 1, "target_slot": 5, "type": "FLOAT"}, {"id": 1, "origin_id": 2, "origin_slot": 1, "target_id": 1, "target_slot": 7, "type": "INT"}, {"id": 2, "origin_id": 3, "origin_slot": 1, "target_id": 1, "target_slot": 8, "type": "INT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 8, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 11, + "last_link_id": 0, + "nodes": [ + { + "id": 11, + "type": "c64f83e9-aa5d-4031-89f1-0704e39299fe", + "pos": [ + 870, + -220 + ], + "size": [ + 250, + 178 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "title": "Hue and Saturation", + "properties": { + "proxyWidgets": [ + [ + "2", + "choice" + ], + [ + "4", + "value" + ], + [ + "5", + "value" + ], + [ + "6", + "value" + ], + [ + "7", + "value" + ], + [ + "3", + "choice" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "c64f83e9-aa5d-4031-89f1-0704e39299fe", + "version": 1, + "state": { + "lastGroupId": 0, + 
"lastNodeId": 10, + "lastLinkId": 11, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Hue and Saturation", + "inputNode": { + "id": -10, + "bounding": [ + 360, + -176, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1410, + -176, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "a5aae7ea-b511-4045-b5da-94101e269cd7", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 10 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 460, + -156 + ] + } + ], + "outputs": [ + { + "id": "30b72604-69b3-4944-b253-a9099bbd73a9", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 8 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 1430, + -156 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 3, + "type": "CustomCombo", + "pos": [ + 540, + -240 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "color_space", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 2 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "HSL", + 0, + "HSL", + "HSB/HSV", + "" + ] + }, + { + "id": 2, + "type": "CustomCombo", + "pos": [ + 540, + -580 + ], + "size": [ + 270, + 294 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "mode", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 1 + ] + } + ], + "properties": { + "Node name for S&R": 
"CustomCombo" + }, + "widgets_values": [ + "Master", + 0, + "Master", + "Reds", + "Yellows", + "Greens", + "Cyans", + "Blues", + "Magentas", + "Colorize", + "" + ] + }, + { + "id": 7, + "type": "PrimitiveFloat", + "pos": [ + 540, + 260 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "overlap", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 100, + "precision": 1, + "step": 1 + }, + "widgets_values": [ + 50 + ] + }, + { + "id": 6, + "type": "PrimitiveFloat", + "pos": [ + 540, + 160 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "brightness", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 5 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": -100, + "max": 100, + "precision": 1, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 5, + "type": "PrimitiveFloat", + "pos": [ + 540, + 60 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "saturation", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 4 + ] + } + ], + "properties": { + "Node name for S&R": 
"PrimitiveFloat", + "min": -100, + "max": 100, + "precision": 1, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [ + 540, + -40 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "hue", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 3 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": -180, + "max": 180, + "precision": 1, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 255, + 0, + 0 + ] + }, + { + "offset": 0.16666666666666666, + "color": [ + 255, + 255, + 0 + ] + }, + { + "offset": 0.3333333333333333, + "color": [ + 0, + 255, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 0, + 255, + 255 + ] + }, + { + "offset": 0.6666666666666666, + "color": [ + 0, + 0, + 255 + ] + }, + { + "offset": 0.8333333333333334, + "color": [ + 255, + 0, + 255 + ] + }, + { + "offset": 1, + "color": [ + 255, + 0, + 0 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 1, + "type": "GLSLShader", + "pos": [ + 880, + -300 + ], + "size": [ + 470, + 292 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 10 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 3 + }, + { + "label": 
"u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 4 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 5 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": 6 + }, + { + "label": "u_float4", + "localized_name": "floats.u_float4", + "name": "floats.u_float4", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 1 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": 2 + }, + { + "label": "u_int2", + "localized_name": "ints.u_int2", + "name": "ints.u_int2", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 8 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize\nuniform int u_int1; // Color Space: 0=HSL, 
1=HSB/HSV\nuniform float u_float0; // Hue (-180 to 180)\nuniform float u_float1; // Saturation (-100 to 100)\nuniform float u_float2; // Lightness/Brightness (-100 to 100)\nuniform float u_float3; // Overlap (0 to 100) - feathering between adjacent color ranges\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\n// Color range modes\nconst int MODE_MASTER = 0;\nconst int MODE_RED = 1;\nconst int MODE_YELLOW = 2;\nconst int MODE_GREEN = 3;\nconst int MODE_CYAN = 4;\nconst int MODE_BLUE = 5;\nconst int MODE_MAGENTA = 6;\nconst int MODE_COLORIZE = 7;\n\n// Color space modes\nconst int COLORSPACE_HSL = 0;\nconst int COLORSPACE_HSB = 1;\n\nconst float EPSILON = 0.0001;\n\n//=============================================================================\n// RGB <-> HSL Conversions\n//=============================================================================\n\nvec3 rgb2hsl(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = 0.0;\n float l = (maxC + minC) * 0.5;\n\n if (delta > EPSILON) {\n s = l < 0.5\n ? delta / (maxC + minC)\n : delta / (2.0 - maxC - minC);\n\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, l);\n}\n\nfloat hue2rgb(float p, float q, float t) {\n t = fract(t);\n if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;\n if (t < 0.5) return q;\n if (t < 2.0/3.0) return p + (q - p) * (2.0/3.0 - t) * 6.0;\n return p;\n}\n\nvec3 hsl2rgb(vec3 hsl) {\n if (hsl.y < EPSILON) return vec3(hsl.z);\n\n float q = hsl.z < 0.5\n ? 
hsl.z * (1.0 + hsl.y)\n : hsl.z + hsl.y - hsl.z * hsl.y;\n float p = 2.0 * hsl.z - q;\n\n return vec3(\n hue2rgb(p, q, hsl.x + 1.0/3.0),\n hue2rgb(p, q, hsl.x),\n hue2rgb(p, q, hsl.x - 1.0/3.0)\n );\n}\n\nvec3 rgb2hsb(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = (maxC > EPSILON) ? delta / maxC : 0.0;\n float b = maxC;\n\n if (delta > EPSILON) {\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, b);\n}\n\nvec3 hsb2rgb(vec3 hsb) {\n vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);\n return hsb.z * mix(vec3(1.0), rgb, hsb.y);\n}\n\n//=============================================================================\n// Color Range Weight Calculation\n//=============================================================================\n\nfloat hueDistance(float a, float b) {\n float d = abs(a - b);\n return min(d, 1.0 - d);\n}\n\nfloat getHueWeight(float hue, float center, float overlap) {\n float baseWidth = 1.0 / 6.0;\n float feather = baseWidth * overlap;\n\n float d = hueDistance(hue, center);\n\n float inner = baseWidth * 0.5;\n float outer = inner + feather;\n\n return 1.0 - smoothstep(inner, outer, d);\n}\n\nfloat getModeWeight(float hue, int mode, float overlap) {\n if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;\n\n if (mode == MODE_RED) {\n return max(\n getHueWeight(hue, 0.0, overlap),\n getHueWeight(hue, 1.0, overlap)\n );\n }\n\n float center = float(mode - 1) / 6.0;\n return getHueWeight(hue, center, overlap);\n}\n\n//=============================================================================\n// Adjustment Functions\n//=============================================================================\n\nfloat adjustLightness(float l, float 
amount) {\n return amount > 0.0\n ? l + (1.0 - l) * amount\n : l + l * amount;\n}\n\nfloat adjustBrightness(float b, float amount) {\n return clamp(b + amount, 0.0, 1.0);\n}\n\nfloat adjustSaturation(float s, float amount) {\n return amount > 0.0\n ? s + (1.0 - s) * amount\n : s + s * amount;\n}\n\nvec3 colorize(vec3 rgb, float hue, float sat, float light) {\n float lum = dot(rgb, vec3(0.299, 0.587, 0.114));\n float l = adjustLightness(lum, light);\n\n vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));\n return hsl2rgb(hsl);\n}\n\n//=============================================================================\n// Main\n//=============================================================================\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n\n float hueShift = u_float0 / 360.0; // -180..180 -> -0.5..0.5\n float satAmount = u_float1 / 100.0; // -100..100 -> -1..1\n float lightAmount= u_float2 / 100.0; // -100..100 -> -1..1\n float overlap = u_float3 / 100.0; // 0..100 -> 0..1\n\n vec3 result;\n\n if (u_int0 == MODE_COLORIZE) {\n result = colorize(original.rgb, hueShift, satAmount, lightAmount);\n fragColor = vec4(result, original.a);\n return;\n }\n\n vec3 hsx = (u_int1 == COLORSPACE_HSL)\n ? rgb2hsl(original.rgb)\n : rgb2hsb(original.rgb);\n\n float weight = getModeWeight(hsx.x, u_int0, overlap);\n\n if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {\n weight = 0.0;\n }\n\n if (weight > EPSILON) {\n float h = fract(hsx.x + hueShift * weight);\n float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);\n float v = (u_int1 == COLORSPACE_HSL)\n ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)\n : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);\n\n vec3 adjusted = vec3(h, s, v);\n result = (u_int1 == COLORSPACE_HSL)\n ? 
hsl2rgb(adjusted)\n : hsb2rgb(adjusted);\n } else {\n result = original.rgb;\n }\n\n fragColor = vec4(result, original.a);\n}\n", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 3, + "origin_id": 4, + "origin_slot": 0, + "target_id": 1, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 4, + "origin_id": 5, + "origin_slot": 0, + "target_id": 1, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 5, + "origin_id": 6, + "origin_slot": 0, + "target_id": 1, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 6, + "origin_id": 7, + "origin_slot": 0, + "target_id": 1, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 1, + "origin_id": 2, + "origin_slot": 1, + "target_id": 1, + "target_slot": 7, + "type": "INT" + }, + { + "id": 2, + "origin_id": 3, + "origin_slot": 1, + "target_id": 1, + "target_slot": 8, + "type": "INT" + }, + { + "id": 10, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 8, + "origin_id": 1, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adjusts hue, saturation, and lightness of an image using a real-time GPU fragment shader." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/Image Blur.json b/blueprints/Image Blur.json index 4b9e74255..0ca8d9931 100644 --- a/blueprints/Image Blur.json +++ b/blueprints/Image Blur.json @@ -1 +1,382 @@ -{"revision": 0, "last_node_id": 8, "last_link_id": 0, "nodes": [{"id": 8, "type": "198632a3-ee76-4aab-9ce7-a69c624eaff9", "pos": [4470, -1840], "size": [210, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "blurred_image", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["12", "choice"], ["10", "value"]]}, "widgets_values": [], "title": "Image Blur"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "198632a3-ee76-4aab-9ce7-a69c624eaff9", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 12, "lastLinkId": 11, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Blur", "inputNode": {"id": -10, "bounding": [3540, -2445, 120, 60]}, "outputNode": {"id": -20, "bounding": [4620, -2445, 121.11666870117188, 60]}, "inputs": [{"id": "7ff2a402-6b11-45e8-a92a-7158d216520a", "name": "images.image0", "type": "IMAGE", "linkIds": [9], "localized_name": "images.image0", "label": "image", "pos": [3640, -2425]}], "outputs": [{"id": "80a8e19e-ffd9-44a5-90f2-710815a5b063", "name": "IMAGE0", "type": "IMAGE", "linkIds": [3], "localized_name": "IMAGE0", "label": "blurred_image", "pos": [4640, -2425]}], "widgets": [], "nodes": [{"id": 12, "type": "CustomCombo", "pos": [3720, -2620], "size": [270, 174], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "blur_type", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", 
"links": [11]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Gaussian", 0, "Gaussian", "Box", "Radial", ""]}, {"id": 10, "type": "PrimitiveFloat", "pos": [4020, -2780], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [10]}], "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": 0}, "widgets_values": [20]}, {"id": 1, "type": "GLSLShader", "pos": [4020, -2670], "size": [430, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 9}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 10}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 11}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [3]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"localized_name": "IMAGE3", "name": "IMAGE3", 
"type": "IMAGE", "links": []}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0; // Blur radius/amount\nuniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / u_resolution;\n float radius = max(u_float0, 0.0);\n\n // Radial (angular) blur - single pass, doesn't use separable\n if (u_int0 == BLUR_RADIAL) {\n // Only execute on first pass\n if (u_pass > 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec2 center = vec2(0.5);\n vec2 dir = v_texCoord - center;\n float dist = length(dir);\n\n if (dist < 1e-4) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec4 sum = vec4(0.0);\n float totalWeight = 0.0;\n float angleStep = radius * RADIAL_STRENGTH;\n\n dir /= dist;\n\n float cosStep = cos(angleStep);\n float sinStep = sin(angleStep);\n\n float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n vec2 rotDir = vec2(\n dir.x * cos(negAngle) - dir.y * sin(negAngle),\n dir.x * sin(negAngle) + dir.y * cos(negAngle)\n );\n\n for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n vec2 uv = center + rotDir * dist;\n float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n sum += texture(u_image0, uv) * w;\n totalWeight += w;\n\n rotDir = vec2(\n rotDir.x * cosStep - rotDir.y * sinStep,\n rotDir.x * sinStep + rotDir.y * cosStep\n );\n }\n\n fragColor0 = sum / max(totalWeight, 0.001);\n return;\n 
}\n\n // Separable Gaussian / Box blur\n int samples = int(ceil(radius));\n\n if (samples == 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n // Direction: pass 0 = horizontal, pass 1 = vertical\n vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n vec4 color = vec4(0.0);\n float totalWeight = 0.0;\n float sigma = radius / 2.0;\n\n for (int i = -samples; i <= samples; i++) {\n vec2 offset = dir * float(i) * texelSize;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float weight;\n if (u_int0 == BLUR_GAUSSIAN) {\n weight = gaussian(float(i), sigma);\n } else {\n // BLUR_BOX\n weight = 1.0;\n }\n\n color += sample_color * weight;\n totalWeight += weight;\n }\n\n fragColor0 = color / totalWeight;\n}\n", "from_input"]}], "groups": [], "links": [{"id": 10, "origin_id": 10, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 11, "origin_id": 12, "origin_slot": 1, "target_id": 1, "target_slot": 4, "type": "INT"}, {"id": 9, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 3, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Blur"}]}} +{ + "revision": 0, + "last_node_id": 8, + "last_link_id": 0, + "nodes": [ + { + "id": 8, + "type": "198632a3-ee76-4aab-9ce7-a69c624eaff9", + "pos": [ + 4470, + -1840 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "blurred_image", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "12", + "choice" + ], + [ + "10", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Image Blur" + } + ], + "links": [], + "version": 0.4, + 
"definitions": { + "subgraphs": [ + { + "id": "198632a3-ee76-4aab-9ce7-a69c624eaff9", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 12, + "lastLinkId": 11, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Blur", + "inputNode": { + "id": -10, + "bounding": [ + 3540, + -2445, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4620, + -2445, + 121.11666870117188, + 60 + ] + }, + "inputs": [ + { + "id": "7ff2a402-6b11-45e8-a92a-7158d216520a", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 3640, + -2425 + ] + } + ], + "outputs": [ + { + "id": "80a8e19e-ffd9-44a5-90f2-710815a5b063", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 3 + ], + "localized_name": "IMAGE0", + "label": "blurred_image", + "pos": [ + 4640, + -2425 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 12, + "type": "CustomCombo", + "pos": [ + 3720, + -2620 + ], + "size": [ + 270, + 174 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "blur_type", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 11 + ] + } + ], + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "Gaussian", + 0, + "Gaussian", + "Box", + "Radial", + "" + ] + }, + { + "id": 10, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -2780 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "strength", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + 
"type": "FLOAT", + "links": [ + 10 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 100, + "min": 0 + }, + "widgets_values": [ + 20 + ] + }, + { + "id": 1, + "type": "GLSLShader", + "pos": [ + 4020, + -2670 + ], + "size": [ + 430, + 212 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 9 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 10 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": 11 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 3 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": [] + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": [] + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\n#pragma passes 
2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0; // Blur radius/amount\nuniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n float radius = max(u_float0, 0.0);\n\n // Radial (angular) blur - single pass, doesn't use separable\n if (u_int0 == BLUR_RADIAL) {\n // Only execute on first pass\n if (u_pass > 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec2 center = vec2(0.5);\n vec2 dir = v_texCoord - center;\n float dist = length(dir);\n\n if (dist < 1e-4) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec4 sum = vec4(0.0);\n float totalWeight = 0.0;\n float angleStep = radius * RADIAL_STRENGTH;\n\n dir /= dist;\n\n float cosStep = cos(angleStep);\n float sinStep = sin(angleStep);\n\n float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n vec2 rotDir = vec2(\n dir.x * cos(negAngle) - dir.y * sin(negAngle),\n dir.x * sin(negAngle) + dir.y * cos(negAngle)\n );\n\n for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n vec2 uv = center + rotDir * dist;\n float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n sum += texture(u_image0, uv) * w;\n totalWeight += w;\n\n rotDir = vec2(\n rotDir.x * cosStep - rotDir.y * sinStep,\n rotDir.x * sinStep + rotDir.y * cosStep\n );\n }\n\n fragColor0 = sum / max(totalWeight, 0.001);\n return;\n }\n\n // Separable Gaussian / Box blur\n int samples = int(ceil(radius));\n\n if (samples == 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n 
return;\n }\n\n // Direction: pass 0 = horizontal, pass 1 = vertical\n vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n vec4 color = vec4(0.0);\n float totalWeight = 0.0;\n float sigma = radius / 2.0;\n\n for (int i = -samples; i <= samples; i++) {\n vec2 offset = dir * float(i) * texelSize;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float weight;\n if (u_int0 == BLUR_GAUSSIAN) {\n weight = gaussian(float(i), sigma);\n } else {\n // BLUR_BOX\n weight = 1.0;\n }\n\n color += sample_color * weight;\n totalWeight += weight;\n }\n\n fragColor0 = color / totalWeight;\n}\n", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 10, + "origin_id": 10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 11, + "origin_id": 12, + "origin_slot": 1, + "target_id": 1, + "target_slot": 4, + "type": "INT" + }, + { + "id": 9, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 3, + "origin_id": 1, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Blur", + "description": "Applies Gaussian, Box, or Radial blur to soften images and create stylized depth or motion effects." 
+ } + ] + } +} diff --git a/blueprints/Image Captioning (gemini).json b/blueprints/Image Captioning (gemini).json index 89ebac802..2fc5d6746 100644 --- a/blueprints/Image Captioning (gemini).json +++ b/blueprints/Image Captioning (gemini).json @@ -1 +1,318 @@ -{"revision": 0, "last_node_id": 231, "last_link_id": 0, "nodes": [{"id": 231, "type": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", "pos": [23.13283014087665, 1034.468391137315], "size": [280, 260], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": null}, {"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": []}], "properties": {"proxyWidgets": [["-1", "prompt"], ["-1", "model"], ["1", "seed"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["Describe this image", "gemini-2.5-pro"], "title": "Image Captioning(Gemini)"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 16, "lastLinkId": 16, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Captioning(Gemini)", "inputNode": {"id": -10, "bounding": [-6870, 2530, 120, 100]}, "outputNode": {"id": -20, "bounding": [-6240, 2530, 120, 60]}, "inputs": [{"id": "97cb8fa5-0514-4e05-b206-46fa6d7b5589", "name": "images", "type": "IMAGE", "linkIds": [1], "localized_name": "images", "shape": 7, "pos": [-6770, 2550]}, {"id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", "name": "prompt", "type": "STRING", "linkIds": [15], "pos": [-6770, 2570]}, {"id": "b034e26a-d114-4604-aec2-32783e86aa6b", "name": "model", "type": "COMBO", "linkIds": [16], "pos": [-6770, 2590]}], "outputs": [{"id": "e12c6e80-5210-4328-a581-bc8924c53070", "name": "STRING", "type": "STRING", "linkIds": [6], 
"localized_name": "STRING", "pos": [-6220, 2550]}], "widgets": [], "nodes": [{"id": 1, "type": "GeminiNode", "pos": [-6690, 2360], "size": [390, 430], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 1}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 15}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": 16}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["Describe this image", "gemini-2.5-pro", 511865409297955, "randomize", "- Role: AI Image Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze images and generate detailed descriptions which can be used as drawing prompts to create similar images. This is essential for tasks like content creation, design inspiration, and artistic exploration.\n- Profile: As an AI Image Analysis and Description Specialist, you possess extensive knowledge in computer vision, image processing, and natural language generation. 
You are adept at interpreting visual data and translating it into descriptive text that can guide the creation of new images.\n- Skills: Proficiency in image recognition, feature extraction, descriptive language generation, and understanding of artistic elements such as composition, color, and texture.\n- Goals: To analyze the provided image, generate a comprehensive and detailed description that captures the key visual elements, and ensure this description can effectively serve as a drawing prompt for creating similar images.\n- Constrains: The description must be clear, concise, and specific enough to guide the creation of a similar image. It should avoid ambiguity and focus on the most salient features of the image. The output should only contain the drawing prompt.\n- OutputFormat: A detailed text description of the image, highlighting key visual elements such as objects, colors, composition, and any unique features.\n- Workflow:\n 1. Analyze the image to identify key visual elements including objects, colors, and composition.\n 2. Generate a detailed description that captures the essence of the image, ensuring it is specific and actionable.\n 3. 
Refine the description to ensure clarity and conciseness, making it suitable for use as a drawing prompt."], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 1, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 6, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "*"}, {"id": 15, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 4, "type": "STRING"}, {"id": 16, "origin_id": -10, "origin_slot": 2, "target_id": 1, "target_slot": 5, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Image Captioning"}]}} +{ + "revision": 0, + "last_node_id": 231, + "last_link_id": 0, + "nodes": [ + { + "id": 231, + "type": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", + "pos": [ + 23.13283014087665, + 1034.468391137315 + ], + "size": [ + 280, + 260 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "name": "model", + "type": "COMBO", + "widget": { + "name": "model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "prompt" + ], + [ + "-1", + "model" + ], + [ + "1", + "seed" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + "Describe this image", + "gemini-2.5-pro" + ], + "title": "Image Captioning(Gemini)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", + "version": 1, + "state": { + "lastGroupId": 1, + "lastNodeId": 16, + "lastLinkId": 16, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Captioning(Gemini)", + "inputNode": { + "id": -10, + "bounding": [ + 
-6870, + 2530, + 120, + 100 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -6240, + 2530, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "97cb8fa5-0514-4e05-b206-46fa6d7b5589", + "name": "images", + "type": "IMAGE", + "linkIds": [ + 1 + ], + "localized_name": "images", + "shape": 7, + "pos": [ + -6770, + 2550 + ] + }, + { + "id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 15 + ], + "pos": [ + -6770, + 2570 + ] + }, + { + "id": "b034e26a-d114-4604-aec2-32783e86aa6b", + "name": "model", + "type": "COMBO", + "linkIds": [ + 16 + ], + "pos": [ + -6770, + 2590 + ] + } + ], + "outputs": [ + { + "id": "e12c6e80-5210-4328-a581-bc8924c53070", + "name": "STRING", + "type": "STRING", + "linkIds": [ + 6 + ], + "localized_name": "STRING", + "pos": [ + -6220, + 2550 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "GeminiNode", + "pos": [ + -6690, + 2360 + ], + "size": [ + 390, + 430 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "shape": 7, + "type": "IMAGE", + "link": 1 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "localized_name": "video", + "name": "video", + "shape": 7, + "type": "VIDEO", + "link": null + }, + { + "localized_name": "files", + "name": "files", + "shape": 7, + "type": "GEMINI_INPUT_FILES", + "link": null + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 15 + }, + { + "localized_name": "model", + "name": "model", + "type": "COMBO", + "widget": { + "name": "model" + }, + "link": 16 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "system_prompt", + "name": "system_prompt", + "shape": 7, + "type": "STRING", + "widget": { + "name": "system_prompt" + }, + "link": null 
+ } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 6 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "GeminiNode" + }, + "widgets_values": [ + "Describe this image", + "gemini-2.5-pro", + 511865409297955, + "randomize", + "- Role: AI Image Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze images and generate detailed descriptions which can be used as drawing prompts to create similar images. This is essential for tasks like content creation, design inspiration, and artistic exploration.\n- Profile: As an AI Image Analysis and Description Specialist, you possess extensive knowledge in computer vision, image processing, and natural language generation. You are adept at interpreting visual data and translating it into descriptive text that can guide the creation of new images.\n- Skills: Proficiency in image recognition, feature extraction, descriptive language generation, and understanding of artistic elements such as composition, color, and texture.\n- Goals: To analyze the provided image, generate a comprehensive and detailed description that captures the key visual elements, and ensure this description can effectively serve as a drawing prompt for creating similar images.\n- Constrains: The description must be clear, concise, and specific enough to guide the creation of a similar image. It should avoid ambiguity and focus on the most salient features of the image. The output should only contain the drawing prompt.\n- OutputFormat: A detailed text description of the image, highlighting key visual elements such as objects, colors, composition, and any unique features.\n- Workflow:\n 1. Analyze the image to identify key visual elements including objects, colors, and composition.\n 2. Generate a detailed description that captures the essence of the image, ensuring it is specific and actionable.\n 3. 
Refine the description to ensure clarity and conciseness, making it suitable for use as a drawing prompt." + ], + "color": "#432", + "bgcolor": "#653" + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 6, + "origin_id": 1, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "*" + }, + { + "id": 15, + "origin_id": -10, + "origin_slot": 1, + "target_id": 1, + "target_slot": 4, + "type": "STRING" + }, + { + "id": 16, + "origin_id": -10, + "origin_slot": 2, + "target_id": 1, + "target_slot": 5, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Text generation/Image Captioning", + "description": "Generates descriptive captions for images using Google's Gemini multimodal LLM." + } + ] + } +} diff --git a/blueprints/Image Channels.json b/blueprints/Image Channels.json index cb3488883..b6fdff5be 100644 --- a/blueprints/Image Channels.json +++ b/blueprints/Image Channels.json @@ -1 +1,323 @@ -{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": 
{"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", "label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": 
"IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} +{ + "revision": 0, + "last_node_id": 29, + "last_link_id": 0, + "nodes": [ + { + "id": 29, + "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", + "pos": [ + 1970, + -230 + ], + "size": [ + 180, + 86 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "R", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + }, + 
{ + "label": "G", + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": [] + }, + { + "label": "B", + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": [] + }, + { + "label": "A", + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": [] + } + ], + "title": "Image Channels", + "properties": { + "proxyWidgets": [] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 28, + "lastLinkId": 39, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Channels", + "inputNode": { + "id": -10, + "bounding": [ + 1820, + -185, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2460, + -215, + 120, + 120 + ] + }, + "inputs": [ + { + "id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 39 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 1920, + -165 + ] + } + ], + "outputs": [ + { + "id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 26 + ], + "localized_name": "IMAGE0", + "label": "R", + "pos": [ + 2480, + -195 + ] + }, + { + "id": "fb44a77e-0522-43e9-9527-82e7465b3596", + "name": "IMAGE1", + "type": "IMAGE", + "linkIds": [ + 27 + ], + "localized_name": "IMAGE1", + "label": "G", + "pos": [ + 2480, + -175 + ] + }, + { + "id": "81460ee6-0131-402a-874f-6bf3001fc4ff", + "name": "IMAGE2", + "type": "IMAGE", + "linkIds": [ + 28 + ], + "localized_name": "IMAGE2", + "label": "B", + "pos": [ + 2480, + -155 + ] + }, + { + "id": "ae690246-80d4-4951-b1d9-9306d8a77417", + "name": "IMAGE3", + "type": "IMAGE", + "linkIds": [ + 29 + ], + "localized_name": "IMAGE3", + "label": "A", + "pos": [ + 2480, + -135 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 23, + "type": 
"GLSLShader", + "pos": [ + 2000, + -330 + ], + "size": [ + 400, + 172 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 39 + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "R", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 26 + ] + }, + { + "label": "G", + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": [ + 27 + ] + }, + { + "label": "B", + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": [ + 28 + ] + }, + { + "label": "A", + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": [ + 29 + ] + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 39, + 
"origin_id": -10, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 26, + "origin_id": 23, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 27, + "origin_id": 23, + "origin_slot": 1, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 23, + "origin_slot": 2, + "target_id": -20, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 29, + "origin_id": 23, + "origin_slot": 3, + "target_id": -20, + "target_slot": 3, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Manipulates individual RGBA channels for masking, compositing, and channel effects." + } + ] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (FireRed Image Edit 1.1).json b/blueprints/Image Edit (FireRed Image Edit 1.1).json new file mode 100644 index 000000000..b82c7d18b --- /dev/null +++ b/blueprints/Image Edit (FireRed Image Edit 1.1).json @@ -0,0 +1,2149 @@ +{ + "revision": 0, + "last_node_id": 213, + "last_link_id": 0, + "nodes": [ + { + "id": 213, + "type": "e35fbbeb-d7b1-46d1-a74e-959517d0fb1a", + "pos": [ + -700, + -470 + ], + "size": [ + 500, + 0 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "image2 (optional)", + "name": "image2_1", + "type": "IMAGE", + "link": null + }, + { + "label": "image3 (optional)", + "name": "image3_1", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + 
"widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "208", + "prompt" + ], + [ + "207", + "value" + ], + [ + "210", + "seed" + ], + [ + "205", + "unet_name" + ], + [ + "203", + "clip_name" + ], + [ + "202", + "vae_name" + ], + [ + "204", + "lora_name" + ], + [ + "210", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Image Edit (FireRed Image Edit 1.1)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "e35fbbeb-d7b1-46d1-a74e-959517d0fb1a", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 213, + "lastLinkId": 378, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (FireRed Image Edit 1.1)", + "inputNode": { + "id": -10, + "bounding": [ + -1670, + -1370, + 151.744140625, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1860, + -1340, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "1d810e30-f1fb-4d10-95f8-3c5f7db2c8b7", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 371 + ], + "localized_name": "image", + "pos": [ + -1538.255859375, + -1350 + ] + }, + { + "id": "a8decf32-2262-4cdd-9e6b-c0ca7d4cdebe", + "name": "image2_1", + "type": "IMAGE", + "linkIds": [ + 355, + 
356 + ], + "label": "image2 (optional)", + "pos": [ + -1538.255859375, + -1330 + ] + }, + { + "id": "3ff7a4ed-8e3d-45d4-b1d8-40ed88a6def6", + "name": "image3_1", + "type": "IMAGE", + "linkIds": [ + 357, + 358 + ], + "label": "image3 (optional)", + "pos": [ + -1538.255859375, + -1310 + ] + }, + { + "id": "01d9e68c-c664-4584-9cde-66f60e54eb3c", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 359 + ], + "pos": [ + -1538.255859375, + -1290 + ] + }, + { + "id": "97d24b10-6540-48c4-81eb-a432832f5729", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 364 + ], + "label": "enable_turbo_mode", + "pos": [ + -1538.255859375, + -1270 + ] + }, + { + "id": "15890efb-ba15-41cd-91ef-5adad7a52167", + "name": "seed", + "type": "INT", + "linkIds": [ + 372 + ], + "pos": [ + -1538.255859375, + -1250 + ] + }, + { + "id": "43f22fe2-6836-4f75-8146-04c84fbba75d", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 373 + ], + "pos": [ + -1538.255859375, + -1230 + ] + }, + { + "id": "cd5e4502-2aca-4645-9e2e-ca8719f05bf6", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 374 + ], + "pos": [ + -1538.255859375, + -1210 + ] + }, + { + "id": "f6ae73dc-39e8-44b2-958d-705ae159ea86", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 375 + ], + "pos": [ + -1538.255859375, + -1190 + ] + }, + { + "id": "66dc179d-e6c9-4485-a2db-a47d25b44363", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 376 + ], + "pos": [ + -1538.255859375, + -1170 + ] + } + ], + "outputs": [ + { + "id": "712c5c76-8620-44e1-9c9d-0798b6cdb77a", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 292 + ], + "localized_name": "IMAGE", + "pos": [ + 1880, + -1320 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 193, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1010, + -1680 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 326 + }, + { + "localized_name": 
"shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 294 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingAuraFlow", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1 + ] + }, + { + "id": 194, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1690 + ], + "size": [ + 260, + 140 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 324 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 325 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 326 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 195, + "type": "PrimitiveInt", + "pos": [ + 190, + -1680 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + 
"name": "INT", + "type": "INT", + "links": [ + 329 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 196, + "type": "CFGNorm", + "pos": [ + 1010, + -1510 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 294 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "patched_model", + "name": "patched_model", + "type": "MODEL", + "links": [ + 295 + ] + } + ], + "properties": { + "Node name for S&R": "CFGNorm", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 197, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1250 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 333 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 334 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 335 + ] + 
} + ], + "title": "Switch (CFG)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 198, + "type": "PrimitiveInt", + "pos": [ + 190, + -1060 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 337 + ] + } + ], + "title": "Float (Steps)", + "properties": { + "Node name for S&R": "PrimitiveInt", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 199, + "type": "PrimitiveFloat", + "pos": [ + 190, + -1500 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 333 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 200, + "type": "PrimitiveFloat", + "pos": [ + 190, + -1230 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 334 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 201, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1470 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 329 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 337 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 330 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 345 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 202, 
+ "type": "VAELoader", + "pos": [ + -960, + -1100 + ], + "size": [ + 400, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 375 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 298, + 299, + 300, + 314 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.0-ComfyUI/resolve/main/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 203, + "type": "CLIPLoader", + "pos": [ + -960, + -1400 + ], + "size": [ + 400, + 150 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 374 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 296, + 297 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 204, + "type": "LoraLoaderModelOnly", + "pos": [ + 100, + -900 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 316 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 376 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "models": [ + { + "name": "FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.0-ComfyUI/resolve/main/FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + "directory": "loras" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + 1 + ] + }, + { + "id": 205, + "type": "UNETLoader", + "pos": [ 
+ -960, + -1670 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 373 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 316, + 324 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "models": [ + { + "name": "FireRed-Image-Edit-1.1-transformer.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1-ComfyUI/resolve/main/FireRed-Image-Edit-1.1-transformer.safetensors", + "directory": "diffusion_models" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "FireRed-Image-Edit-1.1-transformer.safetensors", + "default" + ] + }, + { + "id": 206, + "type": "VAEEncode", + "pos": [ + -390, + -810 + ], + "size": [ + 390, + 100 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 368 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 300 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 303 + ] + } + ], + "properties": { + "Node name for S&R": "VAEEncode", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 207, + "type": "PrimitiveBoolean", + "pos": [ + 160, + -650 + ], + "size": [ + 400, + 100 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 364 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 323, + 330, + 336 + ] + } + ], + "title": "Enable Lightning LoRA?", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 208, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -480, + -1690 + ], + "size": [ + 470, + 370 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 296 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 298 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 369 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 355 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 357 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 312 + ] + } + ], + "title": "TextEncodeQwenImageEditPlus (Positive)", + "properties": { + "Node 
name for S&R": "TextEncodeQwenImageEditPlus", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 209, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -470, + -1240 + ], + "size": [ + 460, + 290 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 297 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 299 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 370 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 356 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 358 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 313 + ] + } + ], + "properties": { + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 210, + "type": "KSampler", + "pos": [ + 1010, + -1340 + ], + "size": [ + 270, + 480 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + 
"localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 295 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 312 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 313 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 303 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 372 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 345 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 335 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 273 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 43, + "fixed", + 40, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 211, + "type": "VAEDecode", + "pos": [ + 1440, + -1340 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + 
"type": "LATENT", + "link": 273 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 314 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 292 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 212, + "type": "ResizeImageMaskNode", + "pos": [ + -900, + -810 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 371 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "resize_type.megapixels", + "name": "resize_type.megapixels", + "type": "FLOAT", + "widget": { + "name": "resize_type.megapixels" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 368, + 369, + 370 + ] + } + ], + "properties": { + "Node name for S&R": "ResizeImageMaskNode", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "scale total pixels", + 1, + "lanczos" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -990, + -1770, + 460, + 870 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Prompt", + "bounding": 
[ + -500, + -1770, + 510, + 870 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Original", + "bounding": [ + 40, + -1770, + 530, + 410 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Lightning LoRA", + "bounding": [ + 40, + -1330, + 560, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 326, + "origin_id": 194, + "origin_slot": 0, + "target_id": 193, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 324, + "origin_id": 205, + "origin_slot": 0, + "target_id": 194, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 325, + "origin_id": 204, + "origin_slot": 0, + "target_id": 194, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 323, + "origin_id": 207, + "origin_slot": 0, + "target_id": 194, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 294, + "origin_id": 193, + "origin_slot": 0, + "target_id": 196, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 333, + "origin_id": 199, + "origin_slot": 0, + "target_id": 197, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 334, + "origin_id": 200, + "origin_slot": 0, + "target_id": 197, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 336, + "origin_id": 207, + "origin_slot": 0, + "target_id": 197, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 329, + "origin_id": 195, + "origin_slot": 0, + "target_id": 201, + "target_slot": 0, + "type": "INT" + }, + { + "id": 337, + "origin_id": 198, + "origin_slot": 0, + "target_id": 201, + "target_slot": 1, + "type": "INT" + }, + { + "id": 330, + "origin_id": 207, + "origin_slot": 0, + "target_id": 201, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 297, + "origin_id": 203, + "origin_slot": 0, + "target_id": 209, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 299, + "origin_id": 202, + "origin_slot": 0, + "target_id": 209, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 316, + 
"origin_id": 205, + "origin_slot": 0, + "target_id": 204, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 296, + "origin_id": 203, + "origin_slot": 0, + "target_id": 208, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 298, + "origin_id": 202, + "origin_slot": 0, + "target_id": 208, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 300, + "origin_id": 202, + "origin_slot": 0, + "target_id": 206, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 295, + "origin_id": 196, + "origin_slot": 0, + "target_id": 210, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 312, + "origin_id": 208, + "origin_slot": 0, + "target_id": 210, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 313, + "origin_id": 209, + "origin_slot": 0, + "target_id": 210, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 303, + "origin_id": 206, + "origin_slot": 0, + "target_id": 210, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 345, + "origin_id": 201, + "origin_slot": 0, + "target_id": 210, + "target_slot": 5, + "type": "INT" + }, + { + "id": 335, + "origin_id": 197, + "origin_slot": 0, + "target_id": 210, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 273, + "origin_id": 210, + "origin_slot": 0, + "target_id": 211, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 314, + "origin_id": 202, + "origin_slot": 0, + "target_id": 211, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 292, + "origin_id": 211, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 355, + "origin_id": -10, + "origin_slot": 1, + "target_id": 208, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 356, + "origin_id": -10, + "origin_slot": 1, + "target_id": 209, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 357, + "origin_id": -10, + "origin_slot": 2, + "target_id": 208, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 358, + "origin_id": -10, + "origin_slot": 2, + "target_id": 209, + 
"target_slot": 4, + "type": "IMAGE" + }, + { + "id": 359, + "origin_id": -10, + "origin_slot": 3, + "target_id": 208, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 364, + "origin_id": -10, + "origin_slot": 4, + "target_id": 207, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 368, + "origin_id": 212, + "origin_slot": 0, + "target_id": 206, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 369, + "origin_id": 212, + "origin_slot": 0, + "target_id": 208, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 370, + "origin_id": 212, + "origin_slot": 0, + "target_id": 209, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 0, + "target_id": 212, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 372, + "origin_id": -10, + "origin_slot": 5, + "target_id": 210, + "target_slot": 4, + "type": "INT" + }, + { + "id": 373, + "origin_id": -10, + "origin_slot": 6, + "target_id": 205, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 374, + "origin_id": -10, + "origin_slot": 7, + "target_id": 203, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 375, + "origin_id": -10, + "origin_slot": 8, + "target_id": 202, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 376, + "origin_id": -10, + "origin_slot": 9, + "target_id": 204, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Edit image", + "description": "Edits images via text instructions using FireRed Image Edit 1.1, a diffusion-based instruction-following editing model." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (Flux.2 Dev).json b/blueprints/Image Edit (Flux.2 Dev).json new file mode 100644 index 000000000..92827bf17 --- /dev/null +++ b/blueprints/Image Edit (Flux.2 Dev).json @@ -0,0 +1,2050 @@ +{ + "revision": 0, + "last_node_id": 139, + "last_link_id": 0, + "nodes": [ + { + "id": 139, + "type": "41b0c117-7470-454c-914e-b8742dc06d62", + "pos": [ + -650, + 570 + ], + "size": [ + 400, + 0 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "turbo_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "123", + "text" + ], + [ + "129", + "unet_name" + ], + [ + "124", + "clip_name" + ], + [ + "121", + "vae_name" + ], + [ + "138", + "value" + ], + [ + "128", + "lora_name" + ], + [ + "125", + "noise_seed" + ], + [ + "125", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": { + "text": true, + "value": true, + "lora_name": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + 
"enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Image Edit (Flux.2 Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "41b0c117-7470-454c-914e-b8742dc06d62", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 139, + "lastLinkId": 194, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (Flux.2 Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1520, + 400, + 151.744140625, + 180 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1240, + 420, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "fc74acd5-30a9-410b-abb5-4a4171ba3d25", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 126, + 169 + ], + "localized_name": "pixels", + "label": "image", + "pos": [ + -1388.255859375, + 420 + ] + }, + { + "id": "3e69affa-397b-4d52-82d7-68dfcef9e761", + "name": "text", + "type": "STRING", + "linkIds": [ + 168 + ], + "label": "prompt", + "pos": [ + -1388.255859375, + 440 + ] + }, + { + "id": "2f016a8a-fb3e-4cb9-97f2-a991defe4fa2", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 177 + ], + "pos": [ + -1388.255859375, + 460 + ] + }, + { + "id": "799b9dc7-0c90-4b19-9a13-e01d896bea1f", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 178 + ], + "pos": [ + -1388.255859375, + 480 + ] + }, + { + "id": "e58a83c9-1b93-4378-9598-f24068820313", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 179 + ], + "pos": [ + -1388.255859375, + 500 + ] + }, + { + "id": "8335a4a9-0ce4-4e67-a641-1c9d7a762977", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 191 + ], + "label": "enable_turbo_mode", + "pos": [ + -1388.255859375, + 520 + ] + }, + { + "id": "890b22b4-44a7-4707-912a-ca8b4ee7b7c9", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 192 + ], + "label": "turbo_lora", + "pos": [ + 
-1388.255859375, + 540 + ] + } + ], + "outputs": [ + { + "id": "3eaa05d6-4960-4a7c-bf2a-8b585fbb7c9c", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 1260, + 440 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 118, + "type": "Flux2Scheduler", + "pos": [ + 540, + 430 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 188 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 170 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 172 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 132 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "Flux2Scheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 1248, + 832 + ] + }, + { + "id": 119, + "type": "BasicGuider", + "pos": [ + 530, + 120 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 185 + }, + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 166 + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 30 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + }, + "Node name for S&R": "BasicGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 120, + "type": "KSamplerSelect", + "pos": [ + 530, + 270 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 19 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 121, + "type": "VAELoader", + "pos": [ + -970, + 390 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 179 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 127, + 159 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "full_encoder_small_decoder.safetensors", + "url": 
"https://huggingface.co/black-forest-labs/FLUX.2-small-decoder/resolve/main/full_encoder_small_decoder.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "full_encoder_small_decoder.safetensors" + ] + }, + { + "id": 122, + "type": "SamplerCustomAdvanced", + "pos": [ + 790, + -50 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 37 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 30 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 19 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 132 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 161 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 24 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 123, + "type": "CLIPTextEncode", + "pos": [ + -630, + -50 + ], + "size": [ + 430, + 360 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 117 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 168 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", 
+ "slot_index": 0, + "links": [ + 41 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 124, + "type": "CLIPLoader", + "pos": [ + -970, + 160 + ], + "size": [ + 300, + 150 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 178 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 117 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "mistral_3_small_flux2_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/text_encoders/mistral_3_small_flux2_bf16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "mistral_3_small_flux2_bf16.safetensors", + "flux2", + "default" + ] + }, + { + "id": 125, + "type": 
"RandomNoise", + "pos": [ + 530, + -50 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 37 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 342971778941390, + "randomize" + ] + }, + { + "id": 126, + "type": "VAEDecode", + "pos": [ + 830, + 410 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 24 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 159 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 127, + "type": "FluxGuidance", + "pos": [ + -520, + 390 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 41 + }, + { + "localized_name": "guidance", + 
"name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 144 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 128, + "type": "LoraLoaderModelOnly", + "pos": [ + -150, + 200 + ], + "size": [ + 300, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 192 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 183 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "Flux_2-Turbo-LoRA_comfyui.safetensors", + "url": "https://huggingface.co/ByteZSzn/Flux.2-Turbo-ComfyUI/resolve/main/Flux_2-Turbo-LoRA_comfyui.safetensors", + "directory": "loras" + } + ] + }, + 
"widgets_values": [ + "Flux_2-Turbo-LoRA_comfyui.safetensors", + 1 + ] + }, + { + "id": 129, + "type": "UNETLoader", + "pos": [ + -970, + -40 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 177 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 181, + 184 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "flux2_dev_fp8mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/diffusion_models/flux2_dev_fp8mixed.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux2_dev_fp8mixed.safetensors", + "default" + ] + }, + { + "id": 130, + "type": "ComfySwitchNode", + "pos": [ + 220, + 10 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 184 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 183 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 190 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 185 + ] + } + ], + "title": "Switch(model)", + "properties": 
{ + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 131, + "type": "PrimitiveInt", + "pos": [ + -150, + 430 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 186 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 132, + "type": "PrimitiveInt", + "pos": [ + -150, + -50 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 187 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send 
Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 133, + "type": "ComfySwitchNode", + "pos": [ + 220, + 280 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 187 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 186 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 189 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 188 + ] + } + ], + "title": "Switch(steps)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 134, + "type": "EmptyFlux2LatentImage", + "pos": [ + 530, + 790 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 171 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 173 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 161 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptyFlux2LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1248, + 832, + 1 + ] + }, + { + "id": 135, + "type": "GetImageSize", + "pos": [ + -100, + 810 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 169 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 170, + 171 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 172, + 173 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 136, + "type": "VAEEncode", + "pos": [ + -910, + 600 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": true + }, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 126 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 127 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 125 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, 
+ "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 137, + "type": "ReferenceLatent", + "pos": [ + -470, + 580 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 144 + }, + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 125 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 166 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ReferenceLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 138, + "type": "PrimitiveBoolean", + "pos": [ + -130, + 640 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 191 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 189, + 190 + ] + } + ], + "title": "Enable 8 steps lora", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": 
[ + -980, + -120, + 320, + 640 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Custom sampler", + "bounding": [ + 520, + -120, + 590, + 740 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Image size", + "bounding": [ + 510, + 690, + 590, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Prompt", + "bounding": [ + -640, + -120, + 450, + 640 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Original", + "bounding": [ + -160, + -120, + 340, + 230 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "8 Steps LoRA", + "bounding": [ + -160, + 130, + 340, + 430 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 41, + "origin_id": 123, + "origin_slot": 0, + "target_id": 127, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 144, + "origin_id": 127, + "origin_slot": 0, + "target_id": 137, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 125, + "origin_id": 136, + "origin_slot": 0, + "target_id": 137, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 37, + "origin_id": 125, + "origin_slot": 0, + "target_id": 122, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 30, + "origin_id": 119, + "origin_slot": 0, + "target_id": 122, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 19, + "origin_id": 120, + "origin_slot": 0, + "target_id": 122, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 132, + "origin_id": 118, + "origin_slot": 0, + "target_id": 122, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 161, + "origin_id": 134, + "origin_slot": 0, + "target_id": 122, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 24, + "origin_id": 122, + "origin_slot": 0, + "target_id": 126, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 159, + "origin_id": 121, + 
"origin_slot": 0, + "target_id": 126, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 117, + "origin_id": 124, + "origin_slot": 0, + "target_id": 123, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 127, + "origin_id": 121, + "origin_slot": 0, + "target_id": 136, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 126, + "origin_id": -10, + "origin_slot": 0, + "target_id": 136, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 9, + "origin_id": 126, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 166, + "origin_id": 137, + "origin_slot": 0, + "target_id": 119, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 168, + "origin_id": -10, + "origin_slot": 1, + "target_id": 123, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 169, + "origin_id": -10, + "origin_slot": 0, + "target_id": 135, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 170, + "origin_id": 135, + "origin_slot": 0, + "target_id": 118, + "target_slot": 1, + "type": "INT" + }, + { + "id": 171, + "origin_id": 135, + "origin_slot": 0, + "target_id": 134, + "target_slot": 0, + "type": "INT" + }, + { + "id": 172, + "origin_id": 135, + "origin_slot": 1, + "target_id": 118, + "target_slot": 2, + "type": "INT" + }, + { + "id": 173, + "origin_id": 135, + "origin_slot": 1, + "target_id": 134, + "target_slot": 1, + "type": "INT" + }, + { + "id": 177, + "origin_id": -10, + "origin_slot": 2, + "target_id": 129, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 178, + "origin_id": -10, + "origin_slot": 3, + "target_id": 124, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 179, + "origin_id": -10, + "origin_slot": 4, + "target_id": 121, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 181, + "origin_id": 129, + "origin_slot": 0, + "target_id": 128, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 183, + "origin_id": 128, + "origin_slot": 0, + "target_id": 130, + "target_slot": 1, + "type": "MODEL" + }, + { + 
"id": 184, + "origin_id": 129, + "origin_slot": 0, + "target_id": 130, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 185, + "origin_id": 130, + "origin_slot": 0, + "target_id": 119, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 186, + "origin_id": 131, + "origin_slot": 0, + "target_id": 133, + "target_slot": 1, + "type": "INT" + }, + { + "id": 187, + "origin_id": 132, + "origin_slot": 0, + "target_id": 133, + "target_slot": 0, + "type": "INT" + }, + { + "id": 188, + "origin_id": 133, + "origin_slot": 0, + "target_id": 118, + "target_slot": 0, + "type": "INT" + }, + { + "id": 189, + "origin_id": 138, + "origin_slot": 0, + "target_id": 133, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 190, + "origin_id": 138, + "origin_slot": 0, + "target_id": 130, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 191, + "origin_id": -10, + "origin_slot": 5, + "target_id": 138, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 192, + "origin_id": -10, + "origin_slot": 6, + "target_id": 128, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Edit image", + "description": "Edits an image from text instructions using Flux.2 [dev], with guidance, schedulers, and optional Turbo LoRAs." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (Flux.2 Klein 4B).json b/blueprints/Image Edit (Flux.2 Klein 4B).json index c87c7e122..7f6fa7a4b 100644 --- a/blueprints/Image Edit (Flux.2 Klein 4B).json +++ b/blueprints/Image Edit (Flux.2 Klein 4B).json @@ -1 +1,1842 @@ -{"id": "6686cb78-8003-4289-b969-929755e9a84d", "revision": 0, "last_node_id": 81, "last_link_id": 179, "nodes": [{"id": 75, "type": "7b34ab90-36f9-45ba-a665-71d418f0df18", "pos": [311.66672468419983, 830], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "image", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["73", "noise_seed"], ["73", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.8.2", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", null, null, "flux-2-klein-base-4b-fp8.safetensors", "qwen_3_4b.safetensors", "flux2-vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "7b34ab90-36f9-45ba-a665-71d418f0df18", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 81, "lastLinkId": 179, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Edit (Flux.2 Klein 4B)", "inputNode": {"id": -10, "bounding": [-576.3333463986639, 559.0277780034634, 120, 140]}, "outputNode": {"id": -20, 
"bounding": [1373.6666536013363, 549.0277780034634, 120, 60]}, "inputs": [{"id": "7061147a-fb75-450d-8e97-c8be594a8e16", "name": "text", "type": "STRING", "linkIds": [162], "label": "prompt", "pos": [-476.33334639866393, 579.0277780034634]}, {"id": "68629112-b7b0-41ce-8912-23adad00d3db", "name": "image", "type": "IMAGE", "linkIds": [175], "pos": [-476.33334639866393, 599.0277780034634]}, {"id": "006f0b42-cb11-4484-8b7e-c34a9fb12824", "name": "unet_name", "type": "COMBO", "linkIds": [177], "pos": [-476.33334639866393, 619.0277780034634]}, {"id": "0083499c-8e83-4974-a587-ba6e89e36acc", "name": "clip_name", "type": "COMBO", "linkIds": [178], "pos": [-476.33334639866393, 639.0277780034634]}, {"id": "7c95e27c-7920-43d5-a0ac-c6570653f5da", "name": "vae_name", "type": "COMBO", "linkIds": [179], "pos": [-476.33334639866393, 659.0277780034634]}], "outputs": [{"id": "c5e7966d-07ed-4c9a-ad89-9d378a41ea7b", "name": "IMAGE", "type": "IMAGE", "linkIds": [153], "localized_name": "IMAGE", "pos": [1393.6666536013363, 569.0277780034634]}], "widgets": [], "nodes": [{"id": 61, "type": "KSamplerSelect", "pos": [560, 460], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [144]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 62, "type": "Flux2Scheduler", "pos": [560, 560], "size": [270, 106], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": 
"width"}, "link": 171}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 173}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "Flux2Scheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 1024, 1024]}, {"id": 63, "type": "CFGGuider", "pos": [560, 320], "size": [270, 98], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 139}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 167}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 168}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [143]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [5]}, {"id": 65, "type": "VAEDecode", "pos": [1093.6666007601261, 154.02777277882814], "size": [220, 46], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 147}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 148}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [153]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, 
"widgets_values": []}, {"id": 70, "type": "UNETLoader", "pos": [-386.3333318901398, 203.8611174586574], "size": [364.42708333333337, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 177}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [139]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "UNETLoader", "models": [{"name": "flux-2-klein-base-4b-fp8.safetensors", "url": "https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4b-fp8/resolve/main/flux-2-klein-base-4b-fp8.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["flux-2-klein-base-4b-fp8.safetensors", "default"]}, {"id": 71, "type": "CLIPLoader", "pos": [-386.3333318901398, 353.8611341117752], "size": [364.42708333333337, 106], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 178}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [151, 152]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, 
"hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "flux2", "default"]}, {"id": 74, "type": "CLIPTextEncode", "pos": [43.666666014853874, 204.02777159555063], "size": [430, 230], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 151}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 162}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [165]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 67, "type": "CLIPTextEncode", "pos": [43.666666014853874, 534.0277718670993], "size": [430, 88], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 152}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [166]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 72, "type": "VAELoader", "pos": [-386.3333318901398, 523.8611624133522], "size": [364.42708333333337, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": 
"vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 179}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [148, 176]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAELoader", "models": [{"name": "flux2-vae.safetensors", "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/vae/flux2-vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["flux2-vae.safetensors"]}, {"id": 66, "type": "EmptyFlux2LatentImage", "pos": [570, 740], "size": [270, 106], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 172}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 174}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [146]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "EmptyFlux2LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 80, "type": "ImageScaleToTotalPixels", "pos": [-391.6666683297289, 715.194415255584], "size": [270, 106], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 175}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": 
"resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [169, 170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ImageScaleToTotalPixels", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["nearest-exact", 1, 1]}, {"id": 79, "type": "6007e698-2ebd-4917-84d8-299b35d7b7ab", "pos": [238.33332484215495, 835.1944447404384], "size": [240, 86], "flags": {}, "order": 12, "mode": 0, "inputs": [{"label": "positive", "name": "conditioning", "type": "CONDITIONING", "link": 165}, {"label": "negative", "name": "conditioning_1", "type": "CONDITIONING", "link": 166}, {"name": "pixels", "type": "IMAGE", "link": 169}, {"name": "vae", "type": "VAE", "link": 176}], "outputs": [{"label": "positive", "name": "CONDITIONING", "type": "CONDITIONING", "links": [167]}, {"label": "negative", "name": "CONDITIONING_1", "type": "CONDITIONING", "links": [168]}], "properties": {"proxyWidgets": [], "cnr_id": "comfy-core", "ver": "0.8.2", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 81, "type": "GetImageSize", "pos": [310, 720], "size": [187.5, 66], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 170}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [171, 172]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [173, 174]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, 
"tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 64, "type": "SamplerCustomAdvanced", "pos": [860, 220], "size": [212.3638671875, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 142}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 143}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 144}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 145}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 146}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [147]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 73, "type": "RandomNoise", "pos": [560, 200], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [142]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}], "groups": [{"id": 1, "title": "Models", "bounding": [-390, 120, 380, 550], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [30, 120, 470, 550], "color": "#3f789e", "font_size": 24, 
"flags": {}}, {"id": 3, "title": "Sampler", "bounding": [540, 120, 532.3638671875, 550], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 139, "origin_id": 70, "origin_slot": 0, "target_id": 63, "target_slot": 0, "type": "MODEL"}, {"id": 142, "origin_id": 73, "origin_slot": 0, "target_id": 64, "target_slot": 0, "type": "NOISE"}, {"id": 143, "origin_id": 63, "origin_slot": 0, "target_id": 64, "target_slot": 1, "type": "GUIDER"}, {"id": 144, "origin_id": 61, "origin_slot": 0, "target_id": 64, "target_slot": 2, "type": "SAMPLER"}, {"id": 145, "origin_id": 62, "origin_slot": 0, "target_id": 64, "target_slot": 3, "type": "SIGMAS"}, {"id": 146, "origin_id": 66, "origin_slot": 0, "target_id": 64, "target_slot": 4, "type": "LATENT"}, {"id": 147, "origin_id": 64, "origin_slot": 0, "target_id": 65, "target_slot": 0, "type": "LATENT"}, {"id": 148, "origin_id": 72, "origin_slot": 0, "target_id": 65, "target_slot": 1, "type": "VAE"}, {"id": 152, "origin_id": 71, "origin_slot": 0, "target_id": 67, "target_slot": 0, "type": "CLIP"}, {"id": 151, "origin_id": 71, "origin_slot": 0, "target_id": 74, "target_slot": 0, "type": "CLIP"}, {"id": 153, "origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 162, "origin_id": -10, "origin_slot": 0, "target_id": 74, "target_slot": 1, "type": "STRING"}, {"id": 165, "origin_id": 74, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "CONDITIONING"}, {"id": 166, "origin_id": 67, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "CONDITIONING"}, {"id": 167, "origin_id": 79, "origin_slot": 0, "target_id": 63, "target_slot": 1, "type": "CONDITIONING"}, {"id": 168, "origin_id": 79, "origin_slot": 1, "target_id": 63, "target_slot": 2, "type": "CONDITIONING"}, {"id": 169, "origin_id": 80, "origin_slot": 0, "target_id": 79, "target_slot": 2, "type": "IMAGE"}, {"id": 170, "origin_id": 80, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "IMAGE"}, {"id": 171, 
"origin_id": 81, "origin_slot": 0, "target_id": 62, "target_slot": 1, "type": "INT"}, {"id": 172, "origin_id": 81, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "INT"}, {"id": 173, "origin_id": 81, "origin_slot": 1, "target_id": 62, "target_slot": 2, "type": "INT"}, {"id": 174, "origin_id": 81, "origin_slot": 1, "target_id": 66, "target_slot": 1, "type": "INT"}, {"id": 175, "origin_id": -10, "origin_slot": 1, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 176, "origin_id": 72, "origin_slot": 0, "target_id": 79, "target_slot": 3, "type": "VAE"}, {"id": 177, "origin_id": -10, "origin_slot": 2, "target_id": 70, "target_slot": 0, "type": "COMBO"}, {"id": 178, "origin_id": -10, "origin_slot": 3, "target_id": 71, "target_slot": 0, "type": "COMBO"}, {"id": 179, "origin_id": -10, "origin_slot": 4, "target_id": 72, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Edit image"}, {"id": "6007e698-2ebd-4917-84d8-299b35d7b7ab", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 81, "lastLinkId": 179, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Reference Conditioning", "inputNode": {"id": -10, "bounding": [-270, 990, 120, 120]}, "outputNode": {"id": -20, "bounding": [580, 970, 120, 80]}, "inputs": [{"id": "5c9a0f5e-8cee-4947-90bc-330de782043a", "name": "conditioning", "type": "CONDITIONING", "linkIds": [165], "label": "positive", "pos": [-170, 1010]}, {"id": "61826d46-4c21-4ad6-801c-3e3fa94115e2", "name": "conditioning_1", "type": "CONDITIONING", "linkIds": [166], "label": "negative", "pos": [-170, 1030]}, {"id": "345bf085-5939-47ff-9767-8f8f239a719c", "name": "pixels", "type": "IMAGE", "linkIds": [167], "pos": [-170, 1050]}, {"id": "f4594e34-e2f5-4f1e-b1fa-a1dc2aeb0a90", "name": "vae", "type": "VAE", "linkIds": [168], "pos": [-170, 1070]}], "outputs": [{"id": "b3357c0e-6428-4055-9cd3-3595f0896fa8", "name": "CONDITIONING", "type": "CONDITIONING", "linkIds": 
[169], "label": "positive", "pos": [600, 990]}, {"id": "01519713-2ed1-4694-a387-79f44e088e89", "name": "CONDITIONING_1", "type": "CONDITIONING", "linkIds": [170], "label": "negative", "pos": [600, 1010]}], "widgets": [], "nodes": [{"id": 76, "type": "ReferenceLatent", "pos": [170, 1050], "size": [204.134765625, 46], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 166}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 163}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 78, "type": "VAEEncode", "pos": [-90, 1150], "size": [190, 46], "flags": {"collapsed": false}, "order": 2, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 167}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 168}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [163, 164]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 77, "type": "ReferenceLatent", "pos": [170, 940], "size": [210, 46], "flags": {"collapsed": false}, "order": 1, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 165}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 164}], "outputs": [{"localized_name": "CONDITIONING", "name": 
"CONDITIONING", "type": "CONDITIONING", "links": [169]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}], "groups": [], "links": [{"id": 163, "origin_id": 78, "origin_slot": 0, "target_id": 76, "target_slot": 1, "type": "LATENT"}, {"id": 164, "origin_id": 78, "origin_slot": 0, "target_id": 77, "target_slot": 1, "type": "LATENT"}, {"id": 165, "origin_id": -10, "origin_slot": 0, "target_id": 77, "target_slot": 0, "type": "CONDITIONING"}, {"id": 166, "origin_id": -10, "origin_slot": 1, "target_id": 76, "target_slot": 0, "type": "CONDITIONING"}, {"id": 167, "origin_id": -10, "origin_slot": 2, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 168, "origin_id": -10, "origin_slot": 3, "target_id": 78, "target_slot": 1, "type": "VAE"}, {"id": 169, "origin_id": 77, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "CONDITIONING"}, {"id": 170, "origin_id": 76, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "CONDITIONING"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 1.1478862047043865, "offset": [302.91933883258804, -648.9802050882657]}}, "version": 0.4} +{ + "id": "6686cb78-8003-4289-b969-929755e9a84d", + "revision": 0, + "last_node_id": 81, + "last_link_id": 179, + "nodes": [ + { + "id": 75, + "type": "7b34ab90-36f9-45ba-a665-71d418f0df18", + "pos": [ + 311.66672468419983, + 830 + ], + "size": [ + 400, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 
null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "73", + "noise_seed" + ], + [ + "73", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + null, + null, + "flux-2-klein-base-4b-fp8.safetensors", + "qwen_3_4b.safetensors", + "flux2-vae.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "7b34ab90-36f9-45ba-a665-71d418f0df18", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 81, + "lastLinkId": 179, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (Flux.2 Klein 4B)", + "inputNode": { + "id": -10, + "bounding": [ + -576.3333463986639, + 559.0277780034634, + 120, + 140 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1373.6666536013363, + 549.0277780034634, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "7061147a-fb75-450d-8e97-c8be594a8e16", + "name": "text", + "type": "STRING", + "linkIds": [ + 162 + ], + "label": "prompt", + "pos": [ + -476.33334639866393, + 579.0277780034634 + ] + }, + { + "id": "68629112-b7b0-41ce-8912-23adad00d3db", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 175 + ], + "pos": [ + -476.33334639866393, + 599.0277780034634 + ] + }, + { + "id": "006f0b42-cb11-4484-8b7e-c34a9fb12824", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 177 + ], + "pos": [ + 
-476.33334639866393, + 619.0277780034634 + ] + }, + { + "id": "0083499c-8e83-4974-a587-ba6e89e36acc", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 178 + ], + "pos": [ + -476.33334639866393, + 639.0277780034634 + ] + }, + { + "id": "7c95e27c-7920-43d5-a0ac-c6570653f5da", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 179 + ], + "pos": [ + -476.33334639866393, + 659.0277780034634 + ] + } + ], + "outputs": [ + { + "id": "c5e7966d-07ed-4c9a-ad89-9d378a41ea7b", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 153 + ], + "localized_name": "IMAGE", + "pos": [ + 1393.6666536013363, + 569.0277780034634 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 61, + "type": "KSamplerSelect", + "pos": [ + 560, + 460 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 144 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 62, + "type": "Flux2Scheduler", + "pos": [ + 560, + 560 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 171 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 173 + } + ], + "outputs": [ + { + 
"localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 145 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "Flux2Scheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 1024, + 1024 + ] + }, + { + "id": 63, + "type": "CFGGuider", + "pos": [ + 560, + 320 + ], + "size": [ + 270, + 98 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 139 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 167 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 168 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 143 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 1093.6666007601261, + 154.02777277882814 + ], + "size": [ + 220, + 46 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 147 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 148 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 153 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node 
name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 70, + "type": "UNETLoader", + "pos": [ + -386.3333318901398, + 203.8611174586574 + ], + "size": [ + 364.42708333333337, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 177 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 139 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux-2-klein-base-4b-fp8.safetensors", + "url": "https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4b-fp8/resolve/main/flux-2-klein-base-4b-fp8.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "flux-2-klein-base-4b-fp8.safetensors", + "default" + ] + }, + { + "id": 71, + "type": "CLIPLoader", + "pos": [ + -386.3333318901398, + 353.8611341117752 + ], + "size": [ + 364.42708333333337, + 106 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 178 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + 
"name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 151, + 152 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "flux2", + "default" + ] + }, + { + "id": 74, + "type": "CLIPTextEncode", + "pos": [ + 43.666666014853874, + 204.02777159555063 + ], + "size": [ + 430, + 230 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 151 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 162 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 165 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 43.666666014853874, + 534.0277718670993 + ], + "size": [ + 430, + 88 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 152 + }, + { + "localized_name": 
"text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 166 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 72, + "type": "VAELoader", + "pos": [ + -386.3333318901398, + 523.8611624133522 + ], + "size": [ + 364.42708333333337, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 179 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 148, + 176 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "flux2-vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/vae/flux2-vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "flux2-vae.safetensors" + ] + }, + { + "id": 66, + "type": "EmptyFlux2LatentImage", + "pos": [ + 570, + 740 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 172 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + 
"widget": { + "name": "height" + }, + "link": 174 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 146 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "EmptyFlux2LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 80, + "type": "ImageScaleToTotalPixels", + "pos": [ + -391.6666683297289, + 715.194415255584 + ], + "size": [ + 270, + 106 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 175 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 169, + 170 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "ImageScaleToTotalPixels", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "nearest-exact", + 1, + 1 + ] + }, + { + "id": 79, + "type": "6007e698-2ebd-4917-84d8-299b35d7b7ab", + "pos": [ + 238.33332484215495, + 835.1944447404384 + ], + 
"size": [ + 240, + 86 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "label": "positive", + "name": "conditioning", + "type": "CONDITIONING", + "link": 165 + }, + { + "label": "negative", + "name": "conditioning_1", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "pixels", + "type": "IMAGE", + "link": 169 + }, + { + "name": "vae", + "type": "VAE", + "link": 176 + } + ], + "outputs": [ + { + "label": "positive", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 167 + ] + }, + { + "label": "negative", + "name": "CONDITIONING_1", + "type": "CONDITIONING", + "links": [ + 168 + ] + } + ], + "properties": { + "proxyWidgets": [], + "cnr_id": "comfy-core", + "ver": "0.8.2", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 81, + "type": "GetImageSize", + "pos": [ + 310, + 720 + ], + "size": [ + 187.5, + 66 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 170 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 171, + 172 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 173, + 174 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 64, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 220 + ], + "size": [ + 212.3638671875, + 106 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + 
"localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 142 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 143 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 144 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 145 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 146 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 147 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 73, + "type": "RandomNoise", + "pos": [ + 560, + 200 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 142 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + -390, + 120, + 380, + 550 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Prompt", + "bounding": [ + 30, + 120, + 470, + 550 + 
], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Sampler", + "bounding": [ + 540, + 120, + 532.3638671875, + 550 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 139, + "origin_id": 70, + "origin_slot": 0, + "target_id": 63, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 142, + "origin_id": 73, + "origin_slot": 0, + "target_id": 64, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 143, + "origin_id": 63, + "origin_slot": 0, + "target_id": 64, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 144, + "origin_id": 61, + "origin_slot": 0, + "target_id": 64, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 145, + "origin_id": 62, + "origin_slot": 0, + "target_id": 64, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 146, + "origin_id": 66, + "origin_slot": 0, + "target_id": 64, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 147, + "origin_id": 64, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 148, + "origin_id": 72, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 152, + "origin_id": 71, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 151, + "origin_id": 71, + "origin_slot": 0, + "target_id": 74, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 153, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 162, + "origin_id": -10, + "origin_slot": 0, + "target_id": 74, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 165, + "origin_id": 74, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 166, + "origin_id": 67, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 167, + "origin_id": 79, + "origin_slot": 0, + "target_id": 63, + "target_slot": 
1, + "type": "CONDITIONING" + }, + { + "id": 168, + "origin_id": 79, + "origin_slot": 1, + "target_id": 63, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 169, + "origin_id": 80, + "origin_slot": 0, + "target_id": 79, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 170, + "origin_id": 80, + "origin_slot": 0, + "target_id": 81, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 171, + "origin_id": 81, + "origin_slot": 0, + "target_id": 62, + "target_slot": 1, + "type": "INT" + }, + { + "id": 172, + "origin_id": 81, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "INT" + }, + { + "id": 173, + "origin_id": 81, + "origin_slot": 1, + "target_id": 62, + "target_slot": 2, + "type": "INT" + }, + { + "id": 174, + "origin_id": 81, + "origin_slot": 1, + "target_id": 66, + "target_slot": 1, + "type": "INT" + }, + { + "id": 175, + "origin_id": -10, + "origin_slot": 1, + "target_id": 80, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 176, + "origin_id": 72, + "origin_slot": 0, + "target_id": 79, + "target_slot": 3, + "type": "VAE" + }, + { + "id": 177, + "origin_id": -10, + "origin_slot": 2, + "target_id": 70, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 178, + "origin_id": -10, + "origin_slot": 3, + "target_id": 71, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 179, + "origin_id": -10, + "origin_slot": 4, + "target_id": 72, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Edit image", + "description": "Edits an input image via text instructions using FLUX.2 [klein] 4B." 
+ }, + { + "id": "6007e698-2ebd-4917-84d8-299b35d7b7ab", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 81, + "lastLinkId": 179, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Reference Conditioning", + "inputNode": { + "id": -10, + "bounding": [ + -270, + 990, + 120, + 120 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 580, + 970, + 120, + 80 + ] + }, + "inputs": [ + { + "id": "5c9a0f5e-8cee-4947-90bc-330de782043a", + "name": "conditioning", + "type": "CONDITIONING", + "linkIds": [ + 165 + ], + "label": "positive", + "pos": [ + -170, + 1010 + ] + }, + { + "id": "61826d46-4c21-4ad6-801c-3e3fa94115e2", + "name": "conditioning_1", + "type": "CONDITIONING", + "linkIds": [ + 166 + ], + "label": "negative", + "pos": [ + -170, + 1030 + ] + }, + { + "id": "345bf085-5939-47ff-9767-8f8f239a719c", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 167 + ], + "pos": [ + -170, + 1050 + ] + }, + { + "id": "f4594e34-e2f5-4f1e-b1fa-a1dc2aeb0a90", + "name": "vae", + "type": "VAE", + "linkIds": [ + 168 + ], + "pos": [ + -170, + 1070 + ] + } + ], + "outputs": [ + { + "id": "b3357c0e-6428-4055-9cd3-3595f0896fa8", + "name": "CONDITIONING", + "type": "CONDITIONING", + "linkIds": [ + 169 + ], + "label": "positive", + "pos": [ + 600, + 990 + ] + }, + { + "id": "01519713-2ed1-4694-a387-79f44e088e89", + "name": "CONDITIONING_1", + "type": "CONDITIONING", + "linkIds": [ + 170 + ], + "label": "negative", + "pos": [ + 600, + 1010 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 76, + "type": "ReferenceLatent", + "pos": [ + 170, + 1050 + ], + "size": [ + 204.134765625, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 166 + }, + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 163 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + 
"name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 170 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "ReferenceLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 78, + "type": "VAEEncode", + "pos": [ + -90, + 1150 + ], + "size": [ + 190, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 167 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 168 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 163, + 164 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 77, + "type": "ReferenceLatent", + "pos": [ + 170, + 940 + ], + "size": [ + 210, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 165 + }, + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 164 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 169 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "ReferenceLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [] + } + ], + "groups": [], + "links": [ + { + "id": 163, + "origin_id": 78, + "origin_slot": 0, + "target_id": 76, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 164, + "origin_id": 78, + "origin_slot": 0, + "target_id": 77, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 165, + "origin_id": -10, + "origin_slot": 0, + "target_id": 77, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 166, + "origin_id": -10, + "origin_slot": 1, + "target_id": 76, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 167, + "origin_id": -10, + "origin_slot": 2, + "target_id": 78, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 168, + "origin_id": -10, + "origin_slot": 3, + "target_id": 78, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 169, + "origin_id": 77, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 170, + "origin_id": 76, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "CONDITIONING" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Applies reference image conditioning for style/identity transfer (Flux.2 Klein 4B)." 
+ } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG", + "ds": { + "scale": 1.1478862047043865, + "offset": [ + 302.91933883258804, + -648.9802050882657 + ] + } + }, + "version": 0.4 +} diff --git a/blueprints/Image Edit (LongCat Image Edit).json b/blueprints/Image Edit (LongCat Image Edit).json new file mode 100644 index 000000000..de1c155a2 --- /dev/null +++ b/blueprints/Image Edit (LongCat Image Edit).json @@ -0,0 +1,1428 @@ +{ + "revision": 0, + "last_node_id": 176, + "last_link_id": 0, + "nodes": [ + { + "id": 176, + "type": "372a02a0-a79c-40b4-84a9-34f246fe0e9c", + "pos": [ + 967.0861152473078, + 4977.534165136897 + ], + "size": [ + 330, + 380 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "27", + "prompt" + ], + [ + "33", + "steps" + ], + [ + "33", + "cfg" + ], + [ + "33", + "seed" + ], + [ + "34", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "26", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + 
"version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Image Edit (LongCat Image Edit)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "372a02a0-a79c-40b4-84a9-34f246fe0e9c", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 176, + "lastLinkId": 376, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (LongCat Image Edit)", + "inputNode": { + "id": -10, + "bounding": [ + -750, + 380, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1680, + 340, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "616c4f3e-8b64-4711-bee2-5ecbe1814fe4", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 14 + ], + "localized_name": "image", + "pos": [ + -650, + 400 + ] + }, + { + "id": "d39759fc-a5a9-4b82-a88f-df9b953f1d98", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 36 + ], + "pos": [ + -650, + 420 + ] + }, + { + "id": "48627f43-cdf1-4ea9-9e11-ec13451a7323", + "name": "steps", + "type": "INT", + "linkIds": [ + 37 + ], + "pos": [ + -650, + 440 + ] + }, + { + "id": "2213f872-d40f-4fc3-be01-b8fc73f1d92c", + "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 42 + ], + "pos": [ + -650, + 460 + ] + }, + { + "id": "2c7b3e65-e71e-4a9b-a9f8-d2e814ccb6af", + "name": "seed", + "type": "INT", + "linkIds": [ + 43 + ], + "pos": [ + -650, + 480 + ] + }, + { + "id": "bddb2317-7210-48d5-81fd-6b2d6fac33f4", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 44 + ], + "pos": [ + -650, + 500 + ] + }, + { + "id": "a283167b-6d7f-4d19-ad86-1fff2335c08d", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 45 + ], + "pos": [ + -650, + 520 + ] + }, + { + "id": "e033047f-cc37-4043-b4a0-25d7bab661af", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 46 + ], + "pos": [ + -650, + 540 + ] + } + ], + "outputs": [ + { + "id": "0a288e93-c03f-4805-80f3-4e320a6a492e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 20 + ], + 
"localized_name": "IMAGE", + "pos": [ + 1700, + 360 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 26, + "type": "VAELoader", + "pos": [ + -360, + 590 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 46 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 4, + 5, + 6, + 7 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 27, + "type": "TextEncodeQwenImageEdit", + "pos": [ + 10, + 200 + ], + "size": [ + 280, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 2 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 4 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": 15 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "TextEncodeQwenImageEdit" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 28, + 
"type": "TextEncodeQwenImageEdit", + "pos": [ + 10, + 440 + ], + "size": [ + 280, + 190 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 3 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 5 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": 16 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "TextEncodeQwenImageEdit" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 29, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ + 660, + 200 + ], + "size": [ + 270, + 80 + ], + "flags": {}, + "order": 3, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 10 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": "reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 12 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod" + }, + "widgets_values": [ + "index" + ] + }, + { + "id": 30, + "type": "FluxGuidance", + "pos": [ + 330, + 440 + ], 
+ "size": [ + 230, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 9 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 4.5 + ] + }, + { + "id": 31, + "type": "FluxGuidance", + "pos": [ + 330, + 200 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 8 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 10 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 4.5 + ] + }, + { + "id": 32, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ + 660, + 440 + ], + "size": [ + 270, + 80 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 11 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": 
"reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod" + }, + "widgets_values": [ + "index" + ] + }, + { + "id": 33, + "type": "KSampler", + "pos": [ + 1080, + 210 + ], + "size": [ + 270, + 460 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 1 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 12 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 13 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 18 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 43 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 37 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 42 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 19 + ] + } + ], + "properties": { + 
"cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 43, + "fixed", + 50, + 4.5, + "euler", + "simple", + 1 + ] + }, + { + "id": 34, + "type": "UNETLoader", + "pos": [ + -360, + 170 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 44 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 1 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "longcat_image_edit_bf16.safetensors", + "url": "https://huggingface.co/TalmajM/LongCat-Image-Edit_ComfyUI_repackaged/resolve/main/split_files/diffusion_models/longcat_image_edit_bf16.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "longcat_image_edit_bf16.safetensors", + "default" + ] + }, + { + "id": 35, + "type": "VAEEncode", + "pos": [ + 710, + 790 + ], + "size": [ + 260, + 100 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 17 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 36, + "type": "VAEDecode", + "pos": [ + 1100, + 800 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 19 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 7 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 20 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 37, + "type": "ImageScaleToTotalPixels", + "pos": [ + -370, + 790 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 14 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 15, + 16, + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ImageScaleToTotalPixels" + }, + "widgets_values": [ + "lanczos", + 1, + 16 + ] + }, + { + "id": 38, + "type": 
"CLIPLoader", + "pos": [ + -360, + 360 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 45 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 2, + 3 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "longcat_image", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + -380, + 100, + 320, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Conditioning", + "bounding": [ + -30, + 100, + 1030, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Sample", + "bounding": [ + 1030, + 100, + 360, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 2, + "origin_id": 38, + "origin_slot": 0, + "target_id": 27, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 4, + "origin_id": 26, + "origin_slot": 0, + "target_id": 27, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 15, + "origin_id": 37, + 
"origin_slot": 0, + "target_id": 27, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 3, + "origin_id": 38, + "origin_slot": 0, + "target_id": 28, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 5, + "origin_id": 26, + "origin_slot": 0, + "target_id": 28, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 16, + "origin_id": 37, + "origin_slot": 0, + "target_id": 28, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 10, + "origin_id": 31, + "origin_slot": 0, + "target_id": 29, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 9, + "origin_id": 28, + "origin_slot": 0, + "target_id": 30, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 8, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 11, + "origin_id": 30, + "origin_slot": 0, + "target_id": 32, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 1, + "origin_id": 34, + "origin_slot": 0, + "target_id": 33, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 12, + "origin_id": 29, + "origin_slot": 0, + "target_id": 33, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 13, + "origin_id": 32, + "origin_slot": 0, + "target_id": 33, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 18, + "origin_id": 35, + "origin_slot": 0, + "target_id": 33, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 17, + "origin_id": 37, + "origin_slot": 0, + "target_id": 35, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 6, + "origin_id": 26, + "origin_slot": 0, + "target_id": 35, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 19, + "origin_id": 33, + "origin_slot": 0, + "target_id": 36, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 7, + "origin_id": 26, + "origin_slot": 0, + "target_id": 36, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 14, + "origin_id": -10, + "origin_slot": 0, + "target_id": 37, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 20, 
+ "origin_id": 36, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 1, + "target_id": 27, + "target_slot": 3, + "type": "STRING" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 2, + "target_id": 33, + "target_slot": 5, + "type": "INT" + }, + { + "id": 42, + "origin_id": -10, + "origin_slot": 3, + "target_id": 33, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 43, + "origin_id": -10, + "origin_slot": 4, + "target_id": 33, + "target_slot": 4, + "type": "INT" + }, + { + "id": 44, + "origin_id": -10, + "origin_slot": 5, + "target_id": 34, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 45, + "origin_id": -10, + "origin_slot": 6, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 46, + "origin_id": -10, + "origin_slot": 7, + "target_id": 26, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Image generation and editing/Edit image", + "description": "Edits images via text instructions using LongCat Image Edit, an instruction-following image editing diffusion model." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (Qwen 2509).json b/blueprints/Image Edit (Qwen 2509).json new file mode 100644 index 000000000..f7be322a0 --- /dev/null +++ b/blueprints/Image Edit (Qwen 2509).json @@ -0,0 +1,1947 @@ +{ + "revision": 0, + "last_node_id": 433, + "last_link_id": 0, + "nodes": [ + { + "id": 433, + "type": "eba40a3a-f6c5-48ac-b58e-55525d06b373", + "pos": [ + 90, + -160 + ], + "size": [ + 390, + 610 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "image2 (optional)", + "name": "image2", + "type": "IMAGE", + "link": null + }, + { + "label": "image3 (optional)", + "name": "image3", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "111", + "prompt" + ], + [ + "3", + "seed" + ], + [ + "443", + "value" + ], + [ + "37", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "39", + "vae_name" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.62" + }, + "widgets_values": [], + "title": "Image Edit (Qwen 2509)" + } + ], + "links": [], + "version": 0.4, 
+ "definitions": { + "subgraphs": [ + { + "id": "eba40a3a-f6c5-48ac-b58e-55525d06b373", + "version": 1, + "state": { + "lastGroupId": 51, + "lastNodeId": 468, + "lastLinkId": 731, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (Qwen 2509)", + "inputNode": { + "id": -10, + "bounding": [ + -1160, + 280, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2030, + -20, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "d5089bd3-63bc-4a24-b478-6565ed2364e3", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 248 + ], + "label": "image", + "pos": [ + -1028.255859375, + 300 + ] + }, + { + "id": "9e80fff0-ed0a-439f-a16e-a4a6cc1eb601", + "name": "image2", + "type": "IMAGE", + "linkIds": [ + 235, + 236 + ], + "label": "image2 (optional)", + "pos": [ + -1028.255859375, + 320 + ] + }, + { + "id": "49d98fd6-01b5-440b-8603-579252fd7fef", + "name": "image3", + "type": "IMAGE", + "linkIds": [ + 237, + 238 + ], + "label": "image3 (optional)", + "pos": [ + -1028.255859375, + 340 + ] + }, + { + "id": "5de32f24-a7b5-4423-b772-72824005f585", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 244 + ], + "pos": [ + -1028.255859375, + 360 + ] + }, + { + "id": "85fb3d74-7881-4c71-bc8c-624be5eedc3d", + "name": "seed", + "type": "INT", + "linkIds": [ + 718 + ], + "pos": [ + -1028.255859375, + 380 + ] + }, + { + "id": "b0c828de-d7eb-42a3-8dfb-4f53360d4fc9", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 719 + ], + "label": "enable_turbo_mode", + "pos": [ + -1028.255859375, + 400 + ] + }, + { + "id": "072baa05-5551-4a98-bd66-015a36833ac2", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 720 + ], + "pos": [ + -1028.255859375, + 420 + ] + }, + { + "id": "d2891d11-b336-4750-9742-b93717c9ae39", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 721 + ], + "pos": [ + -1028.255859375, + 440 + ] + }, + { + "id": "4218135f-5128-4b7e-8572-92cc55615793", + "name": "vae_name", + "type": "COMBO", + 
"linkIds": [ + 722 + ], + "pos": [ + -1028.255859375, + 460 + ] + } + ], + "outputs": [ + { + "id": "c4ebfc18-de83-4361-8e42-767c3c8c25c0", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 110 + ], + "localized_name": "IMAGE", + "pos": [ + 2050, + 0 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 75, + "type": "CFGNorm", + "pos": [ + 1080, + 30 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 141 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "patched_model", + "name": "patched_model", + "type": "MODEL", + "links": [ + 186 + ] + } + ], + "properties": { + "Node name for S&R": "CFGNorm", + "cnr_id": "comfy-core", + "ver": "0.3.50", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": { + "strength": true + } + } + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + -730, + 410 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 722 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 168, + 206, + 207 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -730, + 150 + ], + "size": [ + 330, + 150 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 721 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 204, + 205 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -730, + -60 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 720 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": 
"weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 184, + 710 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "models": [ + { + "name": "qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "qwen_image_edit_2509_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 110, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -240, + 320 + ], + "size": [ + 400, + 240 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 204 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 206 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 251 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 236 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 238 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 210 + ] + } + ], + "properties": { + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "cnr_id": "comfy-core", + "ver": "0.3.59" + }, + "widgets_values": [ + "" + ], + "color": "#223", + "bgcolor": "#335" 
+ }, + { + "id": 66, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1070, + -120 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 708 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 141 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingAuraFlow", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 111, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -250, + -70 + ], + "size": [ + 410, + 330 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 205 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 207 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 250 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 235 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 237 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 244 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 211 + ] + } + ], + "properties": { + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "cnr_id": "comfy-core", + "ver": "0.3.59" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": 
"#353" + }, + { + "id": 88, + "type": "VAEEncode", + "pos": [ + -70, + 640 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 249 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 168 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 246 + ] + } + ], + "properties": { + "Node name for S&R": "VAEEncode", + "cnr_id": "comfy-core", + "ver": "0.3.50", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {} + } + } + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -60 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 128 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + } + }, + { + "id": 89, + "type": "LoraLoaderModelOnly", + "pos": [ + 320, + 300 + ], + "size": [ + 300, + 140 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 184 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { 
+ "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 709 + ] + } + ], + "properties": { + "Node name for S&R": "LoraLoaderModelOnly", + "cnr_id": "comfy-core", + "ver": "0.3.50", + "models": [ + { + "name": "Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors", + "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": { + "lora_name": true, + "strength_model": true + } + } + }, + "widgets_values": [ + "Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors", + 1 + ] + }, + { + "id": 117, + "type": "FluxKontextImageScale", + "pos": [ + -680, + 630 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 248 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 249, + 250, + 251 + ] + } + ], + "properties": { + "Node name for S&R": "FluxKontextImageScale" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 1070, + 210 + ], + "size": [ + 300, + 590 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 186 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 211 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 210 + }, + { + "localized_name": "latent_image", + 
"name": "latent_image", + "type": "LATENT", + "link": 246 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 718 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 707 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 706 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 128 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.48", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 973414316252139, + "randomize", + 4, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 436, + "type": "PrimitiveInt", + "pos": [ + 320, + 500 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 713 + ] + } + ], + "title": "Steps", + "properties": { + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 4, + "fixed" + ] + }, + { + "id": 437, + "type": "PrimitiveFloat", + "pos": [ + 320, + 670 + 
], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 714 + ] + } + ], + "title": "CFG", + "properties": { + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 438, + "type": "PrimitiveInt", + "pos": [ + 320, + -100 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 711 + ] + } + ], + "title": "Steps", + "properties": { + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 439, + "type": "PrimitiveFloat", + "pos": [ + 320, + 70 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 712 + ] + } + ], + "title": "CFG", + "properties": { + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 440, + "type": "ComfySwitchNode", + "pos": [ + 750, + -80 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 710 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 709 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 715 + } + ], + 
"outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 708 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 441, + "type": "ComfySwitchNode", + "pos": [ + 730, + 340 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 711 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 713 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 716 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 707 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 442, + "type": "ComfySwitchNode", + "pos": [ + 730, + 520 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 712 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 714 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 717 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 706 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 443, + "type": "PrimitiveBoolean", + "pos": [ + 330, + 850 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 719 + } + ], + 
"outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 715, + 716, + 717 + ] + } + ], + "title": "Enable Lightning LoRA", + "properties": { + "Node name for S&R": "PrimitiveBoolean" + }, + "widgets_values": [ + true + ] + }, + { + "id": 444, + "type": "MarkdownNote", + "pos": [ + 240, + -500 + ], + "size": [ + 450, + 310 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Note: KSampler settings", + "properties": {}, + "widgets_values": [ + "You can test and find the best setting by yourself. The following table is for reference.\n| Parameters | Qwen Team | Comfy Original | with 4steps LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 50 | 20 | 4 |\n| CFG | 4.0 | 2.5 | 1.0 |" + ], + "color": "#432", + "bgcolor": "#000" + } + ], + "groups": [ + { + "id": 1, + "title": "Step1 - Load models", + "bounding": [ + -770, + -170, + 410, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step 4 - Prompt", + "bounding": [ + -330, + -170, + 570, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 50, + "title": "Lightning LoRA", + "bounding": [ + 270, + 220, + 390, + 570 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 51, + "title": "Original Settings", + "bounding": [ + 270, + -170, + 390, + 360 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 141, + "origin_id": 66, + "origin_slot": 0, + "target_id": 75, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 128, + "origin_id": 3, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 184, + "origin_id": 37, + "origin_slot": 0, + "target_id": 89, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 186, + 
"origin_id": 75, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 211, + "origin_id": 111, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 210, + "origin_id": 110, + "origin_slot": 0, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 168, + "origin_id": 39, + "origin_slot": 0, + "target_id": 88, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 204, + "origin_id": 38, + "origin_slot": 0, + "target_id": 110, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 206, + "origin_id": 39, + "origin_slot": 0, + "target_id": 110, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 205, + "origin_id": 38, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 207, + "origin_id": 39, + "origin_slot": 0, + "target_id": 111, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 110, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 235, + "origin_id": -10, + "origin_slot": 1, + "target_id": 111, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": -10, + "origin_slot": 1, + "target_id": 110, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 237, + "origin_id": -10, + "origin_slot": 2, + "target_id": 111, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 238, + "origin_id": -10, + "origin_slot": 2, + "target_id": 110, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 244, + "origin_id": -10, + "origin_slot": 3, + "target_id": 111, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 246, + "origin_id": 88, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 248, + "origin_id": -10, + "origin_slot": 0, + "target_id": 117, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 249, + "origin_id": 117, + "origin_slot": 0, + "target_id": 88, + "target_slot": 0, + "type": 
"IMAGE" + }, + { + "id": 250, + "origin_id": 117, + "origin_slot": 0, + "target_id": 111, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 251, + "origin_id": 117, + "origin_slot": 0, + "target_id": 110, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 706, + "origin_id": 442, + "origin_slot": 0, + "target_id": 3, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 707, + "origin_id": 441, + "origin_slot": 0, + "target_id": 3, + "target_slot": 5, + "type": "INT" + }, + { + "id": 708, + "origin_id": 440, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 709, + "origin_id": 89, + "origin_slot": 0, + "target_id": 440, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 710, + "origin_id": 37, + "origin_slot": 0, + "target_id": 440, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 711, + "origin_id": 438, + "origin_slot": 0, + "target_id": 441, + "target_slot": 0, + "type": "INT" + }, + { + "id": 712, + "origin_id": 439, + "origin_slot": 0, + "target_id": 442, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 713, + "origin_id": 436, + "origin_slot": 0, + "target_id": 441, + "target_slot": 1, + "type": "INT" + }, + { + "id": 714, + "origin_id": 437, + "origin_slot": 0, + "target_id": 442, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 715, + "origin_id": 443, + "origin_slot": 0, + "target_id": 440, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 716, + "origin_id": 443, + "origin_slot": 0, + "target_id": 441, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 717, + "origin_id": 443, + "origin_slot": 0, + "target_id": 442, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 718, + "origin_id": -10, + "origin_slot": 4, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 719, + "origin_id": -10, + "origin_slot": 5, + "target_id": 443, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 720, + "origin_id": -10, + "origin_slot": 6, + "target_id": 37, 
+ "target_slot": 0, + "type": "COMBO" + }, + { + "id": 721, + "origin_id": -10, + "origin_slot": 7, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 722, + "origin_id": -10, + "origin_slot": 8, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Edit image", + "description": "Edits images from text instructions using Qwen-Image-Edit-2509 with optional Lightning LoRA for few-step sampling." + } + ] + }, + "extra": {} +} diff --git a/blueprints/Image Edit (Qwen 2511).json b/blueprints/Image Edit (Qwen 2511).json index 33e85333b..1aa7e5765 100644 --- a/blueprints/Image Edit (Qwen 2511).json +++ b/blueprints/Image Edit (Qwen 2511).json @@ -1 +1,1493 @@ -{"id": "d84b7d1a-a73f-4e31-bd16-983ac0cf5f1b", "revision": 0, "last_node_id": 17, "last_link_id": 32, "nodes": [{"id": 17, "type": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", "pos": [183.33334355513557, -120.00000702649223], "size": [383.0729166666667, 381.10677083333337], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image 1", "name": "image1", "type": "IMAGE", "link": null}, {"label": "image 2 (optional)", "name": "image2", "type": "IMAGE", "link": null}, {"label": "image 3 (optional)", "name": "image3", "type": "IMAGE", "link": null}, {"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "prompt"], ["15", "seed"], ["15", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", null, null, 
"qwen_image_edit_2511_bf16.safetensors", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", "version": 1, "state": {"lastGroupId": 2, "lastNodeId": 17, "lastLinkId": 32, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Edit (Qwen 2511)", "inputNode": {"id": -10, "bounding": [-412.6162343565087, 327.2321295314722, 142.59765625, 180]}, "outputNode": {"id": -20, "bounding": [1631.0466138212807, 305.6854343585077, 120, 60]}, "inputs": [{"id": "6e401a3f-21a6-4552-8ee4-179c313c1910", "name": "image1", "type": "IMAGE", "linkIds": [25], "label": "image 1", "pos": [-290.0185781065087, 347.2321295314722]}, {"id": "a0a6307b-62b8-481e-bb17-d332eceadbe4", "name": "image2", "type": "IMAGE", "linkIds": [21, 26], "label": "image 2 (optional)", "pos": [-290.0185781065087, 367.2321295314722]}, {"id": "232fe944-fc3f-43dd-bb34-112d0360cb5f", "name": "image3", "type": "IMAGE", "linkIds": [22, 27], "label": "image 3 (optional)", "pos": [-290.0185781065087, 387.2321295314722]}, {"id": "9b8ed2f4-5875-4f59-b4c1-5ab79a412f4e", "name": "prompt", "type": "STRING", "linkIds": [23], "pos": [-290.0185781065087, 407.2321295314722]}, {"id": "403a6bd0-f170-4cfb-b72e-cd7fa1dbcd06", "name": "unet_name", "type": "COMBO", "linkIds": [30], "pos": [-290.0185781065087, 427.2321295314722]}, {"id": "86a53531-2fab-47da-9525-858c80737044", "name": "clip_name", "type": "COMBO", "linkIds": [31], "pos": [-290.0185781065087, 447.2321295314722]}, {"id": "499f39e9-d698-41dc-b126-b7ea6024cf5d", "name": "vae_name", "type": "COMBO", "linkIds": [32], "pos": [-290.0185781065087, 467.2321295314722]}], "outputs": [{"id": "f2ccd1fa-428e-4127-89a6-760906013172", "name": "IMAGE", "type": "IMAGE", "linkIds": [24], "pos": [1651.0466138212807, 325.6854343585077]}], "widgets": [], "nodes": [{"id": 2, "type": "ModelSamplingAuraFlow", "pos": [791.0465113899395, -54.3145423152618], 
"size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 29}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3.1]}, {"id": 3, "type": "VAELoader", "pos": [-174.9530552190643, 462.6706561999898], "size": [396.1328125, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 32}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [6, 10, 12, 15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 4, "type": "UNETLoader", "pos": [-174.9530552190643, -23.329297689188216], "size": [396.1328125, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 30}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [29]}], 
"properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_edit_2511_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_edit_2511_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_edit_2511_bf16.safetensors", "default"]}, {"id": 5, "type": "FluxKontextMultiReferenceLatentMethod", "pos": [781.0466382725523, 315.68545764091465], "size": [309.66145833333337, 58], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 2}, {"localized_name": "reference_latents_method", "name": "reference_latents_method", "type": "COMBO", "widget": {"name": "reference_latents_method"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["index_timestep_zero"], "color": "#222", "bgcolor": "#000"}, {"id": 6, "type": "FluxKontextMultiReferenceLatentMethod", "pos": [781.0466382725523, 185.68543791920104], "size": [309.66145833333337, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 3}, {"localized_name": "reference_latents_method", "name": "reference_latents_method", "type": "COMBO", "widget": {"name": "reference_latents_method"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": 
"CONDITIONING", "links": [17]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["index_timestep_zero"], "color": "#222", "bgcolor": "#000"}, {"id": 7, "type": "CFGNorm", "pos": [791.0465113899395, 55.68545297239743], "size": [270, 58], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "patched_model", "name": "patched_model", "type": "MODEL", "links": [16]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CFGNorm", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 8, "type": "MarkdownNote", "pos": [1111.0466241355298, 555.6854726502876], "size": [270, 195.10416666666669], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [], "title": "KSampler settings", "properties": {}, "widgets_values": ["You can test and find the best setting by yourself. The following table is for reference.\n| | Qwen | Comfy | lightning LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 40 | 20 | 4 |\n| CFG | 4.0 | 4.0 | 1.0 |\n\nBy default, we use 20 steps as we just don't want it to take too long. 
Try 40 if you want a better result, but it will take longer."], "color": "#222", "bgcolor": "#000"}, {"id": 9, "type": "TextEncodeQwenImageEditPlus", "pos": [301.0466082538065, 305.6854454238875], "size": [420, 170], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 5}, {"localized_name": "vae", "name": "vae", "shape": 7, "type": "VAE", "link": 6}, {"localized_name": "image1", "name": "image1", "shape": 7, "type": "IMAGE", "link": 28}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 21}, {"localized_name": "image3", "name": "image3", "shape": 7, "type": "IMAGE", "link": 22}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [2]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "TextEncodeQwenImageEditPlus", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 10, "type": "Note", "pos": [801.0465236069665, 435.6854651456011], "size": [280, 88], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "properties": {}, "widgets_values": ["The \"Edit Model Reference Method\" nodes above are not needed if you use Comfy files, but may be needed if you use repackaged ones from other people."], "color": "#432", "bgcolor": "#653"}, {"id": 13, "type": "TextEncodeQwenImageEditPlus", "pos": [301.0466082538065, -14.314562996972978], "size": [426.6276041666667, 215.55989583333334], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 11}, {"localized_name": "vae", "name": "vae", "shape": 7, "type": "VAE", "link": 12}, {"localized_name": "image1", "name": 
"image1", "shape": 7, "type": "IMAGE", "link": 13}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 26}, {"localized_name": "image3", "name": "image3", "shape": 7, "type": "IMAGE", "link": 27}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 23}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [3]}], "title": "TextEncodeQwenImageEditPlus (Positive)", "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "TextEncodeQwenImageEditPlus", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 14, "type": "VAEEncode", "pos": [511.0465866120977, 645.6854435038923], "size": [187.5, 46], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 14}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 15}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [19]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 15, "type": "KSampler", "pos": [1101.0466119185025, -54.3145423152618], "size": [280, 510], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 16}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 17}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 18}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 19}, {"localized_name": "seed", 
"name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 40, 4, "euler", "simple", 1]}, {"id": 12, "type": "VAEDecode", "pos": [1431.0464586818402, -44.31456487314459], "size": [187.5, 46], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 9}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 10}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [24]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 16, "type": "FluxKontextImageScale", "pos": [-170, 630], "size": [194.9458984375, 26], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 25}], "outputs": [{"localized_name": "IMAGE", "name": 
"IMAGE", "type": "IMAGE", "links": [7, 13, 14, 28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextImageScale", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 1, "type": "CLIPLoader", "pos": [-170, 200], "size": [396.1328125, 106], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 31}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [5, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}], "groups": [{"id": 1, "title": "Models", "bounding": [-180, -90, 416.1419982910156, 630.0299011230469], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [250, -90, 510, 630], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 2, "origin_id": 9, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "CONDITIONING"}, {"id": 3, "origin_id": 13, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CONDITIONING"}, {"id": 4, "origin_id": 2, "origin_slot": 0, 
"target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 1, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "CLIP"}, {"id": 6, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "VAE"}, {"id": 9, "origin_id": 15, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "LATENT"}, {"id": 10, "origin_id": 3, "origin_slot": 0, "target_id": 12, "target_slot": 1, "type": "VAE"}, {"id": 11, "origin_id": 1, "origin_slot": 0, "target_id": 13, "target_slot": 0, "type": "CLIP"}, {"id": 12, "origin_id": 3, "origin_slot": 0, "target_id": 13, "target_slot": 1, "type": "VAE"}, {"id": 13, "origin_id": 16, "origin_slot": 0, "target_id": 13, "target_slot": 2, "type": "IMAGE"}, {"id": 14, "origin_id": 16, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 15, "origin_id": 3, "origin_slot": 0, "target_id": 14, "target_slot": 1, "type": "VAE"}, {"id": 16, "origin_id": 7, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "MODEL"}, {"id": 17, "origin_id": 6, "origin_slot": 0, "target_id": 15, "target_slot": 1, "type": "CONDITIONING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "CONDITIONING"}, {"id": 19, "origin_id": 14, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "LATENT"}, {"id": 21, "origin_id": -10, "origin_slot": 1, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 22, "origin_id": -10, "origin_slot": 2, "target_id": 9, "target_slot": 4, "type": "IMAGE"}, {"id": 23, "origin_id": -10, "origin_slot": 3, "target_id": 13, "target_slot": 5, "type": "STRING"}, {"id": 24, "origin_id": 12, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 25, "origin_id": -10, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": -10, "origin_slot": 1, "target_id": 13, "target_slot": 3, "type": "IMAGE"}, {"id": 27, "origin_id": -10, "origin_slot": 2, "target_id": 13, 
"target_slot": 4, "type": "IMAGE"}, {"id": 28, "origin_id": 16, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 4, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": "MODEL"}, {"id": 30, "origin_id": -10, "origin_slot": 4, "target_id": 4, "target_slot": 0, "type": "COMBO"}, {"id": 31, "origin_id": -10, "origin_slot": 5, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 32, "origin_id": -10, "origin_slot": 6, "target_id": 3, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Edit image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.8597138248970195, "offset": [716.4750075519744, 479.19752576099086]}}, "version": 0.4} +{ + "id": "d84b7d1a-a73f-4e31-bd16-983ac0cf5f1b", + "revision": 0, + "last_node_id": 17, + "last_link_id": 32, + "nodes": [ + { + "id": 17, + "type": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", + "pos": [ + 183.33334355513557, + -120.00000702649223 + ], + "size": [ + 383.0729166666667, + 381.10677083333337 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image 1", + "name": "image1", + "type": "IMAGE", + "link": null + }, + { + "label": "image 2 (optional)", + "name": "image2", + "type": "IMAGE", + "link": null + }, + { + "label": "image 3 (optional)", + "name": "image3", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + 
"name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "prompt" + ], + [ + "15", + "seed" + ], + [ + "15", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + null, + null, + "qwen_image_edit_2511_bf16.safetensors", + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image_vae.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", + "version": 1, + "state": { + "lastGroupId": 2, + "lastNodeId": 17, + "lastLinkId": 32, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (Qwen 2511)", + "inputNode": { + "id": -10, + "bounding": [ + -412.6162343565087, + 327.2321295314722, + 142.59765625, + 180 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1631.0466138212807, + 305.6854343585077, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "6e401a3f-21a6-4552-8ee4-179c313c1910", + "name": "image1", + "type": "IMAGE", + "linkIds": [ + 25 + ], + "label": "image 1", + "pos": [ + -290.0185781065087, + 347.2321295314722 + ] + }, + { + "id": "a0a6307b-62b8-481e-bb17-d332eceadbe4", + "name": "image2", + "type": "IMAGE", + "linkIds": [ + 21, + 26 + ], + "label": "image 2 (optional)", + "pos": [ + -290.0185781065087, + 367.2321295314722 + ] + }, + { + "id": "232fe944-fc3f-43dd-bb34-112d0360cb5f", + "name": "image3", + "type": "IMAGE", + "linkIds": [ + 22, + 27 + ], + "label": "image 3 (optional)", + "pos": [ + -290.0185781065087, + 387.2321295314722 + ] + }, + { + "id": "9b8ed2f4-5875-4f59-b4c1-5ab79a412f4e", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 23 + ], + "pos": 
[ + -290.0185781065087, + 407.2321295314722 + ] + }, + { + "id": "403a6bd0-f170-4cfb-b72e-cd7fa1dbcd06", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 30 + ], + "pos": [ + -290.0185781065087, + 427.2321295314722 + ] + }, + { + "id": "86a53531-2fab-47da-9525-858c80737044", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 31 + ], + "pos": [ + -290.0185781065087, + 447.2321295314722 + ] + }, + { + "id": "499f39e9-d698-41dc-b126-b7ea6024cf5d", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 32 + ], + "pos": [ + -290.0185781065087, + 467.2321295314722 + ] + } + ], + "outputs": [ + { + "id": "f2ccd1fa-428e-4127-89a6-760906013172", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 24 + ], + "pos": [ + 1651.0466138212807, + 325.6854343585077 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 2, + "type": "ModelSamplingAuraFlow", + "pos": [ + 791.0465113899395, + -54.3145423152618 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 29 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 4 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1 + ] + }, + { + "id": 3, + "type": "VAELoader", + "pos": [ + -174.9530552190643, + 462.6706561999898 + ], + "size": [ + 396.1328125, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 32 + } + ], 
+ "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 6, + 10, + 12, + 15 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 4, + "type": "UNETLoader", + "pos": [ + -174.9530552190643, + -23.329297689188216 + ], + "size": [ + 396.1328125, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 30 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 29 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_edit_2511_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_edit_2511_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_edit_2511_bf16.safetensors", + "default" + ] + }, + { + "id": 5, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ 
+ 781.0466382725523, + 315.68545764091465 + ], + "size": [ + 309.66145833333337, + 58 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 2 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": "reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "index_timestep_zero" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 6, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ + 781.0466382725523, + 185.68543791920104 + ], + "size": [ + 309.66145833333337, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 3 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": "reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "index_timestep_zero" + ], + "color": 
"#222", + "bgcolor": "#000" + }, + { + "id": 7, + "type": "CFGNorm", + "pos": [ + 791.0465113899395, + 55.68545297239743 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 4 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "patched_model", + "name": "patched_model", + "type": "MODEL", + "links": [ + 16 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CFGNorm", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 8, + "type": "MarkdownNote", + "pos": [ + 1111.0466241355298, + 555.6854726502876 + ], + "size": [ + 270, + 195.10416666666669 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "KSampler settings", + "properties": {}, + "widgets_values": [ + "You can test and find the best setting by yourself. The following table is for reference.\n| | Qwen | Comfy | lightning LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 40 | 20 | 4 |\n| CFG | 4.0 | 4.0 | 1.0 |\n\nBy default, we use 20 steps as we just don't want it to take too long. Try 40 if you want a better result, but it will take longer." 
+ ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 9, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + 301.0466082538065, + 305.6854454238875 + ], + "size": [ + 420, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 5 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 6 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 28 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 21 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 22 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 2 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 10, + "type": "Note", + "pos": [ + 801.0465236069665, + 435.6854651456011 + ], + "size": [ + 280, + 88 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The \"Edit Model Reference Method\" nodes above are not needed if you use Comfy files, but may be needed if you use repackaged ones from other people." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 13, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + 301.0466082538065, + -14.314562996972978 + ], + "size": [ + 426.6276041666667, + 215.55989583333334 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 11 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 12 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 13 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 26 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 27 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 23 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 3 + ] + } + ], + "title": "TextEncodeQwenImageEditPlus (Positive)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 14, + "type": "VAEEncode", + "pos": [ + 511.0465866120977, + 645.6854435038923 + ], + "size": [ + 187.5, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 14 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 15 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 19 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + 
"ver": "0.5.1", + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 15, + "type": "KSampler", + "pos": [ + 1101.0466119185025, + -54.3145423152618 + ], + "size": [ + 280, + 510 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 16 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 17 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 18 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 19 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 40, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 12, + "type": "VAEDecode", + "pos": [ + 1431.0464586818402, + -44.31456487314459 + ], + "size": [ + 187.5, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 9 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 10 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 24 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 16, + "type": "FluxKontextImageScale", + "pos": [ + -170, + 630 + ], + "size": [ + 194.9458984375, + 26 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 25 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 7, + 13, + 14, + 28 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "FluxKontextImageScale", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 1, + "type": "CLIPLoader", + "pos": [ + -170, + 200 + ], + "size": [ + 396.1328125, + 106 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 31 + }, + { + "localized_name": "type", + "name": 
"type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 5, + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + -180, + -90, + 416.1419982910156, + 630.0299011230469 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Prompt", + "bounding": [ + 250, + -90, + 510, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 2, + "origin_id": 9, + "origin_slot": 0, + "target_id": 5, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 3, + "origin_id": 13, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 4, + "origin_id": 2, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 5, + "origin_id": 1, + "origin_slot": 0, + "target_id": 9, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 6, + "origin_id": 3, + "origin_slot": 0, + "target_id": 9, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 9, + "origin_id": 15, + "origin_slot": 0, + "target_id": 12, + 
"target_slot": 0, + "type": "LATENT" + }, + { + "id": 10, + "origin_id": 3, + "origin_slot": 0, + "target_id": 12, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 11, + "origin_id": 1, + "origin_slot": 0, + "target_id": 13, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 12, + "origin_id": 3, + "origin_slot": 0, + "target_id": 13, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 13, + "origin_id": 16, + "origin_slot": 0, + "target_id": 13, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 14, + "origin_id": 16, + "origin_slot": 0, + "target_id": 14, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 15, + "origin_id": 3, + "origin_slot": 0, + "target_id": 14, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 16, + "origin_id": 7, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 17, + "origin_id": 6, + "origin_slot": 0, + "target_id": 15, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 18, + "origin_id": 5, + "origin_slot": 0, + "target_id": 15, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 19, + "origin_id": 14, + "origin_slot": 0, + "target_id": 15, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 21, + "origin_id": -10, + "origin_slot": 1, + "target_id": 9, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 22, + "origin_id": -10, + "origin_slot": 2, + "target_id": 9, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 23, + "origin_id": -10, + "origin_slot": 3, + "target_id": 13, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 24, + "origin_id": 12, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 25, + "origin_id": -10, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 26, + "origin_id": -10, + "origin_slot": 1, + "target_id": 13, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 27, + "origin_id": -10, + "origin_slot": 2, + "target_id": 13, + 
"target_slot": 4, + "type": "IMAGE" + }, + { + "id": 28, + "origin_id": 16, + "origin_slot": 0, + "target_id": 9, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 29, + "origin_id": 4, + "origin_slot": 0, + "target_id": 2, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 30, + "origin_id": -10, + "origin_slot": 4, + "target_id": 4, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 31, + "origin_id": -10, + "origin_slot": 5, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 32, + "origin_id": -10, + "origin_slot": 6, + "target_id": 3, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "frontendVersion": "1.37.11", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "category": "Image generation and editing/Edit image", + "description": "Edits images via text instructions using Qwen-Image-Edit-2511 with improved character consistency and integrated LoRA." 
+ } + ] + }, + "config": {}, + "extra": { + "frontendVersion": "1.37.11", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true, + "ds": { + "scale": 0.8597138248970195, + "offset": [ + 716.4750075519744, + 479.19752576099086 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Image Inpainting (Flux.1 Fill Dev).json b/blueprints/Image Inpainting (Flux.1 Fill Dev).json new file mode 100644 index 000000000..c1326ed3d --- /dev/null +++ b/blueprints/Image Inpainting (Flux.1 Fill Dev).json @@ -0,0 +1,1206 @@ +{ + "revision": 0, + "last_node_id": 232, + "last_link_id": 0, + "nodes": [ + { + "id": 232, + "type": "6e8d6e38-bdc3-436c-be85-ef9e67e70e07", + "pos": [ + 1270, + 4640 + ], + "size": [ + 400, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": null + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "23", + "text" + ], + [ + "3", + "seed" + ], + [ + "31", + "unet_name" + ], + [ + "34", + 
"clip_name1" + ], + [ + "34", + "clip_name2" + ], + [ + "230", + "vae_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Image Inpainting (Flux.1 Fill Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "6e8d6e38-bdc3-436c-be85-ef9e67e70e07", + "version": 1, + "state": { + "lastGroupId": 22, + "lastNodeId": 232, + "lastLinkId": 286, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Inpainting (Flux.1 Fill Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -850, + 164, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1230, + 140, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "65727ee9-09d0-40c9-bd86-11e0823eb676", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 99 + ], + "localized_name": "pixels", + "label": "image", + "pos": [ + -750, + 184 + ] + }, + { + "id": "28424f77-56c5-49c1-ba41-6bd78287c186", + "name": "mask", + "type": "MASK", + "linkIds": [ + 100 + ], + "localized_name": "mask", + "pos": [ + -750, + 204 + ] + }, + { + "id": "2339e5e0-8f8d-4600-b158-7d7dae5f0535", + "name": "text", + "type": "STRING", + "linkIds": [ + 277 + ], + "label": "prompt", + "pos": [ + -750, + 224 + ] + }, + { + "id": "5f433d9b-b97e-4bac-bb88-eb668de2d5a7", + "name": "seed", + "type": "INT", + "linkIds": [ + 282 + ], + "pos": [ + -750, + 244 + ] + }, + { + "id": "35a8b6c1-c92c-4c1a-9b24-2e9bae7808f6", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 283 + ], + "pos": [ + -750, + 264 + ] + }, + { + "id": "3af8f8be-bce8-4ba0-aea0-ccf6b377d5f6", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 284 + ], + "pos": [ + -750, + 284 + ] + }, + { + "id": "d9a4af80-4fa1-4792-b955-78bdaef4596e", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 285 + ], + "pos": [ + -750, + 304 + ] + }, + { + "id": 
"d59398cf-7e9c-4dae-8c5a-08c4756f256a", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 286 + ], + "pos": [ + -750, + 324 + ] + } + ], + "outputs": [ + { + "id": "1dee24ec-54a8-41be-aa30-a8fb797d3d23", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 95 + ], + "localized_name": "IMAGE", + "pos": [ + 1250, + 160 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 34, + "type": "DualCLIPLoader", + "pos": [ + -590, + 150 + ], + "size": [ + 320, + 180 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 284 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 285 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 62 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 229, + "type": "FluxGuidance", + "pos": [ + 410, + -40 + ], + "size": [ + 320, + 110 + ], + "flags": { + 
"collapsed": false + }, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 41 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 80 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 230, + "type": "VAELoader", + "pos": [ + -590, + 450 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 286 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 60, + 82 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 31, + "type": "UNETLoader", + "pos": [ + -590, + -90 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 283 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + 
"link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 85 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-fill-dev.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux1-dev/resolve/main/split_files/diffusion_models/flux1-fill-dev.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-fill-dev.safetensors", + "default" + ] + }, + { + "id": 46, + "type": "ConditioningZeroOut", + "pos": [ + 90, + 420 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 101 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 102 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ConditioningZeroOut" + } + }, + { + "id": 23, + "type": "CLIPTextEncode", + "pos": [ + -160, + -70 + ], + "size": [ + 480, + 410 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 62 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 277 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 41, + 101 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": 
"0.3.64", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 39, + "type": "DifferentialDiffusion", + "pos": [ + 780, + -110 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 85 + }, + { + "localized_name": "strength", + "name": "strength", + "shape": 7, + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 86 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "DifferentialDiffusion" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 231, + "type": "VAEDecode", + "pos": [ + 780, + 590 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 7 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 60 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 38, + "type": "InpaintModelConditioning", + "pos": [ + 420, + 120 + ], + "size": [ + 310, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 80 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 102 + }, + { + "localized_name": "vae", + "name": 
"vae", + "type": "VAE", + "link": 82 + }, + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 99 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 100 + }, + { + "localized_name": "noise_mask", + "name": "noise_mask", + "type": "BOOLEAN", + "widget": { + "name": "noise_mask" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 77 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 78 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 88 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "InpaintModelConditioning" + }, + "widgets_values": [ + true + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 770, + 40 + ], + "size": [ + 290, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 86 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 77 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 78 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 88 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 282 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + 
"type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "normal", + 1 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Load models", + "bounding": [ + -620, + -160, + 410, + 790 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -180, + -160, + 520, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 41, + "origin_id": 23, + "origin_slot": 0, + "target_id": 229, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 101, + "origin_id": 23, + "origin_slot": 0, + "target_id": 46, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 62, + "origin_id": 34, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 85, + "origin_id": 31, + "origin_slot": 0, + "target_id": 39, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 86, + "origin_id": 39, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 77, + "origin_id": 38, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 78, + "origin_id": 38, + "origin_slot": 1, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 88, + "origin_id": 38, + 
"origin_slot": 2, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 7, + "origin_id": 3, + "origin_slot": 0, + "target_id": 231, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 60, + "origin_id": 230, + "origin_slot": 0, + "target_id": 231, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 80, + "origin_id": 229, + "origin_slot": 0, + "target_id": 38, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 102, + "origin_id": 46, + "origin_slot": 0, + "target_id": 38, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 82, + "origin_id": 230, + "origin_slot": 0, + "target_id": 38, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 99, + "origin_id": -10, + "origin_slot": 0, + "target_id": 38, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 100, + "origin_id": -10, + "origin_slot": 1, + "target_id": 38, + "target_slot": 4, + "type": "MASK" + }, + { + "id": 95, + "origin_id": 231, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 277, + "origin_id": -10, + "origin_slot": 2, + "target_id": 23, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 282, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 283, + "origin_id": -10, + "origin_slot": 4, + "target_id": 31, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 284, + "origin_id": -10, + "origin_slot": 5, + "target_id": 34, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 285, + "origin_id": -10, + "origin_slot": 6, + "target_id": 34, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 286, + "origin_id": -10, + "origin_slot": 7, + "target_id": 230, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Inpaint image", + "description": "Inpaints masked image regions using Flux.1 fill [dev], Black Forest Labs' inpainting/outpainting model." 
+ } + ] + }, + "extra": { + "ds": { + "scale": 0.8480949417360862, + "offset": [ + 833.9510730024642, + 210.32152847588895 + ] + }, + "ue_links": [] + } +} diff --git a/blueprints/Image Inpainting (Qwen-image).json b/blueprints/Image Inpainting (Qwen-image).json index 5f8ef81f9..a06d57e19 100644 --- a/blueprints/Image Inpainting (Qwen-image).json +++ b/blueprints/Image Inpainting (Qwen-image).json @@ -1 +1,1928 @@ -{"id": "84318cde-a839-41d4-8632-df6d7c50ffc5", "revision": 0, "last_node_id": 256, "last_link_id": 403, "nodes": [{"id": 256, "type": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", "pos": [2271.698367680439, -460.52399024524993], "size": [420, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": null}, {"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "control_net_name"], ["3", "seed"], ["3", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors", "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 256, "lastLinkId": 403, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Inpainting (Qwen-image)", "inputNode": {"id": -10, "bounding": [-860, 530, 140.587890625, 160]}, 
"outputNode": {"id": -20, "bounding": [1290, 530, 120, 60]}, "inputs": [{"id": "61dc027a-a7fc-4c40-8aa4-fd4a6e36d00f", "name": "image", "type": "IMAGE", "linkIds": [399], "localized_name": "image", "pos": [-739.412109375, 550]}, {"id": "28f4cf42-1c6d-49b8-abce-53ef9c628907", "name": "mask", "type": "MASK", "linkIds": [205], "localized_name": "mask", "pos": [-739.412109375, 570]}, {"id": "f082f9ab-9a31-4d99-b4fd-4900453a30a8", "name": "text", "type": "STRING", "linkIds": [394], "pos": [-739.412109375, 590]}, {"id": "9e692477-812a-4054-b780-471228a9821c", "name": "clip_name", "type": "COMBO", "linkIds": [401], "pos": [-739.412109375, 610]}, {"id": "dfbf7eac-1f92-4636-9ead-6a1c2595c5e2", "name": "vae_name", "type": "COMBO", "linkIds": [402], "pos": [-739.412109375, 630]}, {"id": "cfaf4549-e61b-4a88-a514-24894142433a", "name": "control_net_name", "type": "COMBO", "linkIds": [403], "pos": [-739.412109375, 650]}], "outputs": [{"id": "45b4d67e-3d8f-4936-9599-607a23161a3c", "name": "IMAGE", "type": "IMAGE", "linkIds": [400], "pos": [1310, 550]}], "widgets": [], "nodes": [{"id": 38, "type": "CLIPLoader", "pos": [-90, 70], "size": [380, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 401}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": 
["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 37, "type": "UNETLoader", "pos": [-90, -60], "size": [380, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_fp8_e4m3fn.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["qwen_image_fp8_e4m3fn.safetensors", "default"]}, {"id": 7, "type": "CLIPTextEncode", "pos": [330, 320], "size": [460, 140], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [191]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [" "], "color": "#223", "bgcolor": "#335"}, {"id": 84, "type": "ControlNetLoader", "pos": [-90, 340], "size": [380, 58], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "control_net_name", "name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": 403}], "outputs": [{"localized_name": "CONTROL_NET", "name": "CONTROL_NET", "type": "CONTROL_NET", "links": [192]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", 
"Node name for S&R": "ControlNetLoader", "models": [{"name": "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "directory": "controlnet"}]}, "widgets_values": ["Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}, {"id": 39, "type": "VAELoader", "pos": [-90, 230], "size": [380, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 402}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 144, 193]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 66, "type": "ModelSamplingAuraFlow", "pos": [860, -100], "size": [310, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 149}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [156]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [3.1000000000000005]}, {"id": 108, "type": "ControlNetInpaintingAliMamaApply", "pos": [430, 560], "size": [317.0093688964844, 206], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 190}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 191}, {"localized_name": "control_net", "name": 
"control_net", "type": "CONTROL_NET", "link": 192}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 193}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 397}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 220}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"localized_name": "start_percent", "name": "start_percent", "type": "FLOAT", "widget": {"name": "start_percent"}, "link": null}, {"localized_name": "end_percent", "name": "end_percent", "type": "FLOAT", "widget": {"name": "end_percent"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [188]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [189]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ControlNetInpaintingAliMamaApply"}, "widgets_values": [1, 0, 1]}, {"id": 86, "type": "Note", "pos": [860, 500], "size": [307.4002380371094, 127.38092803955078], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "properties": {}, "widgets_values": ["Set cfg to 1.0 for a speed boost at the cost of consistency. Samplers like res_multistep work pretty well at cfg 1.0\n\nThe official number of steps is 50 but I think that's too much. 
Even just 10 steps seems to work."], "color": "#432", "bgcolor": "#653"}, {"id": 76, "type": "VAEEncode", "pos": [430, 830], "size": [140, 46], "flags": {"collapsed": true}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 396}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 144}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [208]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 122, "type": "SetLatentNoiseMask", "pos": [430, 890], "size": [230, 50], "flags": {"collapsed": true}, "order": 15, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 208}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 219}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [210]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "SetLatentNoiseMask"}, "widgets_values": []}, {"id": 223, "type": "MarkdownNote", "pos": [860, 670], "size": [300, 160], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [], "title": "Note: KSampler settings", "properties": {}, "widgets_values": ["You can test and find the best setting by yourself. 
The following table is for reference.\n| Parameters | Qwen Team | Comfy Original | with 4steps LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 50 | 20 | 4 |\n| CFG | 4.0 | 2.5 | 1.0 |"], "color": "#432", "bgcolor": "#653"}, {"id": 80, "type": "LoraLoaderModelOnly", "pos": [350, -70], "size": [430, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 145}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [149]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", "directory": "loras"}]}, "widgets_values": ["Qwen-Image-Lightning-4steps-V1.0.safetensors", 1]}, {"id": 6, "type": "CLIPTextEncode", "pos": [330, 110], "size": [460, 164.31304931640625], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 394}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [190]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 121, "type": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", "pos": [430, 950], "size": [330, 100], "flags": {}, "order": 14, "mode": 0, "inputs": 
[{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 205}, {"name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": null}, {"name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [215, 219, 220]}], "properties": {"proxyWidgets": [["-1", "expand"], ["-1", "blur_radius"]], "cnr_id": "comfy-core", "ver": "0.3.59"}, "widgets_values": [0, 1]}, {"id": 3, "type": "KSampler", "pos": [860, 20], "size": [310, 430], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 156}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 188}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 189}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 210}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [128]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "KSampler"}, "widgets_values": [0, "randomize", 4, 1, "euler", "simple", 1]}, {"id": 224, "type": "FluxKontextImageScale", "pos": [10, 1090], "size": [194.9458984375, 26], "flags": {}, "order": 17, 
"mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 399}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [396, 397]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "FluxKontextImageScale"}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [900, 880], "size": [250, 46], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 128}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [110, 400]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 124, "type": "MaskPreview", "pos": [440, 1100], "size": [320, 340], "flags": {}, "order": 16, "mode": 4, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 215}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskPreview"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Step 1 - Upload models", "bounding": [-100, -140, 400, 610], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 4, "title": "Step 3 - Prompt", "bounding": [320, 40, 490, 430], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "4 steps lightning LoRA", "bounding": [320, -140, 490, 160], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 14, "title": "Inpainting", "bounding": [-110, -180, 1340, 1650], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 149, "origin_id": 80, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "MODEL"}, {"id": 190, "origin_id": 6, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "CONDITIONING"}, {"id": 191, 
"origin_id": 7, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 192, "origin_id": 84, "origin_slot": 0, "target_id": 108, "target_slot": 2, "type": "CONTROL_NET"}, {"id": 193, "origin_id": 39, "origin_slot": 0, "target_id": 108, "target_slot": 3, "type": "VAE"}, {"id": 220, "origin_id": 121, "origin_slot": 0, "target_id": 108, "target_slot": 5, "type": "MASK"}, {"id": 144, "origin_id": 39, "origin_slot": 0, "target_id": 76, "target_slot": 1, "type": "VAE"}, {"id": 208, "origin_id": 76, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 219, "origin_id": 121, "origin_slot": 0, "target_id": 122, "target_slot": 1, "type": "MASK"}, {"id": 215, "origin_id": 121, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "MASK"}, {"id": 128, "origin_id": 3, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 145, "origin_id": 37, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "MODEL"}, {"id": 156, "origin_id": 66, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 188, "origin_id": 108, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 189, "origin_id": 108, "origin_slot": 1, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 210, "origin_id": 122, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 205, "origin_id": -10, "origin_slot": 1, "target_id": 121, "target_slot": 0, "type": "MASK"}, {"id": 110, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 394, "origin_id": -10, "origin_slot": 2, "target_id": 6, "target_slot": 1, "type": "STRING"}, {"id": 396, "origin_id": 224, "origin_slot": 0, "target_id": 76, "target_slot": 0, 
"type": "IMAGE"}, {"id": 397, "origin_id": 224, "origin_slot": 0, "target_id": 108, "target_slot": 4, "type": "IMAGE"}, {"id": 399, "origin_id": -10, "origin_slot": 0, "target_id": 224, "target_slot": 0, "type": "IMAGE"}, {"id": 400, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 401, "origin_id": -10, "origin_slot": 3, "target_id": 38, "target_slot": 0, "type": "COMBO"}, {"id": 402, "origin_id": -10, "origin_slot": 4, "target_id": 39, "target_slot": 0, "type": "COMBO"}, {"id": 403, "origin_id": -10, "origin_slot": 5, "target_id": 84, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Inpaint image"}, {"id": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 256, "lastLinkId": 403, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Grow and Blur Mask", "inputNode": {"id": -10, "bounding": [290, 3536, 120, 100]}, "outputNode": {"id": -20, "bounding": [1130, 3536, 120, 60]}, "inputs": [{"id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", "name": "mask", "type": "MASK", "linkIds": [279], "localized_name": "mask", "pos": [390, 3556]}, {"id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", "name": "expand", "type": "INT", "linkIds": [379], "pos": [390, 3576]}, {"id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", "name": "blur_radius", "type": "INT", "linkIds": [380], "pos": [390, 3596]}], "outputs": [{"id": "1f97f683-13d3-4871-876d-678fca850d89", "name": "MASK", "type": "MASK", "linkIds": [378], "localized_name": "MASK", "pos": [1150, 3556]}], "widgets": [], "nodes": [{"id": 253, "type": "ImageToMask", "pos": [800, 3630], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 377}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": 
"MASK", "type": "MASK", "links": [378]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 251, "type": "MaskToImage", "pos": [780, 3470], "size": [260, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 372}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [373]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 199, "type": "GrowMask", "pos": [470, 3460], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 279}, {"localized_name": "expand", "name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": 379}, {"localized_name": "tapered_corners", "name": "tapered_corners", "type": "BOOLEAN", "widget": {"name": "tapered_corners"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [372]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "GrowMask"}, "widgets_values": [0, true]}, {"id": 252, "type": "ImageBlur", "pos": [480, 3620], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 373}, {"localized_name": "blur_radius", "name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": 380}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [377]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageBlur"}, "widgets_values": [1, 1]}], "groups": [], "links": [{"id": 373, "origin_id": 251, "origin_slot": 0, "target_id": 252, "target_slot": 0, "type": "IMAGE"}, {"id": 377, "origin_id": 
252, "origin_slot": 0, "target_id": 253, "target_slot": 0, "type": "IMAGE"}, {"id": 372, "origin_id": 199, "origin_slot": 0, "target_id": 251, "target_slot": 0, "type": "MASK"}, {"id": 279, "origin_id": -10, "origin_slot": 0, "target_id": 199, "target_slot": 0, "type": "MASK"}, {"id": 378, "origin_id": 253, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 379, "origin_id": -10, "origin_slot": 1, "target_id": 199, "target_slot": 1, "type": "INT"}, {"id": 380, "origin_id": -10, "origin_slot": 2, "target_id": 252, "target_slot": 1, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 1.088930769230769, "offset": [-1576.5829757292656, 657.608356702113]}, "workflowRendererVersion": "LG"}, "version": 0.4} +{ + "id": "84318cde-a839-41d4-8632-df6d7c50ffc5", + "revision": 0, + "last_node_id": 256, + "last_link_id": 403, + "nodes": [ + { + "id": 256, + "type": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", + "pos": [ + 2271.698367680439, + -460.52399024524993 + ], + "size": [ + 420, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "control_net_name", + "type": "COMBO", + "widget": { + "name": "control_net_name" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "control_net_name" + ], + [ + "3", + 
"seed" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + "", + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image_vae.safetensors", + "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 256, + "lastLinkId": 403, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Inpainting (Qwen-image)", + "inputNode": { + "id": -10, + "bounding": [ + -860, + 530, + 140.587890625, + 160 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1290, + 530, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "61dc027a-a7fc-4c40-8aa4-fd4a6e36d00f", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 399 + ], + "localized_name": "image", + "pos": [ + -739.412109375, + 550 + ] + }, + { + "id": "28f4cf42-1c6d-49b8-abce-53ef9c628907", + "name": "mask", + "type": "MASK", + "linkIds": [ + 205 + ], + "localized_name": "mask", + "pos": [ + -739.412109375, + 570 + ] + }, + { + "id": "f082f9ab-9a31-4d99-b4fd-4900453a30a8", + "name": "text", + "type": "STRING", + "linkIds": [ + 394 + ], + "pos": [ + -739.412109375, + 590 + ] + }, + { + "id": "9e692477-812a-4054-b780-471228a9821c", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 401 + ], + "pos": [ + -739.412109375, + 610 + ] + }, + { + "id": "dfbf7eac-1f92-4636-9ead-6a1c2595c5e2", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 402 + ], + "pos": [ + -739.412109375, + 630 + ] + }, + { + "id": "cfaf4549-e61b-4a88-a514-24894142433a", + "name": "control_net_name", + "type": "COMBO", + "linkIds": [ + 403 + ], + "pos": [ + -739.412109375, + 650 + ] + } + ], + "outputs": [ + { + "id": "45b4d67e-3d8f-4936-9599-607a23161a3c", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 400 + ], + "pos": [ + 1310, + 550 + ] + } + ], + 
"widgets": [], + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -90, + 70 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 401 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -90, + -60 + ], + "size": [ + 380, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 145 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_fp8_e4m3fn.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "qwen_image_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 330, + 320 + ], + "size": [ + 460, + 140 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 75 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 191 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + " " + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 84, + "type": "ControlNetLoader", + "pos": [ + -90, + 340 + ], + "size": [ + 380, + 58 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "control_net_name", + "name": "control_net_name", + "type": "COMBO", + "widget": { + "name": "control_net_name" + }, + "link": 403 + } + ], + "outputs": [ + { + "localized_name": "CONTROL_NET", + "name": "CONTROL_NET", + "type": "CONTROL_NET", + "links": [ + 192 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ControlNetLoader", + "models": [ + { + "name": "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", + "directory": "controlnet" + } + ] + }, + "widgets_values": [ + "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors" + ] + }, + { + "id": 39, + 
"type": "VAELoader", + "pos": [ + -90, + 230 + ], + "size": [ + 380, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 402 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 144, + 193 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 66, + "type": "ModelSamplingAuraFlow", + "pos": [ + 860, + -100 + ], + "size": [ + 310, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 149 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 156 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ModelSamplingAuraFlow" + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 108, + "type": "ControlNetInpaintingAliMamaApply", + "pos": [ + 430, + 560 + ], + "size": [ + 317.0093688964844, + 206 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 190 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 191 + }, + { + "localized_name": "control_net", + "name": "control_net", + "type": "CONTROL_NET", + "link": 192 + }, + { + "localized_name": 
"vae", + "name": "vae", + "type": "VAE", + "link": 193 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 397 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 220 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "start_percent", + "name": "start_percent", + "type": "FLOAT", + "widget": { + "name": "start_percent" + }, + "link": null + }, + { + "localized_name": "end_percent", + "name": "end_percent", + "type": "FLOAT", + "widget": { + "name": "end_percent" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 188 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 189 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ControlNetInpaintingAliMamaApply" + }, + "widgets_values": [ + 1, + 0, + 1 + ] + }, + { + "id": 86, + "type": "Note", + "pos": [ + 860, + 500 + ], + "size": [ + 307.4002380371094, + 127.38092803955078 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Set cfg to 1.0 for a speed boost at the cost of consistency. Samplers like res_multistep work pretty well at cfg 1.0\n\nThe official number of steps is 50 but I think that's too much. Even just 10 steps seems to work." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 76, + "type": "VAEEncode", + "pos": [ + 430, + 830 + ], + "size": [ + 140, + 46 + ], + "flags": { + "collapsed": true + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 396 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 144 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 208 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [] + }, + { + "id": 122, + "type": "SetLatentNoiseMask", + "pos": [ + 430, + 890 + ], + "size": [ + 230, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 208 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 219 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 210 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "SetLatentNoiseMask" + }, + "widgets_values": [] + }, + { + "id": 223, + "type": "MarkdownNote", + "pos": [ + 860, + 670 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Note: KSampler settings", + "properties": {}, + "widgets_values": [ + "You can test and find the best setting by yourself. 
The following table is for reference.\n| Parameters | Qwen Team | Comfy Original | with 4steps LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 50 | 20 | 4 |\n| CFG | 4.0 | 2.5 | 1.0 |" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 80, + "type": "LoraLoaderModelOnly", + "pos": [ + 350, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 145 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 149 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", + "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-Lightning-4steps-V1.0.safetensors", + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 330, + 110 + ], + "size": [ + 460, + 164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 74 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 394 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 190 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.3.51", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 121, + "type": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", + "pos": [ + 430, + 950 + ], + "size": [ + 330, + 100 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 205 + }, + { + "name": "expand", + "type": "INT", + "widget": { + "name": "expand" + }, + "link": null + }, + { + "name": "blur_radius", + "type": "INT", + "widget": { + "name": "blur_radius" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 215, + 219, + 220 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "expand" + ], + [ + "-1", + "blur_radius" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.59" + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 860, + 20 + ], + "size": [ + 310, + 430 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 156 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 188 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 189 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 210 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": 
"sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 128 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 4, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 224, + "type": "FluxKontextImageScale", + "pos": [ + 10, + 1090 + ], + "size": [ + 194.9458984375, + 26 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 399 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 396, + 397 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "FluxKontextImageScale" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 900, + 880 + ], + "size": [ + 250, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 128 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 110, + 400 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 124, + "type": "MaskPreview", + "pos": [ + 440, + 1100 + ], + "size": [ + 320, + 340 + ], + "flags": {}, + "order": 16, + "mode": 4, + "inputs": [ + { + "localized_name": "mask", + "name": 
"mask", + "type": "MASK", + "link": 215 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "MaskPreview" + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 1, + "title": "Step 1 - Upload models", + "bounding": [ + -100, + -140, + 400, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Step 3 - Prompt", + "bounding": [ + 320, + 40, + 490, + 430 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "4 steps lightning LoRA", + "bounding": [ + 320, + -140, + 490, + 160 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 14, + "title": "Inpainting", + "bounding": [ + -110, + -180, + 1340, + 1650 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 75, + "origin_id": 38, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 149, + "origin_id": 80, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 190, + "origin_id": 6, + "origin_slot": 0, + "target_id": 108, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 191, + "origin_id": 7, + "origin_slot": 0, + "target_id": 108, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 192, + "origin_id": 84, + "origin_slot": 0, + "target_id": 108, + "target_slot": 2, + "type": "CONTROL_NET" + }, + { + "id": 193, + "origin_id": 39, + "origin_slot": 0, + "target_id": 108, + "target_slot": 3, + "type": "VAE" + }, + { + "id": 220, + "origin_id": 121, + "origin_slot": 0, + "target_id": 108, + "target_slot": 5, + "type": "MASK" + }, + { + "id": 144, + "origin_id": 39, + "origin_slot": 0, + "target_id": 76, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 208, + "origin_id": 76, + "origin_slot": 0, + "target_id": 122, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 219, + "origin_id": 121, + "origin_slot": 0, 
+ "target_id": 122, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 215, + "origin_id": 121, + "origin_slot": 0, + "target_id": 124, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 128, + "origin_id": 3, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 74, + "origin_id": 38, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 145, + "origin_id": 37, + "origin_slot": 0, + "target_id": 80, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 156, + "origin_id": 66, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 188, + "origin_id": 108, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 189, + "origin_id": 108, + "origin_slot": 1, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 210, + "origin_id": 122, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 205, + "origin_id": -10, + "origin_slot": 1, + "target_id": 121, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 110, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 394, + "origin_id": -10, + "origin_slot": 2, + "target_id": 6, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 396, + "origin_id": 224, + "origin_slot": 0, + "target_id": 76, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 397, + "origin_id": 224, + "origin_slot": 0, + "target_id": 108, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 399, + "origin_id": -10, + "origin_slot": 0, + "target_id": 224, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 400, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 401, + "origin_id": 
-10, + "origin_slot": 3, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 402, + "origin_id": -10, + "origin_slot": 4, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 403, + "origin_id": -10, + "origin_slot": 5, + "target_id": 84, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Inpaint image", + "description": "Inpaints masked regions using Qwen-Image, extending its multilingual text rendering to inpainting tasks." + }, + { + "id": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 256, + "lastLinkId": 403, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Grow and Blur Mask", + "inputNode": { + "id": -10, + "bounding": [ + 290, + 3536, + 120, + 100 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1130, + 3536, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", + "name": "mask", + "type": "MASK", + "linkIds": [ + 279 + ], + "localized_name": "mask", + "pos": [ + 390, + 3556 + ] + }, + { + "id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", + "name": "expand", + "type": "INT", + "linkIds": [ + 379 + ], + "pos": [ + 390, + 3576 + ] + }, + { + "id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", + "name": "blur_radius", + "type": "INT", + "linkIds": [ + 380 + ], + "pos": [ + 390, + 3596 + ] + } + ], + "outputs": [ + { + "id": "1f97f683-13d3-4871-876d-678fca850d89", + "name": "MASK", + "type": "MASK", + "linkIds": [ + 378 + ], + "localized_name": "MASK", + "pos": [ + 1150, + 3556 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 253, + "type": "ImageToMask", + "pos": [ + 800, + 3630 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 377 + }, + { + "localized_name": "channel", + "name": "channel", + 
"type": "COMBO", + "widget": { + "name": "channel" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 378 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageToMask" + }, + "widgets_values": [ + "red" + ] + }, + { + "id": 251, + "type": "MaskToImage", + "pos": [ + 780, + 3470 + ], + "size": [ + 260, + 70 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 372 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 373 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "MaskToImage" + }, + "widgets_values": [] + }, + { + "id": 199, + "type": "GrowMask", + "pos": [ + 470, + 3460 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 279 + }, + { + "localized_name": "expand", + "name": "expand", + "type": "INT", + "widget": { + "name": "expand" + }, + "link": 379 + }, + { + "localized_name": "tapered_corners", + "name": "tapered_corners", + "type": "BOOLEAN", + "widget": { + "name": "tapered_corners" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 372 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "GrowMask" + }, + "widgets_values": [ + 0, + true + ] + }, + { + "id": 252, + "type": "ImageBlur", + "pos": [ + 480, + 3620 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 373 + }, + { + "localized_name": "blur_radius", + "name": "blur_radius", + "type": "INT", + "widget": { + "name": "blur_radius" + }, 
+ "link": 380 + }, + { + "localized_name": "sigma", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 377 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageBlur" + }, + "widgets_values": [ + 1, + 1 + ] + } + ], + "groups": [], + "links": [ + { + "id": 373, + "origin_id": 251, + "origin_slot": 0, + "target_id": 252, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 377, + "origin_id": 252, + "origin_slot": 0, + "target_id": 253, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 372, + "origin_id": 199, + "origin_slot": 0, + "target_id": 251, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 279, + "origin_id": -10, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 378, + "origin_id": 253, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 1, + "target_id": 199, + "target_slot": 1, + "type": "INT" + }, + { + "id": 380, + "origin_id": -10, + "origin_slot": 2, + "target_id": 252, + "target_slot": 1, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Expands and softens mask edges to reduce visible seams after image processing." 
+ } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 1.088930769230769, + "offset": [ + -1576.5829757292656, + 657.608356702113 + ] + }, + "workflowRendererVersion": "LG" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Image Levels.json b/blueprints/Image Levels.json index f028662bd..1a1b18932 100644 --- a/blueprints/Image Levels.json +++ b/blueprints/Image Levels.json @@ -1 +1,751 @@ -{"revision": 0, "last_node_id": 139, "last_link_id": 0, "nodes": [{"id": 139, "type": "75bf8a72-aad8-4f3e-83ee-380e70248240", "pos": [620, 900], "size": [240, 178], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["5", "choice"], ["3", "value"], ["6", "value"], ["7", "value"], ["8", "value"], ["9", "value"]]}, "widgets_values": [], "title": "Image Levels"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "75bf8a72-aad8-4f3e-83ee-380e70248240", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 144, "lastLinkId": 118, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Levels", "inputNode": {"id": -10, "bounding": [3840, -3430, 120, 60]}, "outputNode": {"id": -20, "bounding": [4950, -3430, 120, 60]}, "inputs": [{"id": "b53e5012-fa47-400f-a324-28c74854ccae", "name": "images.image0", "type": "IMAGE", "linkIds": [1], "localized_name": "images.image0", "label": "image", "pos": [3940, -3410]}], "outputs": [{"id": "de7f2ffa-155f-41fd-b054-aa4d91ef49ca", "name": "IMAGE0", "type": "IMAGE", "linkIds": [8], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4970, -3410]}], "widgets": [], "nodes": [{"id": 5, "type": "CustomCombo", "pos": [4020, -3350], "size": [270, 198], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "channel", "localized_name": "choice", 
"name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [3]}], "title": "Channel", "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["RGB", 0, "RGB", "R", "G", "B", ""]}, {"id": 8, "type": "PrimitiveFloat", "pos": [4020, -3550], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "output_black", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [6]}], "title": "Output Black", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 3, "type": "PrimitiveFloat", "pos": [4020, -3850], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "input_black", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [2]}], "title": "Input Black", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 255, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 6, "type": "PrimitiveFloat", "pos": [4020, -3750], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "input_white", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [4]}], "title": "Input White", "properties": {"Node name for S&R": 
"PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [255]}, {"id": 7, "type": "PrimitiveFloat", "pos": [4020, -3650], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "gamma", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [5]}], "title": "Gamma", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 10, "min": 0, "step": 0.01, "precision": 2, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [1]}, {"id": 9, "type": "PrimitiveFloat", "pos": [4020, -3450], "size": [270, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "output_white", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [7]}], "title": "Output White", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [255]}, {"id": 1, "type": "GLSLShader", "pos": [4310, -3850], "size": [580, 272], "flags": {}, "order": 6, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 1}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 2}, {"label": "u_float1", "localized_name": 
"floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 4}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 5}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 6}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": 7}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 3}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [8]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\n// Levels Adjustment\n// u_int0: channel (0=RGB, 1=R, 2=G, 3=B) default: 0\n// u_float0: input black (0-255) default: 0\n// u_float1: input white (0-255) default: 255\n// u_float2: gamma (0.01-9.99) default: 1.0\n// u_float3: output black (0-255) default: 0\n// u_float4: output white (0-255) default: 255\n\nuniform sampler2D u_image0;\nuniform int u_int0;\nuniform float u_float0;\nuniform float u_float1;\nuniform float u_float2;\nuniform float u_float3;\nuniform float u_float4;\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nvec3 applyLevels(vec3 color, float inBlack, float 
inWhite, float gamma, float outBlack, float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, vec3(1.0 / gamma));\n result = mix(vec3(outBlack), vec3(outWhite), result);\n return result;\n}\n\nfloat applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n float result = clamp((value - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, 1.0 / gamma);\n result = mix(outBlack, outWhite, result);\n return result;\n}\n\nvoid main() {\n vec4 texColor = texture(u_image0, v_texCoord);\n vec3 color = texColor.rgb;\n \n float inBlack = u_float0 / 255.0;\n float inWhite = u_float1 / 255.0;\n float gamma = u_float2;\n float outBlack = u_float3 / 255.0;\n float outWhite = u_float4 / 255.0;\n \n vec3 result;\n \n if (u_int0 == 0) {\n result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 1) {\n result = color;\n result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 2) {\n result = color;\n result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 3) {\n result = color;\n result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else {\n result = color;\n }\n \n fragColor = vec4(result, texColor.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 1, "target_slot": 3, "type": "FLOAT"}, {"id": 5, "origin_id": 7, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "FLOAT"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 1, "target_slot": 5, "type": "FLOAT"}, {"id": 7, "origin_id": 9, "origin_slot": 0, "target_id": 1, "target_slot": 6, 
"type": "FLOAT"}, {"id": 3, "origin_id": 5, "origin_slot": 1, "target_id": 1, "target_slot": 7, "type": "INT"}, {"id": 1, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 8, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}, "extra": {}} +{ + "revision": 0, + "last_node_id": 139, + "last_link_id": 0, + "nodes": [ + { + "id": 139, + "type": "75bf8a72-aad8-4f3e-83ee-380e70248240", + "pos": [ + 620, + 900 + ], + "size": [ + 240, + 178 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "5", + "choice" + ], + [ + "3", + "value" + ], + [ + "6", + "value" + ], + [ + "7", + "value" + ], + [ + "8", + "value" + ], + [ + "9", + "value" + ] + ] + }, + "widgets_values": [], + "title": "Image Levels" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "75bf8a72-aad8-4f3e-83ee-380e70248240", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 144, + "lastLinkId": 118, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Levels", + "inputNode": { + "id": -10, + "bounding": [ + 3840, + -3430, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4950, + -3430, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "b53e5012-fa47-400f-a324-28c74854ccae", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 1 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 3940, + -3410 + ] + } + ], + "outputs": [ + { + "id": "de7f2ffa-155f-41fd-b054-aa4d91ef49ca", + "name": "IMAGE0", + "type": "IMAGE", + 
"linkIds": [ + 8 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 4970, + -3410 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 5, + "type": "CustomCombo", + "pos": [ + 4020, + -3350 + ], + "size": [ + 270, + 198 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "channel", + "localized_name": "choice", + "name": "choice", + "type": "COMBO", + "widget": { + "name": "choice" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": null + }, + { + "localized_name": "INDEX", + "name": "INDEX", + "type": "INT", + "links": [ + 3 + ] + } + ], + "title": "Channel", + "properties": { + "Node name for S&R": "CustomCombo" + }, + "widgets_values": [ + "RGB", + 0, + "RGB", + "R", + "G", + "B", + "" + ] + }, + { + "id": 8, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -3550 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "output_black", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 6 + ] + } + ], + "title": "Output Black", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 255, + "min": 0, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 3, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -3850 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "input_black", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": 
"FLOAT", + "links": [ + 2 + ] + } + ], + "title": "Input Black", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 255, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 6, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -3750 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "input_white", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 4 + ] + } + ], + "title": "Input White", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 255, + "min": 0, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 255 + ] + }, + { + "id": 7, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -3650 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "gamma", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 5 + ] + } + ], + "title": "Gamma", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 10, + "min": 0, + "step": 0.01, + "precision": 2, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 0.5, + "color": [ + 128, + 128, + 128 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 
9, + "type": "PrimitiveFloat", + "pos": [ + 4020, + -3450 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "output_white", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 7 + ] + } + ], + "title": "Output White", + "properties": { + "Node name for S&R": "PrimitiveFloat", + "max": 255, + "min": 0, + "step": 1, + "display": "gradientslider", + "gradient_stops": [ + { + "offset": 0, + "color": [ + 0, + 0, + 0 + ] + }, + { + "offset": 1, + "color": [ + 255, + 255, + 255 + ] + } + ] + }, + "widgets_values": [ + 255 + ] + }, + { + "id": 1, + "type": "GLSLShader", + "pos": [ + 4310, + -3850 + ], + "size": [ + 580, + 272 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 1 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 2 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 4 + }, + { + "label": "u_float2", + "localized_name": "floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 5 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": 6 + }, + { + "label": "u_float4", + "localized_name": "floats.u_float4", + "name": "floats.u_float4", + "shape": 7, + "type": "FLOAT", + "link": 7 + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + 
"shape": 7, + "type": "INT", + "link": 3 + }, + { + "label": "u_int1", + "localized_name": "ints.u_int1", + "name": "ints.u_int1", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 8 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\n// Levels Adjustment\n// u_int0: channel (0=RGB, 1=R, 2=G, 3=B) default: 0\n// u_float0: input black (0-255) default: 0\n// u_float1: input white (0-255) default: 255\n// u_float2: gamma (0.01-9.99) default: 1.0\n// u_float3: output black (0-255) default: 0\n// u_float4: output white (0-255) default: 255\n\nuniform sampler2D u_image0;\nuniform int u_int0;\nuniform float u_float0;\nuniform float u_float1;\nuniform float u_float2;\nuniform float u_float3;\nuniform float u_float4;\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nvec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, vec3(1.0 / gamma));\n result = mix(vec3(outBlack), vec3(outWhite), result);\n return result;\n}\n\nfloat applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, 
float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n float result = clamp((value - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, 1.0 / gamma);\n result = mix(outBlack, outWhite, result);\n return result;\n}\n\nvoid main() {\n vec4 texColor = texture(u_image0, v_texCoord);\n vec3 color = texColor.rgb;\n \n float inBlack = u_float0 / 255.0;\n float inWhite = u_float1 / 255.0;\n float gamma = u_float2;\n float outBlack = u_float3 / 255.0;\n float outWhite = u_float4 / 255.0;\n \n vec3 result;\n \n if (u_int0 == 0) {\n result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 1) {\n result = color;\n result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 2) {\n result = color;\n result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 3) {\n result = color;\n result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else {\n result = color;\n }\n \n fragColor = vec4(result, texColor.a);\n}", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 2, + "origin_id": 3, + "origin_slot": 0, + "target_id": 1, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 4, + "origin_id": 6, + "origin_slot": 0, + "target_id": 1, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 5, + "origin_id": 7, + "origin_slot": 0, + "target_id": 1, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 6, + "origin_id": 8, + "origin_slot": 0, + "target_id": 1, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 7, + "origin_id": 9, + "origin_slot": 0, + "target_id": 1, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 3, + "origin_id": 5, + "origin_slot": 1, + "target_id": 1, + "target_slot": 7, + "type": "INT" + }, + { + "id": 1, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 8, + "origin_id": 1, + 
"origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Color adjust", + "description": "Adjusts black point, white point, and gamma for tonal range control via GPU shader." + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Image Outpainting (Qwen-Image).json b/blueprints/Image Outpainting (Qwen-Image).json index f36e0bd77..6c07227c0 100644 --- a/blueprints/Image Outpainting (Qwen-Image).json +++ b/blueprints/Image Outpainting (Qwen-Image).json @@ -1 +1,2755 @@ -{"id": "8f79c27f-bec4-412e-9b82-7c5b3b778ecf", "revision": 0, "last_node_id": 255, "last_link_id": 401, "nodes": [{"id": 224, "type": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", "pos": [3200, 740], "size": [400, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "left", "type": "INT", "widget": {"name": "left"}, "link": null}, {"name": "top", "type": "INT", "widget": {"name": "top"}, "link": null}, {"name": "right", "type": "INT", "widget": {"name": "right"}, "link": null}, {"name": "bottom", "type": "INT", "widget": {"name": "bottom"}, "link": null}, {"name": "feathering", "type": "INT", "widget": {"name": "feathering"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["182", "text"], ["-1", "left"], ["-1", "top"], ["-1", "right"], ["-1", "bottom"], ["-1", 
"feathering"], ["190", "seed"], ["190", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "control_net_name"], ["-1", "lora_name"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": [null, 0, 0, 0, 0, 0, null, null, "qwen_image_fp8_e4m3fn.safetensors", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors", "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "Qwen-Image-Lightning-4steps-V1.0.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Outpainting (Qwen-Image)", "inputNode": {"id": -10, "bounding": [1940, 610, 140.587890625, 260]}, "outputNode": {"id": -20, "bounding": [4240, 765, 120, 60]}, "inputs": [{"id": "466b9998-797f-4c6f-92e9-39120712c1a9", "name": "image", "type": "IMAGE", "linkIds": [351], "localized_name": "image", "pos": [2060.587890625, 630]}, {"id": "c5befee8-d6c4-493e-8ae1-e09d46268d10", "name": "left", "type": "INT", "linkIds": [392], "pos": [2060.587890625, 650]}, {"id": "c0b028a1-fcc0-4a54-9bdf-fa9e76992c40", "name": "top", "type": "INT", "linkIds": [393], "pos": [2060.587890625, 670]}, {"id": "22e43278-694c-410f-9043-f88b8dfdca28", "name": "right", "type": "INT", "linkIds": [394], "pos": [2060.587890625, 690]}, {"id": "f19fec20-a43d-4562-a0f8-bd6955091c1b", "name": "bottom", "type": "INT", "linkIds": [395], "pos": [2060.587890625, 710]}, {"id": "ba832b36-2199-4e1e-a28d-5f2e8acc99a3", "name": "feathering", "type": "INT", "linkIds": [396], "pos": [2060.587890625, 730]}, {"id": "437d6324-2d3c-4c50-ac21-1ea9aab57f4e", "name": "unet_name", "type": "COMBO", "linkIds": [397], "pos": [2060.587890625, 750]}, {"id": "4d58dde7-4402-45d5-ade9-9c41e99e0757", "name": "clip_name", "type": "COMBO", "linkIds": [398], "pos": [2060.587890625, 770]}, {"id": 
"a7558cc4-d4c4-4b4a-b2a3-0d7229a8ff65", "name": "vae_name", "type": "COMBO", "linkIds": [399], "pos": [2060.587890625, 790]}, {"id": "7d8ffb86-2ff3-49fc-8e96-94d3e530f154", "name": "control_net_name", "type": "COMBO", "linkIds": [400], "pos": [2060.587890625, 810]}, {"id": "a81e0fa5-5984-47ae-bb4f-108a2b92d373", "name": "lora_name", "type": "COMBO", "linkIds": [401], "pos": [2060.587890625, 830]}], "outputs": [{"id": "506ced76-78be-4eb2-ae70-eaa708a4cb98", "name": "IMAGE", "type": "IMAGE", "linkIds": [314], "localized_name": "IMAGE", "pos": [4260, 785]}], "widgets": [], "nodes": [{"id": 174, "type": "CLIPLoader", "pos": [2430, 60], "size": [380, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 398}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [296, 305]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 175, "type": "UNETLoader", "pos": [2430, -70], "size": [380, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 397}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", 
"type": "MODEL", "slot_index": 0, "links": [306]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_fp8_e4m3fn.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["qwen_image_fp8_e4m3fn.safetensors", "default"]}, {"id": 177, "type": "ControlNetLoader", "pos": [2430, 330], "size": [380, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "control_net_name", "name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": 400}], "outputs": [{"localized_name": "CONTROL_NET", "name": "CONTROL_NET", "type": "CONTROL_NET", "links": [301]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ControlNetLoader", "models": [{"name": "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "directory": "controlnet"}]}, "widgets_values": ["Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}, {"id": 180, "type": "ModelSamplingAuraFlow", "pos": [3400, -110], "size": [310, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 298}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [308]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [3.1000000000000005]}, {"id": 185, "type": "LoraLoaderModelOnly", "pos": [2870, -80], "size": [430, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": 
"MODEL", "link": 306}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 401}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [298]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", "directory": "loras"}]}, "widgets_values": ["Qwen-Image-Lightning-4steps-V1.0.safetensors", 1]}, {"id": 190, "type": "KSampler", "pos": [3400, 10], "size": [310, 474], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 308}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 386}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 387}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 358}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [312]}], 
"properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "KSampler"}, "widgets_values": [375729975350303, "randomize", 4, 1, "euler", "simple", 1]}, {"id": 220, "type": "f93c215e-c393-460e-9534-ed2c3d8a652e", "pos": [2480, 1450], "size": [330, 100], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 377}, {"name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": null}, {"name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [374, 375, 376]}], "properties": {"proxyWidgets": [["-1", "expand"], ["-1", "blur_radius"]], "cnr_id": "comfy-core", "ver": "0.3.59"}, "widgets_values": [20, 31]}, {"id": 195, "type": "VAEEncode", "pos": [2950, 820], "size": [140, 46], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 371}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 317}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [358]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 181, "type": "ControlNetInpaintingAliMamaApply", "pos": [2940, 560], "size": [317.0093688964844, 206], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 299}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 300}, {"localized_name": "control_net", "name": "control_net", "type": "CONTROL_NET", "link": 301}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 384}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 385}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 375}, {"localized_name": "strength", "name": "strength", 
"type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"localized_name": "start_percent", "name": "start_percent", "type": "FLOAT", "widget": {"name": "start_percent"}, "link": null}, {"localized_name": "end_percent", "name": "end_percent", "type": "FLOAT", "widget": {"name": "end_percent"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [386]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [387]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ControlNetInpaintingAliMamaApply"}, "widgets_values": [1, 0, 1]}, {"id": 178, "type": "VAELoader", "pos": [2430, 220], "size": [380, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 399}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [313, 317, 384]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 182, "type": "CLIPTextEncode", "pos": [2850, 100], "size": [460, 164.31304931640625], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 305}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [299]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 
176, "type": "CLIPTextEncode", "pos": [2850, 310], "size": [460, 140], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 296}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [300]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#223", "bgcolor": "#335"}, {"id": 191, "type": "VAEDecode", "pos": [3440, 580], "size": [250, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 312}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 313}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [314, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 219, "type": "2a4b2cc0-db37-4302-a067-da392f38f06b", "pos": [2480, 1260], "size": [280, 80], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 365}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 366}, {"name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [377]}, {"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [369, 370, 371, 385]}], "properties": {"proxyWidgets": [["-1", "value"]], "cnr_id": "comfy-core", "ver": "0.3.65"}, "widgets_values": [1536]}, {"id": 207, "type": "MaskPreview", "pos": [3430, 1270], "size": [340, 430], "flags": {}, "order": 15, "mode": 4, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 
376}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskPreview"}, "widgets_values": []}, {"id": 203, "type": "PreviewImage", "pos": [2990, 1270], "size": [310, 430], "flags": {}, "order": 14, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 370}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "PreviewImage"}, "widgets_values": []}, {"id": 200, "type": "ImageCompositeMasked", "pos": [3850, 1280], "size": [250, 150], "flags": {}, "order": 12, "mode": 4, "inputs": [{"localized_name": "destination", "name": "destination", "type": "IMAGE", "link": 369}, {"localized_name": "source", "name": "source", "type": "IMAGE", "link": 323}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": 374}, {"localized_name": "x", "name": "x", "type": "INT", "widget": {"name": "x"}, "link": null}, {"localized_name": "y", "name": "y", "type": "INT", "widget": {"name": "y"}, "link": null}, {"localized_name": "resize_source", "name": "resize_source", "type": "BOOLEAN", "widget": {"name": "resize_source"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageCompositeMasked"}, "widgets_values": [0, 0, false]}, {"id": 202, "type": "ImagePadForOutpaint", "pos": [2490, 1030], "size": [270, 174], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 351}, {"localized_name": "left", "name": "left", "type": "INT", "widget": {"name": "left"}, "link": 392}, {"localized_name": "top", "name": "top", "type": "INT", "widget": {"name": "top"}, "link": 393}, {"localized_name": "right", "name": "right", "type": "INT", "widget": {"name": "right"}, "link": 394}, {"localized_name": "bottom", "name": "bottom", "type": "INT", "widget": {"name": "bottom"}, 
"link": 395}, {"localized_name": "feathering", "name": "feathering", "type": "INT", "widget": {"name": "feathering"}, "link": 396}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [366]}, {"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImagePadForOutpaint"}, "widgets_values": [0, 0, 0, 0, 0]}], "groups": [{"id": 12, "title": "For outpainting Ctrl-B to enable", "bounding": [2410, -190, 1770, 1970], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Step 1 - Upload models", "bounding": [2420, -150, 400, 610], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Step 3 - Prompt", "bounding": [2840, 30, 490, 430], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 10, "title": "4 steps lightning LoRA", "bounding": [2840, -150, 490, 160], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Ctrl-B to enable it", "bounding": [2420, 940, 430, 460], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 298, "origin_id": 185, "origin_slot": 0, "target_id": 180, "target_slot": 0, "type": "MODEL"}, {"id": 306, "origin_id": 175, "origin_slot": 0, "target_id": 185, "target_slot": 0, "type": "MODEL"}, {"id": 308, "origin_id": 180, "origin_slot": 0, "target_id": 190, "target_slot": 0, "type": "MODEL"}, {"id": 386, "origin_id": 181, "origin_slot": 0, "target_id": 190, "target_slot": 1, "type": "CONDITIONING"}, {"id": 387, "origin_id": 181, "origin_slot": 1, "target_id": 190, "target_slot": 2, "type": "CONDITIONING"}, {"id": 358, "origin_id": 195, "origin_slot": 0, "target_id": 190, "target_slot": 3, "type": "LATENT"}, {"id": 377, "origin_id": 219, "origin_slot": 0, "target_id": 220, "target_slot": 0, "type": "MASK"}, {"id": 371, "origin_id": 219, "origin_slot": 1, "target_id": 195, "target_slot": 0, "type": "IMAGE"}, {"id": 317, "origin_id": 178, "origin_slot": 
0, "target_id": 195, "target_slot": 1, "type": "VAE"}, {"id": 299, "origin_id": 182, "origin_slot": 0, "target_id": 181, "target_slot": 0, "type": "CONDITIONING"}, {"id": 300, "origin_id": 176, "origin_slot": 0, "target_id": 181, "target_slot": 1, "type": "CONDITIONING"}, {"id": 301, "origin_id": 177, "origin_slot": 0, "target_id": 181, "target_slot": 2, "type": "CONTROL_NET"}, {"id": 384, "origin_id": 178, "origin_slot": 0, "target_id": 181, "target_slot": 3, "type": "VAE"}, {"id": 385, "origin_id": 219, "origin_slot": 1, "target_id": 181, "target_slot": 4, "type": "IMAGE"}, {"id": 375, "origin_id": 220, "origin_slot": 0, "target_id": 181, "target_slot": 5, "type": "MASK"}, {"id": 305, "origin_id": 174, "origin_slot": 0, "target_id": 182, "target_slot": 0, "type": "CLIP"}, {"id": 296, "origin_id": 174, "origin_slot": 0, "target_id": 176, "target_slot": 0, "type": "CLIP"}, {"id": 312, "origin_id": 190, "origin_slot": 0, "target_id": 191, "target_slot": 0, "type": "LATENT"}, {"id": 313, "origin_id": 178, "origin_slot": 0, "target_id": 191, "target_slot": 1, "type": "VAE"}, {"id": 365, "origin_id": 202, "origin_slot": 1, "target_id": 219, "target_slot": 0, "type": "MASK"}, {"id": 366, "origin_id": 202, "origin_slot": 0, "target_id": 219, "target_slot": 1, "type": "IMAGE"}, {"id": 376, "origin_id": 220, "origin_slot": 0, "target_id": 207, "target_slot": 0, "type": "MASK"}, {"id": 370, "origin_id": 219, "origin_slot": 1, "target_id": 203, "target_slot": 0, "type": "IMAGE"}, {"id": 369, "origin_id": 219, "origin_slot": 1, "target_id": 200, "target_slot": 0, "type": "IMAGE"}, {"id": 323, "origin_id": 191, "origin_slot": 0, "target_id": 200, "target_slot": 1, "type": "IMAGE"}, {"id": 374, "origin_id": 220, "origin_slot": 0, "target_id": 200, "target_slot": 2, "type": "MASK"}, {"id": 351, "origin_id": -10, "origin_slot": 0, "target_id": 202, "target_slot": 0, "type": "IMAGE"}, {"id": 314, "origin_id": 191, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": 
"IMAGE"}, {"id": 392, "origin_id": -10, "origin_slot": 1, "target_id": 202, "target_slot": 1, "type": "INT"}, {"id": 393, "origin_id": -10, "origin_slot": 2, "target_id": 202, "target_slot": 2, "type": "INT"}, {"id": 394, "origin_id": -10, "origin_slot": 3, "target_id": 202, "target_slot": 3, "type": "INT"}, {"id": 395, "origin_id": -10, "origin_slot": 4, "target_id": 202, "target_slot": 4, "type": "INT"}, {"id": 396, "origin_id": -10, "origin_slot": 5, "target_id": 202, "target_slot": 5, "type": "INT"}, {"id": 397, "origin_id": -10, "origin_slot": 6, "target_id": 175, "target_slot": 0, "type": "COMBO"}, {"id": 398, "origin_id": -10, "origin_slot": 7, "target_id": 174, "target_slot": 0, "type": "COMBO"}, {"id": 399, "origin_id": -10, "origin_slot": 8, "target_id": 178, "target_slot": 0, "type": "COMBO"}, {"id": 400, "origin_id": -10, "origin_slot": 9, "target_id": 177, "target_slot": 0, "type": "COMBO"}, {"id": 401, "origin_id": -10, "origin_slot": 10, "target_id": 185, "target_slot": 1, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Outpaint image"}, {"id": "f93c215e-c393-460e-9534-ed2c3d8a652e", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Grow and Blur Mask", "inputNode": {"id": -10, "bounding": [290, 3536, 120, 100]}, "outputNode": {"id": -20, "bounding": [1130, 3536, 120, 60]}, "inputs": [{"id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", "name": "mask", "type": "MASK", "linkIds": [279], "localized_name": "mask", "pos": [390, 3556]}, {"id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", "name": "expand", "type": "INT", "linkIds": [379], "pos": [390, 3576]}, {"id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", "name": "blur_radius", "type": "INT", "linkIds": [380], "pos": [390, 3596]}], "outputs": [{"id": "1f97f683-13d3-4871-876d-678fca850d89", "name": "MASK", "type": "MASK", "linkIds": [378], "localized_name": "MASK", "pos": 
[1150, 3556]}], "widgets": [], "nodes": [{"id": 253, "type": "ImageToMask", "pos": [800, 3630], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 377}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [378]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 251, "type": "MaskToImage", "pos": [780, 3470], "size": [260, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 372}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [373]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 199, "type": "GrowMask", "pos": [470, 3460], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 279}, {"localized_name": "expand", "name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": 379}, {"localized_name": "tapered_corners", "name": "tapered_corners", "type": "BOOLEAN", "widget": {"name": "tapered_corners"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [372]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "GrowMask"}, "widgets_values": [20, true]}, {"id": 252, "type": "ImageBlur", "pos": [480, 3620], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 373}, {"localized_name": "blur_radius", "name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": 380}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": 
{"name": "sigma"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [377]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageBlur"}, "widgets_values": [31, 1]}], "groups": [], "links": [{"id": 373, "origin_id": 251, "origin_slot": 0, "target_id": 252, "target_slot": 0, "type": "IMAGE"}, {"id": 377, "origin_id": 252, "origin_slot": 0, "target_id": 253, "target_slot": 0, "type": "IMAGE"}, {"id": 372, "origin_id": 199, "origin_slot": 0, "target_id": 251, "target_slot": 0, "type": "MASK"}, {"id": 279, "origin_id": -10, "origin_slot": 0, "target_id": 199, "target_slot": 0, "type": "MASK"}, {"id": 378, "origin_id": 253, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 379, "origin_id": -10, "origin_slot": 1, "target_id": 199, "target_slot": 1, "type": "INT"}, {"id": 380, "origin_id": -10, "origin_slot": 2, "target_id": 252, "target_slot": 1, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}, {"id": "2a4b2cc0-db37-4302-a067-da392f38f06b", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Scale image and mask", "inputNode": {"id": -10, "bounding": [2110, 1406, 120, 100]}, "outputNode": {"id": -20, "bounding": [3320, 1406, 120, 80]}, "inputs": [{"id": "53ec80db-b075-446c-a79b-891d82ae3cf1", "name": "mask", "type": "MASK", "linkIds": [360], "localized_name": "mask", "pos": [2210, 1426]}, {"id": "37820e3d-f495-4b41-b0c6-58765a0c1766", "name": "image", "type": "IMAGE", "linkIds": [350], "localized_name": "image", "pos": [2210, 1446]}, {"id": "d388f5f1-7a36-4563-b104-9f7ec77f636d", "name": "value", "type": "INT", "linkIds": [365], "pos": [2210, 1466]}], "outputs": [{"id": "7ef75a31-2e69-4dce-8e13-76cd17b4c272", "name": "MASK", "type": "MASK", "linkIds": [364], "localized_name": "MASK", "pos": [3340, 1426]}, {"id": "36058145-b72c-4bd4-bb63-e2e22456d003", "name": 
"IMAGE", "type": "IMAGE", "linkIds": [352, 353, 354], "localized_name": "IMAGE", "pos": [3340, 1446]}], "widgets": [], "nodes": [{"id": 218, "type": "ImageToMask", "pos": [2990, 1540], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 363}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [364]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 216, "type": "ImageScaleToMaxDimension", "pos": [2610, 1570], "size": [281.2027282714844, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 361}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "largest_size", "name": "largest_size", "type": "INT", "widget": {"name": "largest_size"}, "link": 362}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [363]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageScaleToMaxDimension"}, "widgets_values": ["area", 1536]}, {"id": 217, "type": "MaskToImage", "pos": [2700, 1420], "size": [193.2779296875, 26], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 360}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [361]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 194, "type": "ImageScaleToMaxDimension", "pos": [2590, 1280], "size": [281.2027282714844, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 
350}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "largest_size", "name": "largest_size", "type": "INT", "widget": {"name": "largest_size"}, "link": 359}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352, 353, 354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageScaleToMaxDimension"}, "widgets_values": ["area", 1536]}, {"id": 215, "type": "PrimitiveInt", "pos": [2260, 1560], "size": [270, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": 365}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [359, 362]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "PrimitiveInt"}, "widgets_values": [1536, "fixed"]}], "groups": [], "links": [{"id": 363, "origin_id": 216, "origin_slot": 0, "target_id": 218, "target_slot": 0, "type": "IMAGE"}, {"id": 361, "origin_id": 217, "origin_slot": 0, "target_id": 216, "target_slot": 0, "type": "IMAGE"}, {"id": 362, "origin_id": 215, "origin_slot": 0, "target_id": 216, "target_slot": 2, "type": "INT"}, {"id": 359, "origin_id": 215, "origin_slot": 0, "target_id": 194, "target_slot": 2, "type": "INT"}, {"id": 360, "origin_id": -10, "origin_slot": 0, "target_id": 217, "target_slot": 0, "type": "MASK"}, {"id": 350, "origin_id": -10, "origin_slot": 1, "target_id": 194, "target_slot": 0, "type": "IMAGE"}, {"id": 364, "origin_id": 218, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 352, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 353, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 354, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, 
{"id": 365, "origin_id": -10, "origin_slot": 2, "target_id": 215, "target_slot": 0, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 1.170393777345649, "offset": [-2589.3260157061272, -547.3616692627206]}}, "version": 0.4} +{ + "id": "8f79c27f-bec4-412e-9b82-7c5b3b778ecf", + "revision": 0, + "last_node_id": 255, + "last_link_id": 401, + "nodes": [ + { + "id": 224, + "type": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", + "pos": [ + 3200, + 740 + ], + "size": [ + 400, + 460 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "left", + "type": "INT", + "widget": { + "name": "left" + }, + "link": null + }, + { + "name": "top", + "type": "INT", + "widget": { + "name": "top" + }, + "link": null + }, + { + "name": "right", + "type": "INT", + "widget": { + "name": "right" + }, + "link": null + }, + { + "name": "bottom", + "type": "INT", + "widget": { + "name": "bottom" + }, + "link": null + }, + { + "name": "feathering", + "type": "INT", + "widget": { + "name": "feathering" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "control_net_name", + "type": "COMBO", + "widget": { + "name": "control_net_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "182", + "text" + ], + [ + "-1", + "left" + ], + [ + "-1", + "top" + ], + [ + "-1", + 
"right" + ], + [ + "-1", + "bottom" + ], + [ + "-1", + "feathering" + ], + [ + "190", + "seed" + ], + [ + "190", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "control_net_name" + ], + [ + "-1", + "lora_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + null, + 0, + 0, + 0, + 0, + 0, + null, + null, + "qwen_image_fp8_e4m3fn.safetensors", + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image_vae.safetensors", + "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", + "Qwen-Image-Lightning-4steps-V1.0.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 255, + "lastLinkId": 401, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Outpainting (Qwen-Image)", + "inputNode": { + "id": -10, + "bounding": [ + 1940, + 610, + 140.587890625, + 260 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4240, + 765, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "466b9998-797f-4c6f-92e9-39120712c1a9", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 351 + ], + "localized_name": "image", + "pos": [ + 2060.587890625, + 630 + ] + }, + { + "id": "c5befee8-d6c4-493e-8ae1-e09d46268d10", + "name": "left", + "type": "INT", + "linkIds": [ + 392 + ], + "pos": [ + 2060.587890625, + 650 + ] + }, + { + "id": "c0b028a1-fcc0-4a54-9bdf-fa9e76992c40", + "name": "top", + "type": "INT", + "linkIds": [ + 393 + ], + "pos": [ + 2060.587890625, + 670 + ] + }, + { + "id": "22e43278-694c-410f-9043-f88b8dfdca28", + "name": "right", + "type": "INT", + "linkIds": [ + 394 + ], + "pos": [ + 2060.587890625, + 690 + ] + }, + { + "id": "f19fec20-a43d-4562-a0f8-bd6955091c1b", + "name": "bottom", + "type": "INT", + "linkIds": [ + 395 + ], + "pos": [ + 2060.587890625, + 710 + ] + }, + { + "id": 
"ba832b36-2199-4e1e-a28d-5f2e8acc99a3", + "name": "feathering", + "type": "INT", + "linkIds": [ + 396 + ], + "pos": [ + 2060.587890625, + 730 + ] + }, + { + "id": "437d6324-2d3c-4c50-ac21-1ea9aab57f4e", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 397 + ], + "pos": [ + 2060.587890625, + 750 + ] + }, + { + "id": "4d58dde7-4402-45d5-ade9-9c41e99e0757", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 398 + ], + "pos": [ + 2060.587890625, + 770 + ] + }, + { + "id": "a7558cc4-d4c4-4b4a-b2a3-0d7229a8ff65", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 399 + ], + "pos": [ + 2060.587890625, + 790 + ] + }, + { + "id": "7d8ffb86-2ff3-49fc-8e96-94d3e530f154", + "name": "control_net_name", + "type": "COMBO", + "linkIds": [ + 400 + ], + "pos": [ + 2060.587890625, + 810 + ] + }, + { + "id": "a81e0fa5-5984-47ae-bb4f-108a2b92d373", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 401 + ], + "pos": [ + 2060.587890625, + 830 + ] + } + ], + "outputs": [ + { + "id": "506ced76-78be-4eb2-ae70-eaa708a4cb98", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 314 + ], + "localized_name": "IMAGE", + "pos": [ + 4260, + 785 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 174, + "type": "CLIPLoader", + "pos": [ + 2430, + 60 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 398 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 296, + 305 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for 
S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 175, + "type": "UNETLoader", + "pos": [ + 2430, + -70 + ], + "size": [ + 380, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 397 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 306 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "qwen_image_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 177, + "type": "ControlNetLoader", + "pos": [ + 2430, + 330 + ], + "size": [ + 380, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "control_net_name", + "name": "control_net_name", + "type": "COMBO", + "widget": { + "name": "control_net_name" + }, + "link": 400 + } + ], + "outputs": [ + { + "localized_name": "CONTROL_NET", + "name": "CONTROL_NET", + "type": "CONTROL_NET", + "links": [ + 301 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ControlNetLoader", + "models": [ + { + "name": 
"Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", + "directory": "controlnet" + } + ] + }, + "widgets_values": [ + "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors" + ] + }, + { + "id": 180, + "type": "ModelSamplingAuraFlow", + "pos": [ + 3400, + -110 + ], + "size": [ + 310, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 298 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 308 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ModelSamplingAuraFlow" + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 185, + "type": "LoraLoaderModelOnly", + "pos": [ + 2870, + -80 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 306 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 401 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 298 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", + "url": 
"https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-Lightning-4steps-V1.0.safetensors", + 1 + ] + }, + { + "id": 190, + "type": "KSampler", + "pos": [ + 3400, + 10 + ], + "size": [ + 310, + 474 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 308 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 386 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 387 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 358 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 312 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 375729975350303, + "randomize", + 4, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 220, + "type": "f93c215e-c393-460e-9534-ed2c3d8a652e", 
+ "pos": [ + 2480, + 1450 + ], + "size": [ + 330, + 100 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 377 + }, + { + "name": "expand", + "type": "INT", + "widget": { + "name": "expand" + }, + "link": null + }, + { + "name": "blur_radius", + "type": "INT", + "widget": { + "name": "blur_radius" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 374, + 375, + 376 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "expand" + ], + [ + "-1", + "blur_radius" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.59" + }, + "widgets_values": [ + 20, + 31 + ] + }, + { + "id": 195, + "type": "VAEEncode", + "pos": [ + 2950, + 820 + ], + "size": [ + 140, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 371 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 317 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 358 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [] + }, + { + "id": 181, + "type": "ControlNetInpaintingAliMamaApply", + "pos": [ + 2940, + 560 + ], + "size": [ + 317.0093688964844, + 206 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 299 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 300 + }, + { + "localized_name": "control_net", + "name": "control_net", + "type": "CONTROL_NET", + "link": 301 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 384 + }, + { + "localized_name": "image", + "name": "image", + 
"type": "IMAGE", + "link": 385 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 375 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "start_percent", + "name": "start_percent", + "type": "FLOAT", + "widget": { + "name": "start_percent" + }, + "link": null + }, + { + "localized_name": "end_percent", + "name": "end_percent", + "type": "FLOAT", + "widget": { + "name": "end_percent" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 386 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 387 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ControlNetInpaintingAliMamaApply" + }, + "widgets_values": [ + 1, + 0, + 1 + ] + }, + { + "id": 178, + "type": "VAELoader", + "pos": [ + 2430, + 220 + ], + "size": [ + 380, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 399 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 313, + 317, + 384 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 182, + "type": "CLIPTextEncode", + "pos": [ + 2850, + 100 + ], + "size": [ + 460, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + 
"name": "clip", + "type": "CLIP", + "link": 305 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 299 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 176, + "type": "CLIPTextEncode", + "pos": [ + 2850, + 310 + ], + "size": [ + 460, + 140 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 296 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 300 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 191, + "type": "VAEDecode", + "pos": [ + 3440, + 580 + ], + "size": [ + 250, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 312 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 313 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 314, + 323 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 219, + "type": 
"2a4b2cc0-db37-4302-a067-da392f38f06b", + "pos": [ + 2480, + 1260 + ], + "size": [ + 280, + 80 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 365 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 366 + }, + { + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 377 + ] + }, + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 369, + 370, + 371, + 385 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "value" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.65" + }, + "widgets_values": [ + 1536 + ] + }, + { + "id": 207, + "type": "MaskPreview", + "pos": [ + 3430, + 1270 + ], + "size": [ + 340, + 430 + ], + "flags": {}, + "order": 15, + "mode": 4, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 376 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "MaskPreview" + }, + "widgets_values": [] + }, + { + "id": 203, + "type": "PreviewImage", + "pos": [ + 2990, + 1270 + ], + "size": [ + 310, + 430 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 370 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "PreviewImage" + }, + "widgets_values": [] + }, + { + "id": 200, + "type": "ImageCompositeMasked", + "pos": [ + 3850, + 1280 + ], + "size": [ + 250, + 150 + ], + "flags": {}, + "order": 12, + "mode": 4, + "inputs": [ + { + "localized_name": "destination", + "name": "destination", + "type": "IMAGE", + "link": 369 + }, + { + "localized_name": "source", + "name": "source", + "type": "IMAGE", + "link": 323 + }, + 
{ + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": 374 + }, + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "resize_source", + "name": "resize_source", + "type": "BOOLEAN", + "widget": { + "name": "resize_source" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageCompositeMasked" + }, + "widgets_values": [ + 0, + 0, + false + ] + }, + { + "id": 202, + "type": "ImagePadForOutpaint", + "pos": [ + 2490, + 1030 + ], + "size": [ + 270, + 174 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 351 + }, + { + "localized_name": "left", + "name": "left", + "type": "INT", + "widget": { + "name": "left" + }, + "link": 392 + }, + { + "localized_name": "top", + "name": "top", + "type": "INT", + "widget": { + "name": "top" + }, + "link": 393 + }, + { + "localized_name": "right", + "name": "right", + "type": "INT", + "widget": { + "name": "right" + }, + "link": 394 + }, + { + "localized_name": "bottom", + "name": "bottom", + "type": "INT", + "widget": { + "name": "bottom" + }, + "link": 395 + }, + { + "localized_name": "feathering", + "name": "feathering", + "type": "INT", + "widget": { + "name": "feathering" + }, + "link": 396 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 366 + ] + }, + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 365 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImagePadForOutpaint" + }, + "widgets_values": 
[ + 0, + 0, + 0, + 0, + 0 + ] + } + ], + "groups": [ + { + "id": 12, + "title": "For outpainting Ctrl-B to enable", + "bounding": [ + 2410, + -190, + 1770, + 1970 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Step 1 - Upload models", + "bounding": [ + 2420, + -150, + 400, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 9, + "title": "Step 3 - Prompt", + "bounding": [ + 2840, + 30, + 490, + 430 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 10, + "title": "4 steps lightning LoRA", + "bounding": [ + 2840, + -150, + 490, + 160 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Ctrl-B to enable it", + "bounding": [ + 2420, + 940, + 430, + 460 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 298, + "origin_id": 185, + "origin_slot": 0, + "target_id": 180, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 306, + "origin_id": 175, + "origin_slot": 0, + "target_id": 185, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 308, + "origin_id": 180, + "origin_slot": 0, + "target_id": 190, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 386, + "origin_id": 181, + "origin_slot": 0, + "target_id": 190, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 387, + "origin_id": 181, + "origin_slot": 1, + "target_id": 190, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 358, + "origin_id": 195, + "origin_slot": 0, + "target_id": 190, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 377, + "origin_id": 219, + "origin_slot": 0, + "target_id": 220, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 371, + "origin_id": 219, + "origin_slot": 1, + "target_id": 195, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 317, + "origin_id": 178, + "origin_slot": 0, + "target_id": 195, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 299, + 
"origin_id": 182, + "origin_slot": 0, + "target_id": 181, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 300, + "origin_id": 176, + "origin_slot": 0, + "target_id": 181, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 301, + "origin_id": 177, + "origin_slot": 0, + "target_id": 181, + "target_slot": 2, + "type": "CONTROL_NET" + }, + { + "id": 384, + "origin_id": 178, + "origin_slot": 0, + "target_id": 181, + "target_slot": 3, + "type": "VAE" + }, + { + "id": 385, + "origin_id": 219, + "origin_slot": 1, + "target_id": 181, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 375, + "origin_id": 220, + "origin_slot": 0, + "target_id": 181, + "target_slot": 5, + "type": "MASK" + }, + { + "id": 305, + "origin_id": 174, + "origin_slot": 0, + "target_id": 182, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 296, + "origin_id": 174, + "origin_slot": 0, + "target_id": 176, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 312, + "origin_id": 190, + "origin_slot": 0, + "target_id": 191, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 313, + "origin_id": 178, + "origin_slot": 0, + "target_id": 191, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 365, + "origin_id": 202, + "origin_slot": 1, + "target_id": 219, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 366, + "origin_id": 202, + "origin_slot": 0, + "target_id": 219, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 376, + "origin_id": 220, + "origin_slot": 0, + "target_id": 207, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 370, + "origin_id": 219, + "origin_slot": 1, + "target_id": 203, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 369, + "origin_id": 219, + "origin_slot": 1, + "target_id": 200, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 323, + "origin_id": 191, + "origin_slot": 0, + "target_id": 200, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 374, + "origin_id": 220, + "origin_slot": 0, + "target_id": 200, + 
"target_slot": 2, + "type": "MASK" + }, + { + "id": 351, + "origin_id": -10, + "origin_slot": 0, + "target_id": 202, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 314, + "origin_id": 191, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 392, + "origin_id": -10, + "origin_slot": 1, + "target_id": 202, + "target_slot": 1, + "type": "INT" + }, + { + "id": 393, + "origin_id": -10, + "origin_slot": 2, + "target_id": 202, + "target_slot": 2, + "type": "INT" + }, + { + "id": 394, + "origin_id": -10, + "origin_slot": 3, + "target_id": 202, + "target_slot": 3, + "type": "INT" + }, + { + "id": 395, + "origin_id": -10, + "origin_slot": 4, + "target_id": 202, + "target_slot": 4, + "type": "INT" + }, + { + "id": 396, + "origin_id": -10, + "origin_slot": 5, + "target_id": 202, + "target_slot": 5, + "type": "INT" + }, + { + "id": 397, + "origin_id": -10, + "origin_slot": 6, + "target_id": 175, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 398, + "origin_id": -10, + "origin_slot": 7, + "target_id": 174, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 399, + "origin_id": -10, + "origin_slot": 8, + "target_id": 178, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 400, + "origin_id": -10, + "origin_slot": 9, + "target_id": 177, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 401, + "origin_id": -10, + "origin_slot": 10, + "target_id": 185, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Outpaint image", + "description": "Outpaints beyond image boundaries using Qwen-Image's outpainting capabilities." 
+ }, + { + "id": "f93c215e-c393-460e-9534-ed2c3d8a652e", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 255, + "lastLinkId": 401, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Grow and Blur Mask", + "inputNode": { + "id": -10, + "bounding": [ + 290, + 3536, + 120, + 100 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1130, + 3536, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", + "name": "mask", + "type": "MASK", + "linkIds": [ + 279 + ], + "localized_name": "mask", + "pos": [ + 390, + 3556 + ] + }, + { + "id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", + "name": "expand", + "type": "INT", + "linkIds": [ + 379 + ], + "pos": [ + 390, + 3576 + ] + }, + { + "id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", + "name": "blur_radius", + "type": "INT", + "linkIds": [ + 380 + ], + "pos": [ + 390, + 3596 + ] + } + ], + "outputs": [ + { + "id": "1f97f683-13d3-4871-876d-678fca850d89", + "name": "MASK", + "type": "MASK", + "linkIds": [ + 378 + ], + "localized_name": "MASK", + "pos": [ + 1150, + 3556 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 253, + "type": "ImageToMask", + "pos": [ + 800, + 3630 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 377 + }, + { + "localized_name": "channel", + "name": "channel", + "type": "COMBO", + "widget": { + "name": "channel" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 378 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageToMask" + }, + "widgets_values": [ + "red" + ] + }, + { + "id": 251, + "type": "MaskToImage", + "pos": [ + 780, + 3470 + ], + "size": [ + 260, + 70 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", 
+ "link": 372 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 373 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "MaskToImage" + }, + "widgets_values": [] + }, + { + "id": 199, + "type": "GrowMask", + "pos": [ + 470, + 3460 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 279 + }, + { + "localized_name": "expand", + "name": "expand", + "type": "INT", + "widget": { + "name": "expand" + }, + "link": 379 + }, + { + "localized_name": "tapered_corners", + "name": "tapered_corners", + "type": "BOOLEAN", + "widget": { + "name": "tapered_corners" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 372 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "GrowMask" + }, + "widgets_values": [ + 20, + true + ] + }, + { + "id": 252, + "type": "ImageBlur", + "pos": [ + 480, + 3620 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 373 + }, + { + "localized_name": "blur_radius", + "name": "blur_radius", + "type": "INT", + "widget": { + "name": "blur_radius" + }, + "link": 380 + }, + { + "localized_name": "sigma", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 377 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageBlur" + }, + "widgets_values": [ + 31, + 1 + ] + } + ], + "groups": [], + "links": [ + { + "id": 373, + "origin_id": 251, + "origin_slot": 0, + "target_id": 252, + "target_slot": 0, + "type": "IMAGE" + 
}, + { + "id": 377, + "origin_id": 252, + "origin_slot": 0, + "target_id": 253, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 372, + "origin_id": 199, + "origin_slot": 0, + "target_id": 251, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 279, + "origin_id": -10, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 378, + "origin_id": 253, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 1, + "target_id": 199, + "target_slot": 1, + "type": "INT" + }, + { + "id": 380, + "origin_id": -10, + "origin_slot": 2, + "target_id": 252, + "target_slot": 1, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Expands and softens mask edges to reduce visible seams after image processing." + }, + { + "id": "2a4b2cc0-db37-4302-a067-da392f38f06b", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 255, + "lastLinkId": 401, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Scale image and mask", + "inputNode": { + "id": -10, + "bounding": [ + 2110, + 1406, + 120, + 100 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3320, + 1406, + 120, + 80 + ] + }, + "inputs": [ + { + "id": "53ec80db-b075-446c-a79b-891d82ae3cf1", + "name": "mask", + "type": "MASK", + "linkIds": [ + 360 + ], + "localized_name": "mask", + "pos": [ + 2210, + 1426 + ] + }, + { + "id": "37820e3d-f495-4b41-b0c6-58765a0c1766", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 350 + ], + "localized_name": "image", + "pos": [ + 2210, + 1446 + ] + }, + { + "id": "d388f5f1-7a36-4563-b104-9f7ec77f636d", + "name": "value", + "type": "INT", + "linkIds": [ + 365 + ], + "pos": [ + 2210, + 1466 + ] + } + ], + "outputs": [ + { + "id": "7ef75a31-2e69-4dce-8e13-76cd17b4c272", + "name": "MASK", + "type": "MASK", + "linkIds": [ + 364 + ], + "localized_name": "MASK", + "pos": [ + 3340, + 1426 + ] + }, + { 
+ "id": "36058145-b72c-4bd4-bb63-e2e22456d003", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 352, + 353, + 354 + ], + "localized_name": "IMAGE", + "pos": [ + 3340, + 1446 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 218, + "type": "ImageToMask", + "pos": [ + 2990, + 1540 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 363 + }, + { + "localized_name": "channel", + "name": "channel", + "type": "COMBO", + "widget": { + "name": "channel" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 364 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.65", + "Node name for S&R": "ImageToMask" + }, + "widgets_values": [ + "red" + ] + }, + { + "id": 216, + "type": "ImageScaleToMaxDimension", + "pos": [ + 2610, + 1570 + ], + "size": [ + 281.2027282714844, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 361 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "largest_size", + "name": "largest_size", + "type": "INT", + "widget": { + "name": "largest_size" + }, + "link": 362 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 363 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageScaleToMaxDimension" + }, + "widgets_values": [ + "area", + 1536 + ] + }, + { + "id": 217, + "type": "MaskToImage", + "pos": [ + 2700, + 1420 + ], + "size": [ + 193.2779296875, + 26 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 360 + } + ], + 
"outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 361 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.65", + "Node name for S&R": "MaskToImage" + }, + "widgets_values": [] + }, + { + "id": 194, + "type": "ImageScaleToMaxDimension", + "pos": [ + 2590, + 1280 + ], + "size": [ + 281.2027282714844, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 350 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "largest_size", + "name": "largest_size", + "type": "INT", + "widget": { + "name": "largest_size" + }, + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 352, + 353, + 354 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageScaleToMaxDimension" + }, + "widgets_values": [ + "area", + 1536 + ] + }, + { + "id": 215, + "type": "PrimitiveInt", + "pos": [ + 2260, + 1560 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 365 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 359, + 362 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.65", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 1536, + "fixed" + ] + } + ], + "groups": [], + "links": [ + { + "id": 363, + "origin_id": 216, + "origin_slot": 0, + "target_id": 218, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 361, + "origin_id": 217, + "origin_slot": 0, + "target_id": 216, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 362, + "origin_id": 215, 
+ "origin_slot": 0, + "target_id": 216, + "target_slot": 2, + "type": "INT" + }, + { + "id": 359, + "origin_id": 215, + "origin_slot": 0, + "target_id": 194, + "target_slot": 2, + "type": "INT" + }, + { + "id": 360, + "origin_id": -10, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 350, + "origin_id": -10, + "origin_slot": 1, + "target_id": 194, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 364, + "origin_id": 218, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 352, + "origin_id": 194, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 353, + "origin_id": 194, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 354, + "origin_id": 194, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 365, + "origin_id": -10, + "origin_slot": 2, + "target_id": 215, + "target_slot": 0, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Scales both image and mask together while preserving alignment for editing workflows." 
+ } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG", + "ds": { + "scale": 1.170393777345649, + "offset": [ + -2589.3260157061272, + -547.3616692627206 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Image Segmentation (SAM3).json b/blueprints/Image Segmentation (SAM3).json new file mode 100644 index 000000000..b405bf623 --- /dev/null +++ b/blueprints/Image Segmentation (SAM3).json @@ -0,0 +1,714 @@ +{ + "revision": 0, + "last_node_id": 99, + "last_link_id": 0, + "nodes": [ + { + "id": 99, + "type": "6e7ab3ea-96aa-470f-9b94-3d9d0e01f481", + "pos": [ + -1630, + -3270 + ], + "size": [ + 290, + 370 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "object", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "bboxes", + "type": "BOUNDING_BOX", + "link": null + }, + { + "name": "positive_coords", + "type": "STRING", + "link": null + }, + { + "name": "negative_coords", + "type": "STRING", + "link": null + }, + { + "name": "threshold", + "type": "FLOAT", + "widget": { + "name": "threshold" + }, + "link": null + }, + { + "name": "refine_iterations", + "type": "INT", + "widget": { + "name": "refine_iterations" + }, + "link": null + }, + { + "name": "individual_masks", + "type": "BOOLEAN", + "widget": { + "name": "individual_masks" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "masks", + "name": "masks", + "type": "MASK", + "links": [] + }, + { + "localized_name": "bboxes", + "name": "bboxes", + "type": "BOUNDING_BOX", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "78", + "text" + ], + [ + "75", + "threshold" + ], + [ + "75", + "refine_iterations" + ], + [ + "75", + 
"individual_masks" + ], + [ + "77", + "ckpt_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "text": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Image Segmentation (SAM3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "6e7ab3ea-96aa-470f-9b94-3d9d0e01f481", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 113, + "lastLinkId": 283, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Segmentation (SAM3)", + "inputNode": { + "id": -10, + "bounding": [ + -2260, + -3450, + 136.369140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -1130, + -3305, + 120, + 80 + ] + }, + "inputs": [ + { + "id": "a6e75fa2-162a-4af0-a2fd-1e9c899a5ab6", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 264 + ], + "localized_name": "image", + "label": "image", + "pos": [ + -2143.630859375, + -3430 + ] + }, + { + "id": "3cefd304-7631-4ff6-a5a0-5a0ffb120745", + "name": "text", + "type": "STRING", + "linkIds": [ + 265 + ], + "label": "object", + "pos": [ + -2143.630859375, + -3410 + ] + }, + { + "id": "1aec91c5-d8d2-441c-928c-49c14e7e80ed", + "name": "bboxes", + "type": "BOUNDING_BOX", + "linkIds": [ + 266 + ], + "pos": [ + -2143.630859375, + -3390 + ] + }, + { + "id": "1ec7ce1a-8257-4719-8a81-60ebc8a98899", + "name": "positive_coords", + "type": "STRING", + "linkIds": [ + 267 + ], + "pos": [ + -2143.630859375, + -3370 + ] + }, + { + "id": "c65f8b87-9bd7-48be-9fc2-823431e95019", + "name": "negative_coords", + "type": "STRING", + "linkIds": [ + 268 + ], + "pos": [ + -2143.630859375, + -3350 + ] + }, + { + "id": "bb4ba35a-ccfe-4c37-98e5-d9b0d69585fb", + "name": "threshold", + "type": "FLOAT", + 
"linkIds": [ + 269 + ], + "pos": [ + -2143.630859375, + -3330 + ] + }, + { + "id": "b1439668-b050-490b-a5dc-fc4052c55666", + "name": "refine_iterations", + "type": "INT", + "linkIds": [ + 270 + ], + "pos": [ + -2143.630859375, + -3310 + ] + }, + { + "id": "86e239e5-c098-4302-b54d-d42a38bc0f89", + "name": "individual_masks", + "type": "BOOLEAN", + "linkIds": [ + 271 + ], + "pos": [ + -2143.630859375, + -3290 + ] + }, + { + "id": "f9e0b9d4-b2f1-4907-a4a5-305656576706", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 272 + ], + "pos": [ + -2143.630859375, + -3270 + ] + } + ], + "outputs": [ + { + "id": "ff50da09-1e59-4a58-9b7f-be1a00aa5913", + "name": "masks", + "type": "MASK", + "linkIds": [ + 231 + ], + "localized_name": "masks", + "pos": [ + -1110, + -3285 + ] + }, + { + "id": "8f622e40-8528-4078-b7d3-147e9f872194", + "name": "bboxes", + "type": "BOUNDING_BOX", + "linkIds": [ + 232 + ], + "localized_name": "bboxes", + "pos": [ + -1110, + -3265 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 75, + "type": "SAM3_Detect", + "pos": [ + -1470, + -3460 + ], + "size": [ + 270, + 260 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 237 + }, + { + "label": "image", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 264 + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + "link": 200 + }, + { + "label": "bboxes", + "localized_name": "bboxes", + "name": "bboxes", + "shape": 7, + "type": "BOUNDING_BOX", + "link": 266 + }, + { + "label": "positive_coords", + "localized_name": "positive_coords", + "name": "positive_coords", + "shape": 7, + "type": "STRING", + "link": 267 + }, + { + "label": "negative_coords", + "localized_name": "negative_coords", + "name": "negative_coords", + "shape": 7, + "type": "STRING", + "link": 268 + }, + { + 
"localized_name": "threshold", + "name": "threshold", + "type": "FLOAT", + "widget": { + "name": "threshold" + }, + "link": 269 + }, + { + "localized_name": "refine_iterations", + "name": "refine_iterations", + "type": "INT", + "widget": { + "name": "refine_iterations" + }, + "link": 270 + }, + { + "localized_name": "individual_masks", + "name": "individual_masks", + "type": "BOOLEAN", + "widget": { + "name": "individual_masks" + }, + "link": 271 + } + ], + "outputs": [ + { + "localized_name": "masks", + "name": "masks", + "type": "MASK", + "links": [ + 231 + ] + }, + { + "localized_name": "bboxes", + "name": "bboxes", + "type": "BOUNDING_BOX", + "links": [ + 232 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.19.3", + "Node name for S&R": "SAM3_Detect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.5, + 2, + false + ] + }, + { + "id": 77, + "type": "CheckpointLoaderSimple", + "pos": [ + -1970, + -3200 + ], + "size": [ + 330, + 140 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 237 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 240 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.19.3", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "sam3.1_multiplex_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/sam3.1/resolve/main/checkpoints/sam3.1_multiplex_fp16.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "sam3.1_multiplex_fp16.safetensors" + ] + }, + { + "id": 78, + "type": "CLIPTextEncode", + "pos": [ + -2000, + -3000 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 240 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 265 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 200 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.19.3", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + } + ], + "groups": [], + "links": [ + { + "id": 237, + "origin_id": 77, + "origin_slot": 0, + "target_id": 75, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 200, + "origin_id": 78, + "origin_slot": 0, + "target_id": 75, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 240, + "origin_id": 77, + "origin_slot": 1, + "target_id": 78, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 231, + "origin_id": 75, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 232, + "origin_id": 75, + "origin_slot": 1, + "target_id": -20, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 
264, + "origin_id": -10, + "origin_slot": 0, + "target_id": 75, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 265, + "origin_id": -10, + "origin_slot": 1, + "target_id": 78, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 266, + "origin_id": -10, + "origin_slot": 2, + "target_id": 75, + "target_slot": 3, + "type": "BOUNDING_BOX" + }, + { + "id": 267, + "origin_id": -10, + "origin_slot": 3, + "target_id": 75, + "target_slot": 4, + "type": "STRING" + }, + { + "id": 268, + "origin_id": -10, + "origin_slot": 4, + "target_id": 75, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 269, + "origin_id": -10, + "origin_slot": 5, + "target_id": 75, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 270, + "origin_id": -10, + "origin_slot": 6, + "target_id": 75, + "target_slot": 7, + "type": "INT" + }, + { + "id": 271, + "origin_id": -10, + "origin_slot": 7, + "target_id": 75, + "target_slot": 8, + "type": "BOOLEAN" + }, + { + "id": 272, + "origin_id": -10, + "origin_slot": 8, + "target_id": 77, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Image Tools/Image Segmentation", + "description": "Segments images into masks using Meta SAM3 from text prompts, points, or boxes." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} diff --git a/blueprints/Image Upscale(Z-image-Turbo).json b/blueprints/Image Upscale(Z-image-Turbo).json index a67d6a2d8..bd803a0b1 100644 --- a/blueprints/Image Upscale(Z-image-Turbo).json +++ b/blueprints/Image Upscale(Z-image-Turbo).json @@ -1 +1,1315 @@ -{"id": "bf8108f3-d857-46c9-aef5-0e8ad2a64bf5", "revision": 0, "last_node_id": 95, "last_link_id": 115, "nodes": [{"id": 87, "type": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", "pos": [960.6668984200231, 332.66676187423354], "size": [400, 469.9869791666667], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE_1", "name": "IMAGE_1", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["67", "text"], ["69", "seed"], ["69", "control_after_generate"], ["-1", "denoise"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [null, null, null, 0.33, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "RealESRGAN_x4plus.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", "version": 1, "state": {"lastGroupId": 5, "lastNodeId": 95, "lastLinkId": 115, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Upscale(Z-image-Turbo)", "inputNode": {"id": -10, "bounding": [-150, 390, 125.224609375, 
160]}, "outputNode": {"id": -20, "bounding": [2070, 490, 120, 60]}, "inputs": [{"id": "e9a14390-4f93-4065-8b02-323f999527c0", "name": "image", "type": "IMAGE", "linkIds": [86], "localized_name": "image", "pos": [-44.775390625, 410]}, {"id": "c5655e11-9531-4949-996c-958b5fe92085", "name": "unet_name", "type": "COMBO", "linkIds": [109], "pos": [-44.775390625, 430]}, {"id": "82576043-dd69-4604-b572-09fabb6e602d", "name": "clip_name", "type": "COMBO", "linkIds": [110], "pos": [-44.775390625, 450]}, {"id": "59e20fb5-cd61-4d4b-a1fd-15a90c7ba6c2", "name": "vae_name", "type": "COMBO", "linkIds": [111], "pos": [-44.775390625, 470]}, {"id": "adc35153-dc52-4bac-be7e-9da19471f441", "name": "model_name", "type": "COMBO", "linkIds": [112], "label": "upscale_model", "pos": [-44.775390625, 490]}, {"id": "c1b2f097-616e-4420-93c8-04eb79f4ba1e", "name": "denoise", "type": "FLOAT", "linkIds": [115], "pos": [-44.775390625, 510]}], "outputs": [{"id": "f138a0aa-489a-42e1-92f7-e3747688c94d", "name": "IMAGE_1", "type": "IMAGE", "linkIds": [97, 103], "localized_name": "IMAGE_1", "label": "IMAGE", "pos": [2090, 510]}], "widgets": [], "nodes": [{"id": 71, "type": "CLIPTextEncode", "pos": [648.333324162179, 398.3333435177784], "size": [491.6666666666667, 150], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 82}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [83]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#323", "bgcolor": "#535"}, {"id": 79, "type": "ImageUpscaleWithModel", "pos": 
[623.3333541162552, 714.9999406294688], "size": [233.5689453125, 60], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 87}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 88}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [92]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageUpscaleWithModel"}, "widgets_values": []}, {"id": 80, "type": "VAEEncode", "pos": [1173.3330331592938, 631.6665944654844], "size": [187.5, 60], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 93}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 90}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [91]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 81, "type": "ImageScaleBy", "pos": [865.0000410901742, 714.9999828835583], "size": [225, 95.546875], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 92}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "scale_by", "name": "scale_by", "type": "FLOAT", "widget": {"name": "scale_by"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [93]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageScaleBy"}, "widgets_values": ["lanczos", 0.5]}, {"id": 66, "type": "UNETLoader", "pos": [280, -20], "size": [323.984375, 118.64583333333334], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 109}, {"localized_name": 
"weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [104]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 62, "type": "CLIPLoader", "pos": [280, 140], "size": [323.984375, 150.65104166666669], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 110}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [78, 82]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 67, "type": "CLIPTextEncode", "pos": [650.621298596813, -33.81729273975067], "size": [491.9791666666667, 377.98177083333337], "flags": {}, "order": 4, "mode": 0, "inputs": 
[{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 78}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [75]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["masterpiece, 8k"], "color": "#232", "bgcolor": "#353"}, {"id": 63, "type": "VAELoader", "pos": [280, 330], "size": [323.984375, 83.99739583333334], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 111}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [73, 90]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 76, "type": "UpscaleModelLoader", "pos": [264.07395879037364, 704.8118881098496], "size": [323.984375, 83.99739583333334], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 112}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [87]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": 
"RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}, {"id": 70, "type": "ModelSamplingAuraFlow", "pos": [1200, -50], "size": [371.9791666666667, 80.1171875], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 104}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [74]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 65, "type": "VAEDecode", "pos": [1610, -50], "size": [251.97916666666669, 72.13541666666667], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 72}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 73}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [97, 103]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 78, "type": "ImageScaleToTotalPixels", "pos": [260, 850], "size": [325, 122.21354166666667], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 86}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, 
{"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [88]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["lanczos", 1, 1]}, {"id": 69, "type": "KSampler", "pos": [1200, 80], "size": [366.6666666666667, 474], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 74}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 75}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 83}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 91}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": 115}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [72]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": 
[1098688918602660, "randomize", 5, 1, "dpmpp_2m_sde", "beta", 0.33]}], "groups": [{"id": 3, "title": "Prompt", "bounding": [640, -90, 508.64583333333337, 662.0666813520016], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Models", "bounding": [260, -90, 344.6965254233087, 516.414685926878], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 104, "origin_id": 66, "origin_slot": 0, "target_id": 70, "target_slot": 0, "type": "MODEL"}, {"id": 82, "origin_id": 62, "origin_slot": 0, "target_id": 71, "target_slot": 0, "type": "CLIP"}, {"id": 87, "origin_id": 76, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "UPSCALE_MODEL"}, {"id": 88, "origin_id": 78, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "IMAGE"}, {"id": 93, "origin_id": 81, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 90, "origin_id": 63, "origin_slot": 0, "target_id": 80, "target_slot": 1, "type": "VAE"}, {"id": 92, "origin_id": 79, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "IMAGE"}, {"id": 74, "origin_id": 70, "origin_slot": 0, "target_id": 69, "target_slot": 0, "type": "MODEL"}, {"id": 75, "origin_id": 67, "origin_slot": 0, "target_id": 69, "target_slot": 1, "type": "CONDITIONING"}, {"id": 83, "origin_id": 71, "origin_slot": 0, "target_id": 69, "target_slot": 2, "type": "CONDITIONING"}, {"id": 91, "origin_id": 80, "origin_slot": 0, "target_id": 69, "target_slot": 3, "type": "LATENT"}, {"id": 72, "origin_id": 69, "origin_slot": 0, "target_id": 65, "target_slot": 0, "type": "LATENT"}, {"id": 73, "origin_id": 63, "origin_slot": 0, "target_id": 65, "target_slot": 1, "type": "VAE"}, {"id": 78, "origin_id": 62, "origin_slot": 0, "target_id": 67, "target_slot": 0, "type": "CLIP"}, {"id": 86, "origin_id": -10, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 97, "origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 103, 
"origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 109, "origin_id": -10, "origin_slot": 1, "target_id": 66, "target_slot": 0, "type": "COMBO"}, {"id": 110, "origin_id": -10, "origin_slot": 2, "target_id": 62, "target_slot": 0, "type": "COMBO"}, {"id": 111, "origin_id": -10, "origin_slot": 3, "target_id": 63, "target_slot": 0, "type": "COMBO"}, {"id": 112, "origin_id": -10, "origin_slot": 4, "target_id": 76, "target_slot": 0, "type": "COMBO"}, {"id": 115, "origin_id": -10, "origin_slot": 5, "target_id": 69, "target_slot": 9, "type": "FLOAT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Enhance"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG"}, "version": 0.4} +{ + "id": "bf8108f3-d857-46c9-aef5-0e8ad2a64bf5", + "revision": 0, + "last_node_id": 95, + "last_link_id": 115, + "nodes": [ + { + "id": 87, + "type": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", + "pos": [ + 960.6668984200231, + 332.66676187423354 + ], + "size": [ + 400, + 469.9869791666667 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE_1", + "name": "IMAGE_1", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "67", + "text" + ], + [ + "69", + "seed" + ], + [ 
+ "69", + "control_after_generate" + ], + [ + "-1", + "denoise" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.14.1" + }, + "widgets_values": [ + null, + null, + null, + 0.33, + "z_image_turbo_bf16.safetensors", + "qwen_3_4b.safetensors", + "ae.safetensors", + "RealESRGAN_x4plus.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", + "version": 1, + "state": { + "lastGroupId": 5, + "lastNodeId": 95, + "lastLinkId": 115, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Upscale (Z-image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + -150, + 390, + 125.224609375, + 160 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2070, + 490, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "e9a14390-4f93-4065-8b02-323f999527c0", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 86 + ], + "localized_name": "image", + "pos": [ + -44.775390625, + 410 + ] + }, + { + "id": "c5655e11-9531-4949-996c-958b5fe92085", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 109 + ], + "pos": [ + -44.775390625, + 430 + ] + }, + { + "id": "82576043-dd69-4604-b572-09fabb6e602d", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 110 + ], + "pos": [ + -44.775390625, + 450 + ] + }, + { + "id": "59e20fb5-cd61-4d4b-a1fd-15a90c7ba6c2", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 111 + ], + "pos": [ + -44.775390625, + 470 + ] + }, + { + "id": "adc35153-dc52-4bac-be7e-9da19471f441", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 112 + ], + "label": "upscale_model", + "pos": [ + -44.775390625, + 490 + ] + }, + { + "id": "c1b2f097-616e-4420-93c8-04eb79f4ba1e", + "name": "denoise", + "type": "FLOAT", + "linkIds": [ + 115 + ], + "pos": [ + -44.775390625, + 510 + ] + } + ], + "outputs": [ + { + "id": 
"f138a0aa-489a-42e1-92f7-e3747688c94d", + "name": "IMAGE_1", + "type": "IMAGE", + "linkIds": [ + 97, + 103 + ], + "localized_name": "IMAGE_1", + "label": "IMAGE", + "pos": [ + 2090, + 510 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 71, + "type": "CLIPTextEncode", + "pos": [ + 648.333324162179, + 398.3333435177784 + ], + "size": [ + 491.6666666666667, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 82 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 83 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 79, + "type": "ImageUpscaleWithModel", + "pos": [ + 623.3333541162552, + 714.9999406294688 + ], + "size": [ + 233.5689453125, + 60 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "UPSCALE_MODEL", + "link": 87 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 88 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 92 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "ImageUpscaleWithModel" + }, + "widgets_values": [] + }, + { + "id": 80, + "type": "VAEEncode", + "pos": [ + 1173.3330331592938, + 631.6665944654844 + ], + "size": [ + 187.5, + 60 + ], + "flags": {}, 
+ "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 93 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 90 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 91 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [] + }, + { + "id": 81, + "type": "ImageScaleBy", + "pos": [ + 865.0000410901742, + 714.9999828835583 + ], + "size": [ + 225, + 95.546875 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 92 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "scale_by", + "name": "scale_by", + "type": "FLOAT", + "widget": { + "name": "scale_by" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 93 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "ImageScaleBy" + }, + "widgets_values": [ + "lanczos", + 0.5 + ] + }, + { + "id": 66, + "type": "UNETLoader", + "pos": [ + 280, + -20 + ], + "size": [ + 323.984375, + 118.64583333333334 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 109 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 104 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": 
"UNETLoader", + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 62, + "type": "CLIPLoader", + "pos": [ + 280, + 140 + ], + "size": [ + 323.984375, + 150.65104166666669 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 110 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 78, + 82 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 650.621298596813, + -33.81729273975067 + ], + "size": [ + 491.9791666666667, + 377.98177083333337 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + 
"localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 78 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 75 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "masterpiece, 8k" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 63, + "type": "VAELoader", + "pos": [ + 280, + 330 + ], + "size": [ + 323.984375, + 83.99739583333334 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 111 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 73, + 90 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 76, + "type": "UpscaleModelLoader", + "pos": [ + 264.07395879037364, + 704.8118881098496 + ], + "size": [ + 323.984375, + 83.99739583333334 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": 
{ + "name": "model_name" + }, + "link": 112 + } + ], + "outputs": [ + { + "localized_name": "UPSCALE_MODEL", + "name": "UPSCALE_MODEL", + "type": "UPSCALE_MODEL", + "links": [ + 87 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "UpscaleModelLoader", + "models": [ + { + "name": "RealESRGAN_x4plus.safetensors", + "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", + "directory": "upscale_models" + } + ] + }, + "widgets_values": [ + "RealESRGAN_x4plus.safetensors" + ] + }, + { + "id": 70, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1200, + -50 + ], + "size": [ + 371.9791666666667, + 80.1171875 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 104 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 74 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 1610, + -50 + ], + "size": [ + 251.97916666666669, + 72.13541666666667 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 72 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 73 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 97, + 103 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", 
+ "ver": "0.3.64", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 78, + "type": "ImageScaleToTotalPixels", + "pos": [ + 260, + 850 + ], + "size": [ + 325, + 122.21354166666667 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 86 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 88 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "ImageScaleToTotalPixels" + }, + "widgets_values": [ + "lanczos", + 1, + 1 + ] + }, + { + "id": 69, + "type": "KSampler", + "pos": [ + 1200, + 80 + ], + "size": [ + 366.6666666666667, + 474 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 74 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 75 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 83 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 91 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + 
"name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": 115 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 72 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1098688918602660, + "randomize", + 5, + 1, + "dpmpp_2m_sde", + "beta", + 0.33 + ] + } + ], + "groups": [ + { + "id": 3, + "title": "Prompt", + "bounding": [ + 640, + -90, + 508.64583333333337, + 662.0666813520016 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Models", + "bounding": [ + 260, + -90, + 344.6965254233087, + 516.414685926878 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 104, + "origin_id": 66, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 82, + "origin_id": 62, + "origin_slot": 0, + "target_id": 71, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 87, + "origin_id": 76, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "UPSCALE_MODEL" + }, + { + "id": 88, + "origin_id": 78, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": 
"IMAGE" + }, + { + "id": 93, + "origin_id": 81, + "origin_slot": 0, + "target_id": 80, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 90, + "origin_id": 63, + "origin_slot": 0, + "target_id": 80, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 92, + "origin_id": 79, + "origin_slot": 0, + "target_id": 81, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 74, + "origin_id": 70, + "origin_slot": 0, + "target_id": 69, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 75, + "origin_id": 67, + "origin_slot": 0, + "target_id": 69, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 83, + "origin_id": 71, + "origin_slot": 0, + "target_id": 69, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 91, + "origin_id": 80, + "origin_slot": 0, + "target_id": 69, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 72, + "origin_id": 69, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 73, + "origin_id": 63, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 78, + "origin_id": 62, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 86, + "origin_id": -10, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 97, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 103, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 109, + "origin_id": -10, + "origin_slot": 1, + "target_id": 66, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 110, + "origin_id": -10, + "origin_slot": 2, + "target_id": 62, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 111, + "origin_id": -10, + "origin_slot": 3, + "target_id": 63, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 112, + "origin_id": -10, + "origin_slot": 4, + "target_id": 76, + "target_slot": 
0, + "type": "COMBO" + }, + { + "id": 115, + "origin_id": -10, + "origin_slot": 5, + "target_id": 69, + "target_slot": 9, + "type": "FLOAT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Enhance", + "description": "Upscales images to higher resolution using Z-Image-Turbo." + } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG" + }, + "version": 0.4 +} diff --git a/blueprints/Image to Depth Map (Lotus).json b/blueprints/Image to Depth Map (Lotus).json index 5b3f7a1d6..12f10ba5b 100644 --- a/blueprints/Image to Depth Map (Lotus).json +++ b/blueprints/Image to Depth Map (Lotus).json @@ -1 +1,968 @@ -{"id": "6af0a6c1-0161-4528-8685-65776e838d44", "revision": 0, "last_node_id": 75, "last_link_id": 245, "nodes": [{"id": 75, "type": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", "pos": [600, 830], "size": [400, 110], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": null}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 75, "lastLinkId": 245, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, 
"outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "cb96b9fe-93e7-41cf-b27f-6d6dc3a1890b", "name": "unet_name", "type": "COMBO", "linkIds": [244], "pos": [46.625, -112.61268043518066]}, {"id": "42c8efad-1661-49c7-89b5-2b735b72424d", "name": "vae_name", "type": "COMBO", "linkIds": [245], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 10, "type": "UNETLoader", "pos": [108.05555555555557, -253.05555555555557], "size": [254.93706597222226, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 244}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 18, "type": "DisableNoise", "pos": [607.0641494069639, -268.33337840371513], "size": [175, 33.333333333333336], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [{"localized_name": 
"NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [620, 160], "size": [175, 50], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 21, "type": "KSamplerSelect", "pos": [610, -60], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 19, "type": "BasicGuider", "pos": [610, -170], "size": [175, 50], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [890, -130], "size": [295.99609375, 271.65798611111114], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "noise", 
"name": "noise", "type": "NOISE", "link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [620, 50], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 8, "type": "VAEDecode", "pos": [1210, -120], "size": [175, 50], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 22, "type": "ImageInvert", "pos": [1200, -220], "size": [175, 33.333333333333336], "flags": {}, "order": 9, "mode": 0, "inputs": 
[{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 14, "type": "VAELoader", "pos": [120, -90], "size": [254.93706597222226, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 245}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 68, "type": "LotusConditioning", "pos": [400, -150], "size": [175, 33.333333333333336], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [170, 40], "size": [210, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": 
{"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}], "groups": [], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, "target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 244, "origin_id": -10, "origin_slot": 2, "target_id": 
10, "target_slot": 0, "type": "COMBO"}, {"id": 245, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Depth to image"}]}, "config": {}, "extra": {"ds": {"scale": 1.3589709866044692, "offset": [-138.53613935617864, -786.0629126022195]}, "workflowRendererVersion": "LG"}, "version": 0.4} +{ + "id": "6af0a6c1-0161-4528-8685-65776e838d44", + "revision": 0, + "last_node_id": 75, + "last_link_id": 245, + "nodes": [ + { + "id": 75, + "type": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", + "pos": [ + 600, + 830 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": null + }, + { + "label": "depth_intensity", + "name": "sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "sigma" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.14.1" + }, + "widgets_values": [ + 999.0000000000002, + "lotus-depth-d-v1-1.safetensors", + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", + "version": 1, + "state": { + "lastGroupId": 1, + "lastNodeId": 75, + "lastLinkId": 245, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Depth Map (Lotus)", + "inputNode": { + "id": -10, + "bounding": [ + -60, + -172.61268043518066, + 126.625, + 120 + ] + }, + 
"outputNode": { + "id": -20, + "bounding": [ + 1650, + -172.61268043518066, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 37 + ], + "localized_name": "pixels", + "pos": [ + 46.625, + -152.61268043518066 + ] + }, + { + "id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", + "name": "sigma", + "type": "FLOAT", + "linkIds": [ + 243 + ], + "label": "depth_intensity", + "pos": [ + 46.625, + -132.61268043518066 + ] + }, + { + "id": "cb96b9fe-93e7-41cf-b27f-6d6dc3a1890b", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 244 + ], + "pos": [ + 46.625, + -112.61268043518066 + ] + }, + { + "id": "42c8efad-1661-49c7-89b5-2b735b72424d", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 245 + ], + "pos": [ + 46.625, + -92.61268043518066 + ] + } + ], + "outputs": [ + { + "id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 242 + ], + "localized_name": "IMAGE", + "pos": [ + 1670, + -152.61268043518066 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 10, + "type": "UNETLoader", + "pos": [ + 108.05555555555557, + -253.05555555555557 + ], + "size": [ + 254.93706597222226, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 244 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 31, + 241 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "lotus-depth-d-v1-1.safetensors", + "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", + 
"directory": "diffusion_models" + } + ], + "widget_ue_connectable": {} + }, + "widgets_values": [ + "lotus-depth-d-v1-1.safetensors", + "default" + ] + }, + { + "id": 18, + "type": "DisableNoise", + "pos": [ + 607.0641494069639, + -268.33337840371513 + ], + "size": [ + 175, + 33.333333333333336 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [ + 237 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "DisableNoise", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 23, + "type": "VAEEncode", + "pos": [ + 620, + 160 + ], + "size": [ + 175, + 50 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 37 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 38 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 201 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEEncode", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 21, + "type": "KSamplerSelect", + "pos": [ + 610, + -60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "KSamplerSelect", + "widget_ue_connectable": {} + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 19, + "type": "BasicGuider", + "pos": [ + 610, + -170 + ], 
+ "size": [ + 175, + 50 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 241 + }, + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 238 + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicGuider", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 16, + "type": "SamplerCustomAdvanced", + "pos": [ + 890, + -130 + ], + "size": [ + 295.99609375, + 271.65798611111114 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 237 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 27 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 33 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 194 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 201 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 232 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SamplerCustomAdvanced", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SetFirstSigma", + "pos": [ + 620, + 50 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 66 + }, + { + "localized_name": "sigma", + "name": 
"sigma", + "type": "FLOAT", + "widget": { + "name": "sigma" + }, + "link": 243 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 194 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "SetFirstSigma", + "widget_ue_connectable": {} + }, + "widgets_values": [ + 999.0000000000002 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + -120 + ], + "size": [ + 175, + 50 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 232 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 240 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEDecode", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 22, + "type": "ImageInvert", + "pos": [ + 1200, + -220 + ], + "size": [ + 175, + 33.333333333333336 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 242 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "ImageInvert", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 14, + "type": "VAELoader", + "pos": [ + 120, + -90 + ], + "size": [ + 254.93706597222226, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 245 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": 
"VAE", + "slot_index": 0, + "links": [ + 38, + 240 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "vae-ft-mse-840000-ema-pruned.safetensors", + "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", + "directory": "vae" + } + ], + "widget_ue_connectable": {} + }, + "widgets_values": [ + "vae-ft-mse-840000-ema-pruned.safetensors" + ] + }, + { + "id": 68, + "type": "LotusConditioning", + "pos": [ + 400, + -150 + ], + "size": [ + 175, + 33.333333333333336 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 238 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "LotusConditioning", + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 20, + "type": "BasicScheduler", + "pos": [ + 170, + 40 + ], + "size": [ + 210, + 106 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 31 + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 66 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "BasicScheduler", + "widget_ue_connectable": {} + }, + "widgets_values": [ + "normal", + 1, + 1 + ] + } + ], + 
"groups": [], + "links": [ + { + "id": 232, + "origin_id": 16, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 14, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 237, + "origin_id": 18, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 27, + "origin_id": 19, + "origin_slot": 0, + "target_id": 16, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 33, + "origin_id": 21, + "origin_slot": 0, + "target_id": 16, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 194, + "origin_id": 28, + "origin_slot": 0, + "target_id": 16, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 201, + "origin_id": 23, + "origin_slot": 0, + "target_id": 16, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 241, + "origin_id": 10, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 238, + "origin_id": 68, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 31, + "origin_id": 10, + "origin_slot": 0, + "target_id": 20, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 35, + "origin_id": 8, + "origin_slot": 0, + "target_id": 22, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 38, + "origin_id": 14, + "origin_slot": 0, + "target_id": 23, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 66, + "origin_id": 20, + "origin_slot": 0, + "target_id": 28, + "target_slot": 0, + "type": "SIGMAS" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 242, + "origin_id": 22, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 243, + "origin_id": -10, + "origin_slot": 1, + "target_id": 28, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 244, + "origin_id": -10, + "origin_slot": 2, + "target_id": 10, + 
"target_slot": 0, + "type": "COMBO" + }, + { + "id": 245, + "origin_id": -10, + "origin_slot": 3, + "target_id": 14, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Depth to image", + "description": "Estimates a monocular depth map from an input image using the Lotus depth estimation model." + } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 1.3589709866044692, + "offset": [ + -138.53613935617864, + -786.0629126022195 + ] + }, + "workflowRendererVersion": "LG" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Image to Layers(Qwen-Image Layered).json b/blueprints/Image to Layers(Qwen-Image Layered).json deleted file mode 100644 index f4c7f0b5f..000000000 --- a/blueprints/Image to Layers(Qwen-Image Layered).json +++ /dev/null @@ -1 +0,0 @@ -{"id": "1a761372-7c82-4016-b9bf-fa285967e1e9", "revision": 0, "last_node_id": 83, "last_link_id": 0, "nodes": [{"id": 83, "type": "f754a936-daaf-4b6e-9658-41fdc54d301d", "pos": [61.999827823554256, 153.3332507624185], "size": [400, 550], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"name": "layers", "type": "INT", "widget": {"name": "layers"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "steps"], ["-1", "cfg"], ["-1", "layers"], ["3", "seed"], ["3", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, 
"widgets_values": ["", 20, 2.5, 2]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "f754a936-daaf-4b6e-9658-41fdc54d301d", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 83, "lastLinkId": 159, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Layers (Qwen-Image-Layered)", "inputNode": {"id": -10, "bounding": [-510, 523, 120, 140]}, "outputNode": {"id": -20, "bounding": [1160, 523, 120, 60]}, "inputs": [{"id": "6c36b5bc-c9a5-4b07-8b52-6fe0df434cce", "name": "image", "type": "IMAGE", "linkIds": [148, 149], "localized_name": "image", "pos": [-410, 543]}, {"id": "8497fe33-124d-4e3e-9ab6-fc4a56a98dde", "name": "text", "type": "STRING", "linkIds": [150], "pos": [-410, 563]}, {"id": "509ab2c1-e6da-47ba-8714-023100ab92bd", "name": "steps", "type": "INT", "linkIds": [153], "pos": [-410, 583]}, {"id": "dd81894e-5def-4c75-9b17-d8f89fe095d6", "name": "cfg", "type": "FLOAT", "linkIds": [154], "pos": [-410, 603]}, {"id": "66da7c8a-3369-4a3f-92f2-3073afc55e7d", "name": "layers", "type": "INT", "linkIds": [159], "pos": [-410, 623]}], "outputs": [{"id": "7df75921-6729-4aad-bfc1-fcc536c2d298", "name": "IMAGE", "type": "IMAGE", "linkIds": [110], "localized_name": "IMAGE", "pos": [1180, 543]}], "widgets": [], "nodes": [{"id": 38, "type": "CLIPLoader", "pos": [-320, 310], "size": [346.7470703125, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"Node name for S&R": "CLIPLoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", 
"url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 39, "type": "VAELoader", "pos": [-320, 460], "size": [346.7470703125, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 139]}], "properties": {"Node name for S&R": "VAELoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_image_layered_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/vae/qwen_image_layered_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_layered_vae.safetensors"]}, {"id": 7, "type": "CLIPTextEncode", "pos": [70, 420], "size": [425.27801513671875, 180.6060791015625], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [131]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", 
"secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 70, "type": "ReferenceLatent", "pos": [330, 670], "size": [204.1666717529297, 46], "flags": {"collapsed": true}, "order": 9, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 131}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 134}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [132]}], "properties": {"Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 69, "type": "ReferenceLatent", "pos": [330, 710], "size": [204.1666717529297, 46], "flags": {"collapsed": true}, "order": 8, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 129}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 133}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [130]}], "properties": {"Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 66, "type": "ModelSamplingAuraFlow", "pos": [530, 150], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 126}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [125]}], "properties": {"Node name for 
S&R": "ModelSamplingAuraFlow", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 76, "type": "LatentCutToBatch", "pos": [830, 160], "size": [270, 82], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 142}, {"localized_name": "dim", "name": "dim", "type": "COMBO", "widget": {"name": "dim"}, "link": null}, {"localized_name": "slice_size", "name": "slice_size", "type": "INT", "widget": {"name": "slice_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [143]}], "properties": {"Node name for S&R": "LatentCutToBatch", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["t", 1]}, {"id": 71, "type": "VAEEncode", "pos": [100, 690], "size": [140, 46], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 149}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 139}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [133, 134]}], "properties": {"Node name for S&R": "VAEEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [850, 310], "size": [210, 46], "flags": {"collapsed": true}, "order": 7, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 143}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": 
[{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [110]}], "properties": {"Node name for S&R": "VAEDecode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 6, "type": "CLIPTextEncode", "pos": [70, 180], "size": [422.84503173828125, 164.31304931640625], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 150}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [129]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 3, "type": "KSampler", "pos": [530, 280], "size": [270, 400], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 125}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 130}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 132}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 157}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": 153}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": 154}, {"localized_name": "sampler_name", "name": "sampler_name", 
"type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [142]}], "properties": {"Node name for S&R": "KSampler", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 20, 2.5, "euler", "simple", 1]}, {"id": 78, "type": "GetImageSize", "pos": [80, 790], "size": [210, 136], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 148}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [155]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [156]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"Node name for S&R": "GetImageSize", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 83, "type": "EmptyQwenImageLayeredLatentImage", "pos": [320, 790], "size": [330.9341796875, 130], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 155}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 156}, {"localized_name": "layers", "name": "layers", "type": "INT", "widget": {"name": "layers"}, "link": 159}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": 
"batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [157]}], "properties": {"Node name for S&R": "EmptyQwenImageLayeredLatentImage", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [640, 640, 2, 1]}, {"id": 37, "type": "UNETLoader", "pos": [-320, 180], "size": [346.7470703125, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [126]}], "properties": {"Node name for S&R": "UNETLoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_image_layered_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_layered_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_layered_bf16.safetensors", "default"]}], "groups": [{"id": 1, "title": "Prompt(Optional)", "bounding": [60, 110, 450, 510], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Load Models", "bounding": [-330, 110, 366.7470703125, 421.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 131, "origin_id": 7, "origin_slot": 0, "target_id": 70, "target_slot": 0, "type": "CONDITIONING"}, {"id": 134, "origin_id": 71, "origin_slot": 0, 
"target_id": 70, "target_slot": 1, "type": "LATENT"}, {"id": 129, "origin_id": 6, "origin_slot": 0, "target_id": 69, "target_slot": 0, "type": "CONDITIONING"}, {"id": 133, "origin_id": 71, "origin_slot": 0, "target_id": 69, "target_slot": 1, "type": "LATENT"}, {"id": 126, "origin_id": 37, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "MODEL"}, {"id": 125, "origin_id": 66, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 130, "origin_id": 69, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 132, "origin_id": 70, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 142, "origin_id": 3, "origin_slot": 0, "target_id": 76, "target_slot": 0, "type": "LATENT"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 139, "origin_id": 39, "origin_slot": 0, "target_id": 71, "target_slot": 1, "type": "VAE"}, {"id": 143, "origin_id": 76, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 148, "origin_id": -10, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 149, "origin_id": -10, "origin_slot": 0, "target_id": 71, "target_slot": 0, "type": "IMAGE"}, {"id": 110, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 150, "origin_id": -10, "origin_slot": 1, "target_id": 6, "target_slot": 1, "type": "STRING"}, {"id": 153, "origin_id": -10, "origin_slot": 2, "target_id": 3, "target_slot": 5, "type": "INT"}, {"id": 154, "origin_id": -10, "origin_slot": 3, "target_id": 3, "target_slot": 6, "type": "FLOAT"}, {"id": 155, "origin_id": 78, "origin_slot": 0, "target_id": 83, "target_slot": 0, "type": "INT"}, {"id": 156, "origin_id": 78, "origin_slot": 1, "target_id": 83, "target_slot": 1, "type": "INT"}, {"id": 157, "origin_id": 83, 
"origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 159, "origin_id": -10, "origin_slot": 4, "target_id": 83, "target_slot": 2, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Image to layers"}]}, "config": {}, "extra": {"ds": {"scale": 1.14, "offset": [695.5933739308316, 6.855893974423647]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image to Layers(Qwen-Image-Layered).json b/blueprints/Image to Layers(Qwen-Image-Layered).json new file mode 100644 index 000000000..7b44f0563 --- /dev/null +++ b/blueprints/Image to Layers(Qwen-Image-Layered).json @@ -0,0 +1,1604 @@ +{ + "revision": 0, + "last_node_id": 176, + "last_link_id": 0, + "nodes": [ + { + "id": 176, + "type": "2d2e3c8e-53b3-4618-be52-6d1d99382f0e", + "pos": [ + -1150, + 200 + ], + "size": [ + 400, + 550 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "layers", + "type": "INT", + "widget": { + "name": "layers" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "title": "Image to Layers 
(Qwen-Image-Layered)", + "properties": { + "proxyWidgets": [ + [ + "6", + "text" + ], + [ + "3", + "steps" + ], + [ + "3", + "cfg" + ], + [ + "83", + "layers" + ], + [ + "3", + "seed" + ], + [ + "37", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "39", + "vae_name" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2d2e3c8e-53b3-4618-be52-6d1d99382f0e", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 176, + "lastLinkId": 380, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Layers (Qwen-Image-Layered)", + "inputNode": { + "id": -10, + "bounding": [ + -720, + 720, + 120, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1160, + 523, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "6c36b5bc-c9a5-4b07-8b52-6fe0df434cce", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 148, + 149 + ], + "localized_name": "image", + "pos": [ + -620, + 740 + ] + }, + { + "id": "8497fe33-124d-4e3e-9ab6-fc4a56a98dde", + "name": "text", + "type": "STRING", + "linkIds": [ + 150 + ], + "pos": [ + -620, + 760 + ] + }, + { + "id": "509ab2c1-e6da-47ba-8714-023100ab92bd", + "name": "steps", + "type": "INT", + "linkIds": [ + 153 + ], + "pos": [ + -620, + 780 + ] + }, + { + "id": "dd81894e-5def-4c75-9b17-d8f89fe095d6", + "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 154 + ], + "pos": [ + -620, + 800 + ] + }, + { + "id": "66da7c8a-3369-4a3f-92f2-3073afc55e7d", + "name": "layers", + "type": "INT", + "linkIds": [ + 159 + ], + "pos": [ + -620, + 820 + ] + }, + { + "id": 
"9f76338b-f4ca-4bb3-b61a-57b3f233061e", + "name": "seed", + "type": "INT", + "linkIds": [ + 377 + ], + "pos": [ + -620, + 840 + ] + }, + { + "id": "8d0422d5-5eee-4f7e-9817-dc613cc62eca", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 378 + ], + "pos": [ + -620, + 860 + ] + }, + { + "id": "552eece2-a735-4d00-ae78-ded454622bc1", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 379 + ], + "pos": [ + -620, + 880 + ] + }, + { + "id": "1e6d141c-d0f9-4a2b-895c-b6780e57cfa0", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 380 + ], + "pos": [ + -620, + 900 + ] + } + ], + "outputs": [ + { + "id": "7df75921-6729-4aad-bfc1-fcc536c2d298", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 110 + ], + "localized_name": "IMAGE", + "pos": [ + 1180, + 543 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -320, + 360 + ], + "size": [ + 350, + 150 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 379 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + 
], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + -320, + 580 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 380 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 139 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_layered_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/vae/qwen_image_layered_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_layered_vae.safetensors" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 70, + 420 + ], + "size": [ + 430, + 190 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 75 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 131 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 70, + "type": "ReferenceLatent", + "pos": [ + 140, + 700 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 131 + }, + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 134 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 132 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ReferenceLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 69, + "type": "ReferenceLatent", + "pos": [ + 160, + 820 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 129 + }, + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 133 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 130 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ReferenceLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 66, + "type": "ModelSamplingAuraFlow", + "pos": [ + 530, + 150 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 126 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 125 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 76, + "type": "LatentCutToBatch", + "pos": [ + 830, + 140 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 142 + }, + { + "localized_name": "dim", + "name": "dim", + "type": "COMBO", + "widget": { + "name": "dim" + }, + "link": null + }, + { + "localized_name": "slice_size", + "name": "slice_size", + "type": "INT", + "widget": { + "name": "slice_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 143 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + 
"input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "LatentCutToBatch", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "t", + 1 + ] + }, + { + "id": 71, + "type": "VAEEncode", + "pos": [ + -280, + 780 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 149 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 139 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 133, + 134 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 850, + 370 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 143 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 70, + 180 + ], + "size": [ + 430, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 74 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 150 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 129 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 530, + 340 + ], + "size": [ + 270, + 400 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 130 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 132 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 157 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 377 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 153 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + 
"link": 154 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 142 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 20, + 2.5, + "euler", + "simple", + 1 + ] + }, + { + "id": 78, + "type": "GetImageSize", + "pos": [ + -280, + 930 + ], + "size": [ + 230, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 148 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 155 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 156 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + 
}, + { + "id": 83, + "type": "EmptyQwenImageLayeredLatentImage", + "pos": [ + -280, + 1120 + ], + "size": [ + 340, + 200 + ], + "flags": {}, + "order": 13, + "mode": 0, + "showAdvanced": true, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 155 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 156 + }, + { + "localized_name": "layers", + "name": "layers", + "type": "INT", + "widget": { + "name": "layers" + }, + "link": 159 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 157 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "EmptyQwenImageLayeredLatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 640, + 640, + 2, + 1 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -320, + 180 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 378 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 126 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_layered_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_layered_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_layered_bf16.safetensors", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt(Optional)", + "bounding": [ + 60, + 110, + 450, + 510 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Load Models", + "bounding": [ + -330, + 110, + 370, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 75, + "origin_id": 38, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 131, + "origin_id": 7, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 134, + "origin_id": 71, + "origin_slot": 0, + "target_id": 70, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 129, + "origin_id": 6, + "origin_slot": 0, + "target_id": 69, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 133, + "origin_id": 71, + "origin_slot": 0, + "target_id": 69, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 126, + "origin_id": 37, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 125, + "origin_id": 66, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 130, + "origin_id": 69, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 132, + "origin_id": 70, + "origin_slot": 0, + 
"target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 142, + "origin_id": 3, + "origin_slot": 0, + "target_id": 76, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 74, + "origin_id": 38, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 139, + "origin_id": 39, + "origin_slot": 0, + "target_id": 71, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 143, + "origin_id": 76, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 148, + "origin_id": -10, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 149, + "origin_id": -10, + "origin_slot": 0, + "target_id": 71, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 110, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 150, + "origin_id": -10, + "origin_slot": 1, + "target_id": 6, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 153, + "origin_id": -10, + "origin_slot": 2, + "target_id": 3, + "target_slot": 5, + "type": "INT" + }, + { + "id": 154, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 155, + "origin_id": 78, + "origin_slot": 0, + "target_id": 83, + "target_slot": 0, + "type": "INT" + }, + { + "id": 156, + "origin_id": 78, + "origin_slot": 1, + "target_id": 83, + "target_slot": 1, + "type": "INT" + }, + { + "id": 157, + "origin_id": 83, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 159, + "origin_id": -10, + "origin_slot": 4, + "target_id": 83, + "target_slot": 2, + "type": "INT" + }, + { + "id": 377, + "origin_id": -10, + "origin_slot": 5, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 378, + "origin_id": -10, + "origin_slot": 6, + 
"target_id": 37, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 7, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 380, + "origin_id": -10, + "origin_slot": 8, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Image to layers", + "description": "Decomposes an image into variable-resolution RGBA layers for independent editing using Qwen-Image-Layered." + } + ] + }, + "extra": { + "ds": { + "scale": 1.14, + "offset": [ + 695.5933739308316, + 6.855893974423647 + ] + }, + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image to Model (Hunyuan3d 2.1).json b/blueprints/Image to Model (Hunyuan3d 2.1).json index 04b2d9bc9..ee5552656 100644 --- a/blueprints/Image to Model (Hunyuan3d 2.1).json +++ b/blueprints/Image to Model (Hunyuan3d 2.1).json @@ -1 +1,785 @@ -{"id": "8fe311ec-2147-47a8-b618-7bd6fb6d4f9d", "revision": 0, "last_node_id": 23, "last_link_id": 24, "nodes": [{"id": 19, "type": "feb7d184-edf3-4851-9fd6-57a92c00ec42", "pos": [277.7327250391088, 256.4066470374603], "size": [340, 70], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}], "outputs": [{"localized_name": "MESH", "name": "MESH", "type": "MESH", "links": []}], "properties": {"proxyWidgets": [["-1", "ckpt_name"]], "cnr_id": "comfy-core", "ver": "0.3.65"}, "widgets_values": ["hunyuan_3d_v2.1.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "feb7d184-edf3-4851-9fd6-57a92c00ec42", "version": 1, "state": {"lastGroupId": 2, "lastNodeId": 23, "lastLinkId": 24, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Model (Hunyuan3d 2.1)", "inputNode": {"id": -10, "bounding": [-138.94803619384766, 
-392.62060546875, 120, 80]}, "outputNode": {"id": -20, "bounding": [1090, -310, 120, 60]}, "inputs": [{"id": "ab9b5b83-88f9-4698-954d-93f644bd07aa", "name": "image", "type": "IMAGE", "linkIds": [21], "localized_name": "image", "pos": [-38.948036193847656, -372.62060546875]}, {"id": "e15b0ba4-b5fe-41eb-9266-006ce1f1cf79", "name": "ckpt_name", "type": "COMBO", "linkIds": [23], "pos": [-38.948036193847656, -352.62060546875]}], "outputs": [{"id": "c8744662-e812-49b3-8bc8-744d557db6d6", "name": "MESH", "type": "MESH", "linkIds": [11], "localized_name": "MESH", "pos": [1110, -290]}], "widgets": [], "nodes": [{"id": 7, "type": "KSampler", "pos": [760, -510], "size": [270, 262], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 19}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "KSampler"}, "widgets_values": [894796671366012, "randomize", 30, 5, "euler", "normal", 1]}, {"id": 
13, "type": "CLIPVisionEncode", "pos": [450, -410], "size": [270, 80], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip_vision", "name": "clip_vision", "type": "CLIP_VISION", "link": 20}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 21}, {"localized_name": "crop", "name": "crop", "type": "COMBO", "widget": {"name": "crop"}, "link": null}], "outputs": [{"localized_name": "CLIP_VISION_OUTPUT", "name": "CLIP_VISION_OUTPUT", "type": "CLIP_VISION_OUTPUT", "links": [22]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "CLIPVisionEncode"}, "widgets_values": ["center"]}, {"id": 6, "type": "Hunyuan3Dv2Conditioning", "pos": [510, -280], "size": [217.82578125, 46], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip_vision_output", "name": "clip_vision_output", "type": "CLIP_VISION_OUTPUT", "link": 22}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [5]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "Hunyuan3Dv2Conditioning"}, "widgets_values": []}, {"id": 4, "type": "EmptyLatentHunyuan3Dv2", "pos": [450, -180], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "resolution", "name": "resolution", "type": "INT", "widget": {"name": "resolution"}, "link": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "EmptyLatentHunyuan3Dv2"}, "widgets_values": [4096, 1]}, {"id": 9, "type": "VoxelToMesh", "pos": [760, -40], "size": [270, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "voxel", "name": "voxel", "type": 
"VOXEL", "link": 10}, {"localized_name": "algorithm", "name": "algorithm", "type": "COMBO", "widget": {"name": "algorithm"}, "link": null}, {"localized_name": "threshold", "name": "threshold", "type": "FLOAT", "widget": {"name": "threshold"}, "link": null}], "outputs": [{"localized_name": "MESH", "name": "MESH", "type": "MESH", "links": [11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "VoxelToMesh"}, "widgets_values": ["surface net", 0.6]}, {"id": 8, "type": "VAEDecodeHunyuan3D", "pos": [760, -200], "size": [270, 102], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 8}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 18}, {"localized_name": "num_chunks", "name": "num_chunks", "type": "INT", "widget": {"name": "num_chunks"}, "link": null}, {"localized_name": "octree_resolution", "name": "octree_resolution", "type": "INT", "widget": {"name": "octree_resolution"}, "link": null}], "outputs": [{"localized_name": "VOXEL", "name": "VOXEL", "type": "VOXEL", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "VAEDecodeHunyuan3D"}, "widgets_values": [8000, 256]}, {"id": 1, "type": "ImageOnlyCheckpointLoader", "pos": [60, -510], "size": [356.0005859375, 100], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 23}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [16]}, {"localized_name": "CLIP_VISION", "name": "CLIP_VISION", "type": "CLIP_VISION", "links": [20]}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageOnlyCheckpointLoader", "models": [{"name": "hunyuan_3d_v2.1.safetensors", "url": 
"https://huggingface.co/Comfy-Org/hunyuan3D_2.1_repackaged/resolve/main/hunyuan_3d_v2.1.safetensors", "directory": "checkpoints"}]}, "widgets_values": ["hunyuan_3d_v2.1.safetensors"]}, {"id": 3, "type": "ModelSamplingAuraFlow", "pos": [450, -510], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 16}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [19]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [1]}], "groups": [], "links": [{"id": 16, "origin_id": 1, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 19, "origin_id": 3, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 6, "origin_slot": 1, "target_id": 7, "target_slot": 2, "type": "CONDITIONING"}, {"id": 7, "origin_id": 4, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 7, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 18, "origin_id": 1, "origin_slot": 2, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 10, "origin_id": 8, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "VOXEL"}, {"id": 20, "origin_id": 1, "origin_slot": 1, "target_id": 13, "target_slot": 0, "type": "CLIP_VISION"}, {"id": 22, "origin_id": 13, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP_VISION_OUTPUT"}, {"id": 21, "origin_id": -10, "origin_slot": 0, "target_id": 13, "target_slot": 1, "type": "IMAGE"}, {"id": 11, "origin_id": 9, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MESH"}, {"id": 23, "origin_id": -10, "origin_slot": 1, "target_id": 
1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "3D/Image to 3D Model"}]}, "config": {}, "extra": {"ds": {"scale": 0.620921323059155, "offset": [1636.2881100217016, 965.23503257945]}, "workflowRendererVersion": "LG"}, "version": 0.4} +{ + "id": "8fe311ec-2147-47a8-b618-7bd6fb6d4f9d", + "revision": 0, + "last_node_id": 23, + "last_link_id": 24, + "nodes": [ + { + "id": 19, + "type": "feb7d184-edf3-4851-9fd6-57a92c00ec42", + "pos": [ + 277.7327250391088, + 256.4066470374603 + ], + "size": [ + 340, + 70 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MESH", + "name": "MESH", + "type": "MESH", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "ckpt_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.65" + }, + "widgets_values": [ + "hunyuan_3d_v2.1.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "feb7d184-edf3-4851-9fd6-57a92c00ec42", + "version": 1, + "state": { + "lastGroupId": 2, + "lastNodeId": 23, + "lastLinkId": 24, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to 3D Model (Hunyuan3d 2.1)", + "inputNode": { + "id": -10, + "bounding": [ + -138.94803619384766, + -392.62060546875, + 120, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1090, + -310, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "ab9b5b83-88f9-4698-954d-93f644bd07aa", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 21 + ], + "localized_name": "image", + "pos": [ + -38.948036193847656, + -372.62060546875 + ] + }, + { + "id": "e15b0ba4-b5fe-41eb-9266-006ce1f1cf79", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 23 + ], + "pos": [ + -38.948036193847656, + 
-352.62060546875 + ] + } + ], + "outputs": [ + { + "id": "c8744662-e812-49b3-8bc8-744d557db6d6", + "name": "MESH", + "type": "MESH", + "linkIds": [ + 11 + ], + "localized_name": "MESH", + "pos": [ + 1110, + -290 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 7, + "type": "KSampler", + "pos": [ + 760, + -510 + ], + "size": [ + 270, + 262 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 19 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 5 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 7 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 894796671366012, + "randomize", + 30, + 5, + "euler", + "normal", + 1 + ] + }, + { + "id": 13, + "type": "CLIPVisionEncode", + "pos": [ + 450, + -410 + ], 
+ "size": [ + 270, + 80 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_vision", + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 20 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 21 + }, + { + "localized_name": "crop", + "name": "crop", + "type": "COMBO", + "widget": { + "name": "crop" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP_VISION_OUTPUT", + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 22 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "center" + ] + }, + { + "id": 6, + "type": "Hunyuan3Dv2Conditioning", + "pos": [ + 510, + -280 + ], + "size": [ + 217.82578125, + 46 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_vision_output", + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "link": 22 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 5 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 6 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "Hunyuan3Dv2Conditioning" + }, + "widgets_values": [] + }, + { + "id": 4, + "type": "EmptyLatentHunyuan3Dv2", + "pos": [ + 450, + -180 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "resolution", + "name": "resolution", + "type": "INT", + "widget": { + "name": "resolution" + }, + "link": null + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 7 + ] + } + ], + "properties": { 
+ "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "EmptyLatentHunyuan3Dv2" + }, + "widgets_values": [ + 4096, + 1 + ] + }, + { + "id": 9, + "type": "VoxelToMesh", + "pos": [ + 760, + -40 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "voxel", + "name": "voxel", + "type": "VOXEL", + "link": 10 + }, + { + "localized_name": "algorithm", + "name": "algorithm", + "type": "COMBO", + "widget": { + "name": "algorithm" + }, + "link": null + }, + { + "localized_name": "threshold", + "name": "threshold", + "type": "FLOAT", + "widget": { + "name": "threshold" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MESH", + "name": "MESH", + "type": "MESH", + "links": [ + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "VoxelToMesh" + }, + "widgets_values": [ + "surface net", + 0.6 + ] + }, + { + "id": 8, + "type": "VAEDecodeHunyuan3D", + "pos": [ + 760, + -200 + ], + "size": [ + 270, + 102 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 8 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 18 + }, + { + "localized_name": "num_chunks", + "name": "num_chunks", + "type": "INT", + "widget": { + "name": "num_chunks" + }, + "link": null + }, + { + "localized_name": "octree_resolution", + "name": "octree_resolution", + "type": "INT", + "widget": { + "name": "octree_resolution" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VOXEL", + "name": "VOXEL", + "type": "VOXEL", + "links": [ + 10 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "VAEDecodeHunyuan3D" + }, + "widgets_values": [ + 8000, + 256 + ] + }, + { + "id": 1, + "type": "ImageOnlyCheckpointLoader", + "pos": [ + 60, + -510 + ], + "size": [ + 356.0005859375, + 100 + ], + "flags": {}, 
+ "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 23 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 16 + ] + }, + { + "localized_name": "CLIP_VISION", + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 20 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ImageOnlyCheckpointLoader", + "models": [ + { + "name": "hunyuan_3d_v2.1.safetensors", + "url": "https://huggingface.co/Comfy-Org/hunyuan3D_2.1_repackaged/resolve/main/hunyuan_3d_v2.1.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "hunyuan_3d_v2.1.safetensors" + ] + }, + { + "id": 3, + "type": "ModelSamplingAuraFlow", + "pos": [ + 450, + -510 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 16 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 19 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.59", + "Node name for S&R": "ModelSamplingAuraFlow" + }, + "widgets_values": [ + 1 + ] + } + ], + "groups": [], + "links": [ + { + "id": 16, + "origin_id": 1, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 19, + "origin_id": 3, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 5, + "origin_id": 6, + "origin_slot": 0, + "target_id": 7, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 6, + "origin_id": 6, + "origin_slot": 1, + 
"target_id": 7, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 7, + "origin_id": 4, + "origin_slot": 0, + "target_id": 7, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 8, + "origin_id": 7, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 18, + "origin_id": 1, + "origin_slot": 2, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 10, + "origin_id": 8, + "origin_slot": 0, + "target_id": 9, + "target_slot": 0, + "type": "VOXEL" + }, + { + "id": 20, + "origin_id": 1, + "origin_slot": 1, + "target_id": 13, + "target_slot": 0, + "type": "CLIP_VISION" + }, + { + "id": 22, + "origin_id": 13, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP_VISION_OUTPUT" + }, + { + "id": 21, + "origin_id": -10, + "origin_slot": 0, + "target_id": 13, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 11, + "origin_id": 9, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MESH" + }, + { + "id": 23, + "origin_id": -10, + "origin_slot": 1, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "3D/Image to 3D Model", + "description": "Generates 3D mesh models from a single input image using Hunyuan3D 2.0/2.1." 
+ } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 0.620921323059155, + "offset": [ + 1636.2881100217016, + 965.23503257945 + ] + }, + "workflowRendererVersion": "LG" + }, + "version": 0.4 +} diff --git a/blueprints/Image to Video (LTX-2.3).json b/blueprints/Image to Video (LTX-2.3).json new file mode 100644 index 000000000..3db524ea0 --- /dev/null +++ b/blueprints/Image to Video (LTX-2.3).json @@ -0,0 +1,4234 @@ +{ + "revision": 0, + "last_node_id": 320, + "last_link_id": 0, + "nodes": [ + { + "id": 320, + "type": "2454ad83-157c-40dd-9f19-5daaf4041ce0", + "pos": [ + 30, + 4150 + ], + "size": [ + 390, + 466.625 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "first_frame", + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": null + }, + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "width", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "height", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "label": "duration", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "label": "distilled_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "latent_upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "label": "fps", + "name": "value_5", + "type": "INT", + "widget": { + "name": "value_5" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + 
"links": [] + } + ], + "title": "Image to Video (LTX-2.3)", + "properties": { + "proxyWidgets": [ + [ + "319", + "value" + ], + [ + "312", + "value" + ], + [ + "299", + "value" + ], + [ + "301", + "value" + ], + [ + "300", + "value" + ], + [ + "316", + "ckpt_name" + ], + [ + "277", + "control_after_generate" + ], + [ + "277", + "noise_seed" + ], + [ + "285", + "lora_name" + ], + [ + "317", + "text_encoder" + ], + [ + "311", + "model_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value_1": true, + "value_2": true, + "value_3": true, + "value_4": true, + "lora_name": true, + "model_name": true, + "value_5": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2454ad83-157c-40dd-9f19-5daaf4041ce0", + "version": 1, + "state": { + "lastGroupId": 25, + "lastNodeId": 323, + "lastLinkId": 631, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 730, + 4110, + 162.162109375, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 6590, + 4360, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "7afd6ea8-c738-4fd9-97b8-66fa905cd381", + "name": "input", + "type": "IMAGE,MASK", + "linkIds": [ + 535 + ], + "localized_name": "input", + "label": "first_frame", + "pos": [ + 872.162109375, + 4130 + ] + }, + { + "id": "9494c550-4172-49c6-930e-5b508f775e77", + "name": "value", + "type": "STRING", + "linkIds": [ + 595 + ], + "pos": [ + 872.162109375, + 4150 + ] + }, + { + "id": "58dbb3f6-f924-4548-96ef-e0e34610bd4e", + "name": "value_2", + "type": "INT", + "linkIds": [ + 597 + ], + "label": "width", + "pos": [ + 872.162109375, + 
4170 + ] + }, + { + "id": "6086d5b8-2586-448c-a641-dd14d76dd102", + "name": "value_3", + "type": "INT", + "linkIds": [ + 598 + ], + "label": "height", + "pos": [ + 872.162109375, + 4190 + ] + }, + { + "id": "feb8c2eb-ae48-4fa8-bc24-929552d656c3", + "name": "value_4", + "type": "INT", + "linkIds": [ + 599 + ], + "label": "duration", + "pos": [ + 872.162109375, + 4210 + ] + }, + { + "id": "d7255058-319a-4880-8f9a-7e542c8f3c3c", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 601, + 604, + 605 + ], + "pos": [ + 872.162109375, + 4230 + ] + }, + { + "id": "4afce68d-8f65-4342-9d6d-ae0a7688c3e3", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 602 + ], + "label": "distilled_lora", + "pos": [ + 872.162109375, + 4250 + ] + }, + { + "id": "ab842b4b-c977-4679-b421-424722785b57", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 606 + ], + "pos": [ + 872.162109375, + 4270 + ] + }, + { + "id": "9e47372d-28d9-4311-91e9-e90d03f4eb43", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 607 + ], + "label": "latent_upscale_model", + "pos": [ + 872.162109375, + 4290 + ] + }, + { + "id": "3e32ce15-0ae7-4cd0-909f-a354e8e9c4c9", + "name": "value_5", + "type": "INT", + "linkIds": [ + 624 + ], + "label": "fps", + "pos": [ + 872.162109375, + 4310 + ] + } + ], + "outputs": [ + { + "id": "954ef307-c897-4eea-8b5c-5c6ce15a5357", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 536 + ], + "localized_name": "VIDEO", + "pos": [ + 6610, + 4380 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 276, + "type": "RandomNoise", + "pos": [ + 4700, + 3650 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 490 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 42, + "fixed" + ] + }, + { + "id": 277, + "type": "RandomNoise", + "pos": [ + 3160, + 3630 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 483 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 519681071352364, + "randomize" + ] + }, + { + "id": 278, + "type": "LTXVConcatAVLatent", + "pos": [ + 4710, + 4490 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 512 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 494 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 279, + "type": "LTXVAudioVAELoader", + "pos": [ + 1660, + 4100 + ], + "size": [ + 430, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 604 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 481, + 496 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 280, + "type": "KSamplerSelect", + "pos": [ + 4700, + 4100 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 492 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_cfg_pp" + ] + }, + { + "id": 281, + "type": "ManualSigmas", + "pos": [ + 4700, + 4290 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 493 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.85, 0.7250, 0.4219, 0.0" + ] + }, + { + "id": 282, + "type": "CFGGuider", + "pos": [ + 4700, + 3850 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 478 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 479 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 480 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 491 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 283, + "type": "SamplerCustomAdvanced", + "pos": [ + 3550, + 3630 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 483 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 484 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 485 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 544 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 487 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 488 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 284, + "type": "LTXVCropGuides", + "pos": [ + 3830, + 3810 + ], + "size": [ + 250, + 120 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 475 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 476 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 477 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 479 + ] + }, + { + 
"localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 480 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 285, + "type": "LoraLoaderModelOnly", + "pos": [ + 1660, + 3890 + ], + "size": [ + 430, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 520 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 602 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 478, + 541 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-22b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-lora-384.safetensors", + 0.5 + ] + }, + { + "id": 286, + 
"type": "ResizeImagesByLongerEdge", + "pos": [ + 2070, + 4810 + ], + "size": [ + 310, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 523 + }, + { + "localized_name": "longer_edge", + "name": "longer_edge", + "type": "INT", + "widget": { + "name": "longer_edge" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 505 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ResizeImagesByLongerEdge", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1536 + ] + }, + { + "id": 287, + "type": "LTXVLatentUpsampler", + "pos": [ + 4250, + 3760 + ], + "size": [ + 330, + 120 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 547 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 545 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 554 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 548 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 288, + "type": 
"LTXVImgToVideoInplace", + "pos": [ + 4230, + 4100 + ], + "size": [ + 300, + 180 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 552 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 515 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 548 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 543 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 512 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 289, + "type": "LTXVPreprocess", + "pos": [ + 2100, + 5010 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 505 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 510, + 515 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", 
+ "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 18 + ] + }, + { + "id": 290, + "type": "ResizeImageMaskNode", + "pos": [ + 1660, + 4810 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 535 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 558 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 559 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 523 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 1920, + 1088, + "center", + "lanczos" + ] + }, + { + "id": 291, + "type": "KSamplerSelect", + "pos": [ + 3160, + 4040 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { 
+ "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 485 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_ancestral_cfg_pp" + ] + }, + { + "id": 292, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 4830 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 18, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 560 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 561 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 293, + "type": "Reroute", + "pos": [ + 3850, + 4050 + ], 
+ "size": [ + 230, + 40 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 557 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 552, + 553, + 554 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 294, + "type": "ComfyMathExpression", + "pos": [ + 2550, + 4890 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 20, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 562 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 563 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 295, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2870, + 4940 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 561 + }, + { + "localized_name": "height", + "name": "height", + "type": 
"INT", + "widget": { + "name": "height" + }, + "link": 563 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 631 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 511 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 296, + "type": "LTXVImgToVideoInplace", + "pos": [ + 3230, + 4810 + ], + "size": [ + 280, + 180 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 556 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 510 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 511 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 542 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 497 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.7, + false + ] + }, + { + "id": 297, + "type": "LTXVAudioVAEDecode", + "pos": [ + 5760, + 3970 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 495 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 496 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 534 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 298, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 5030 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 564 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 566, + 591 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 565 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + 
"cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 299, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4650 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 598 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 559, + 562 + ] + } + ], + "title": "Height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 300, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4840 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 624 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 564, + 629 + ] + } + ], + "title": "Frame Rate", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, 
+ "fixed" + ] + }, + { + "id": 301, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4280 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 599 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 628 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 302, + "type": "PrimitiveBoolean", + "pos": [ + 1190, + 4110 + ], + "size": [ + 370, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 542, + 543 + ] + } + ], + "title": "Switch to Text to Video?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.0", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 303, + "type": "CLIPTextEncode", + "pos": [ + 2170, + 3640 + ], + "size": [ + 600, + 390 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + 
"link": 615 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 625 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 526 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 304, + "type": "LTXVConditioning", + "pos": [ + 2800, + 3810 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 526 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 527 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 566 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 475, + 518 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 476, + 519 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 305, + "type": "LTXVEmptyLatentAudio", + "pos": 
[ + 3540, + 4960 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 481 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 630 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 565 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 498 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 306, + "type": "ManualSigmas", + "pos": [ + 3160, + 4220 + ], + "size": [ + 500, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 544 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + "1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 307, + "type": "LTXVSeparateAVLatent", + "pos": [ + 3820, + 3630 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 488 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 477, + 547 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 513 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 308, + "type": "SamplerCustomAdvanced", + "pos": [ + 5050, + 3650 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 490 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 491 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 492 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 493 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 494 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 578 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 309, + "type": "LTXVSeparateAVLatent", + "pos": [ + 5390, + 3650 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 578 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 539 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 495 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 310, + "type": "CreateVideo", + "pos": [ + 6050, + 4490 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 538 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 534 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 591 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 536 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for 
S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 311, + "type": "LatentUpscaleModelLoader", + "pos": [ + 1670, + 4550 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 607 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 545 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "directory": "latent_upscale_models" + } + ] + }, + "widgets_values": [ + "ltx-2.3-spatial-upscaler-x2-1.1.safetensors" + ] + }, + { + "id": 312, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4470 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 597 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 558, + 560 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + 
"ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 313, + "type": "CLIPTextEncode", + "pos": [ + 2180, + 4120 + ], + "size": [ + 600, + 170 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 627 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 527 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "pc game, console game, video game, cartoon, childish, ugly" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 314, + "type": "CFGGuider", + "pos": [ + 3160, + 3810 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 541 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 518 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 519 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 484 
+ ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 315, + "type": "VAEDecodeTiled", + "pos": [ + 5750, + 3610 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 539 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 553 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 538 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 4 + ] + }, + { + "id": 316, + "type": "CheckpointLoaderSimple", + "pos": [ + 1660, + 3660 + ], + 
"size": [ + 430, + 160 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 601 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 520 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 556, + 557 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 317, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 1660, + 4280 + ], + "size": [ + 430, + 170 + ], + "flags": {}, + "order": 41, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 606 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 605 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 615, + 627 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-dev-fp8.safetensors", + "default" + ] + }, + { + "id": 318, + "type": "LTXVConcatAVLatent", + "pos": [ + 3860, + 4830 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 497 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 498 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 487 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 319, + "type": "PrimitiveStringMultiline", + "pos": [ + 1190, + 3680 + ], + "size": [ + 370, + 350 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": 
"value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 595 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 625 + ] + } + ], + "title": "Prompt", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveStringMultiline", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 323, + "type": "ComfyMathExpression", + "pos": [ + 1210, + 5040 + ], + "size": [ + 360, + 210 + ], + "flags": { + "collapsed": true + }, + "order": 44, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 628 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 629 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 630, + 631 + ] + } + ], + "title": "Math Expression (length)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + 1630, + 3550, + 480, + 1140 + ], + "color": "#3f789e", + 
"font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Generate Low Resolution", + "bounding": [ + 3130, + 3550, + 1000, + 1140 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 2140, + 3550, + 960, + 1140 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Generate High Resolution", + "bounding": [ + 4670, + 3550, + 990, + 1130 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Latent Upscale", + "bounding": [ + 4160, + 3550, + 480, + 1130 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 19, + "title": "Video Settings", + "bounding": [ + 1150, + 3550, + 460, + 1610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 20, + "title": "Image Preprocess", + "bounding": [ + 1630, + 4720, + 830, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 21, + "title": "Empty Latent", + "bounding": [ + 2820, + 4720, + 1310, + 450 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 22, + "title": "Number conversion", + "bounding": [ + 2480, + 4720, + 310, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 512, + "origin_id": 288, + "origin_slot": 0, + "target_id": 278, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 513, + "origin_id": 307, + "origin_slot": 1, + "target_id": 278, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 478, + "origin_id": 285, + "origin_slot": 0, + "target_id": 282, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 479, + "origin_id": 284, + "origin_slot": 0, + "target_id": 282, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 480, + "origin_id": 284, + "origin_slot": 1, + "target_id": 282, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 541, + "origin_id": 285, + "origin_slot": 0, + "target_id": 
314, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 518, + "origin_id": 304, + "origin_slot": 0, + "target_id": 314, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 519, + "origin_id": 304, + "origin_slot": 1, + "target_id": 314, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 483, + "origin_id": 277, + "origin_slot": 0, + "target_id": 283, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 484, + "origin_id": 314, + "origin_slot": 0, + "target_id": 283, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 485, + "origin_id": 291, + "origin_slot": 0, + "target_id": 283, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 544, + "origin_id": 306, + "origin_slot": 0, + "target_id": 283, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 487, + "origin_id": 318, + "origin_slot": 0, + "target_id": 283, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 475, + "origin_id": 304, + "origin_slot": 0, + "target_id": 284, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 476, + "origin_id": 304, + "origin_slot": 1, + "target_id": 284, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 477, + "origin_id": 307, + "origin_slot": 0, + "target_id": 284, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 520, + "origin_id": 316, + "origin_slot": 0, + "target_id": 285, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 523, + "origin_id": 290, + "origin_slot": 0, + "target_id": 286, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 547, + "origin_id": 307, + "origin_slot": 0, + "target_id": 287, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 545, + "origin_id": 311, + "origin_slot": 0, + "target_id": 287, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 554, + "origin_id": 293, + "origin_slot": 0, + "target_id": 287, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 552, + "origin_id": 293, + "origin_slot": 0, + "target_id": 288, + "target_slot": 0, + 
"type": "VAE" + }, + { + "id": 515, + "origin_id": 289, + "origin_slot": 0, + "target_id": 288, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 548, + "origin_id": 287, + "origin_slot": 0, + "target_id": 288, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 543, + "origin_id": 302, + "origin_slot": 0, + "target_id": 288, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": 286, + "origin_slot": 0, + "target_id": 289, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 558, + "origin_id": 312, + "origin_slot": 0, + "target_id": 290, + "target_slot": 2, + "type": "INT" + }, + { + "id": 559, + "origin_id": 299, + "origin_slot": 0, + "target_id": 290, + "target_slot": 3, + "type": "INT" + }, + { + "id": 560, + "origin_id": 312, + "origin_slot": 0, + "target_id": 292, + "target_slot": 0, + "type": "INT" + }, + { + "id": 557, + "origin_id": 316, + "origin_slot": 2, + "target_id": 293, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 562, + "origin_id": 299, + "origin_slot": 0, + "target_id": 294, + "target_slot": 0, + "type": "INT" + }, + { + "id": 561, + "origin_id": 292, + "origin_slot": 1, + "target_id": 295, + "target_slot": 0, + "type": "INT" + }, + { + "id": 563, + "origin_id": 294, + "origin_slot": 1, + "target_id": 295, + "target_slot": 1, + "type": "INT" + }, + { + "id": 556, + "origin_id": 316, + "origin_slot": 2, + "target_id": 296, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 510, + "origin_id": 289, + "origin_slot": 0, + "target_id": 296, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 511, + "origin_id": 295, + "origin_slot": 0, + "target_id": 296, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 542, + "origin_id": 302, + "origin_slot": 0, + "target_id": 296, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 495, + "origin_id": 309, + "origin_slot": 1, + "target_id": 297, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 496, + "origin_id": 279, + "origin_slot": 0, + 
"target_id": 297, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 564, + "origin_id": 300, + "origin_slot": 0, + "target_id": 298, + "target_slot": 0, + "type": "INT" + }, + { + "id": 526, + "origin_id": 303, + "origin_slot": 0, + "target_id": 304, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 527, + "origin_id": 313, + "origin_slot": 0, + "target_id": 304, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 566, + "origin_id": 298, + "origin_slot": 0, + "target_id": 304, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 497, + "origin_id": 296, + "origin_slot": 0, + "target_id": 318, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 498, + "origin_id": 305, + "origin_slot": 0, + "target_id": 318, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 481, + "origin_id": 279, + "origin_slot": 0, + "target_id": 305, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 565, + "origin_id": 298, + "origin_slot": 1, + "target_id": 305, + "target_slot": 2, + "type": "INT" + }, + { + "id": 488, + "origin_id": 283, + "origin_slot": 0, + "target_id": 307, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 490, + "origin_id": 276, + "origin_slot": 0, + "target_id": 308, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 491, + "origin_id": 282, + "origin_slot": 0, + "target_id": 308, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 492, + "origin_id": 280, + "origin_slot": 0, + "target_id": 308, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 493, + "origin_id": 281, + "origin_slot": 0, + "target_id": 308, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 494, + "origin_id": 278, + "origin_slot": 0, + "target_id": 308, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 578, + "origin_id": 308, + "origin_slot": 0, + "target_id": 309, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 539, + "origin_id": 309, + "origin_slot": 0, + "target_id": 315, + "target_slot": 0, + "type": "LATENT" + }, + 
{ + "id": 553, + "origin_id": 293, + "origin_slot": 0, + "target_id": 315, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 538, + "origin_id": 315, + "origin_slot": 0, + "target_id": 310, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 534, + "origin_id": 297, + "origin_slot": 0, + "target_id": 310, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 591, + "origin_id": 298, + "origin_slot": 0, + "target_id": 310, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 535, + "origin_id": -10, + "origin_slot": 0, + "target_id": 290, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 536, + "origin_id": 310, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 595, + "origin_id": -10, + "origin_slot": 1, + "target_id": 319, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 597, + "origin_id": -10, + "origin_slot": 2, + "target_id": 312, + "target_slot": 0, + "type": "INT" + }, + { + "id": 598, + "origin_id": -10, + "origin_slot": 3, + "target_id": 299, + "target_slot": 0, + "type": "INT" + }, + { + "id": 599, + "origin_id": -10, + "origin_slot": 4, + "target_id": 301, + "target_slot": 0, + "type": "INT" + }, + { + "id": 601, + "origin_id": -10, + "origin_slot": 5, + "target_id": 316, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 602, + "origin_id": -10, + "origin_slot": 6, + "target_id": 285, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 604, + "origin_id": -10, + "origin_slot": 5, + "target_id": 279, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 605, + "origin_id": -10, + "origin_slot": 5, + "target_id": 317, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 606, + "origin_id": -10, + "origin_slot": 7, + "target_id": 317, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 607, + "origin_id": -10, + "origin_slot": 8, + "target_id": 311, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 615, + "origin_id": 317, + "origin_slot": 0, + "target_id": 303, + 
"target_slot": 0, + "type": "CLIP" + }, + { + "id": 624, + "origin_id": -10, + "origin_slot": 9, + "target_id": 300, + "target_slot": 0, + "type": "INT" + }, + { + "id": 625, + "origin_id": 319, + "origin_slot": 0, + "target_id": 303, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 627, + "origin_id": 317, + "origin_slot": 0, + "target_id": 313, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 628, + "origin_id": 301, + "origin_slot": 0, + "target_id": 323, + "target_slot": 0, + "type": "INT" + }, + { + "id": 629, + "origin_id": 300, + "origin_slot": 0, + "target_id": 323, + "target_slot": 1, + "type": "INT" + }, + { + "id": 630, + "origin_id": 323, + "origin_slot": 1, + "target_id": 305, + "target_slot": 1, + "type": "INT" + }, + { + "id": 631, + "origin_id": 323, + "origin_slot": 1, + "target_id": 295, + "target_slot": 2, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Video generation and editing/Image to video", + "description": "Generates video from a single input image using LTX-2.3." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image to Video (Wan 2.2).json b/blueprints/Image to Video (Wan 2.2).json index cd0b44a72..a24adcfb6 100644 --- a/blueprints/Image to Video (Wan 2.2).json +++ b/blueprints/Image to Video (Wan 2.2).json @@ -1 +1,2053 @@ -{"id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", "revision": 0, "last_node_id": 119, "last_link_id": 231, "nodes": [{"id": 116, "type": "296b573f-1e7d-43df-a2df-925fe5e17063", "pos": [1098.3332694531493, -268.3334707134305], "size": [400, 470], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "start image", "localized_name": "start_image", "name": "start_image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"label": "low_noise_unet", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"label": "low_noise_lora", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"label": "high_noise_unet", "name": "unet_name_1", "type": "COMBO", "widget": {"name": "unet_name_1"}, "link": null}, {"label": "high_noise_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"name": "VIDEO", "type": "VIDEO", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "width"], ["-1", "height"], ["-1", "length"], ["86", "noise_seed"], ["86", "control_after_generate"], ["-1", "unet_name"], ["-1", "lora_name"], ["-1", "unet_name_1"], 
["-1", "lora_name_1"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 640, 640, 81, null, null, "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan_2.1_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "296b573f-1e7d-43df-a2df-925fe5e17063", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 119, "lastLinkId": 231, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Video (Wan 2.2)", "inputNode": {"id": -10, "bounding": [-250, 570, 131.435546875, 260]}, "outputNode": {"id": -20, "bounding": [1723.4786916118696, 716.3650158766799, 120, 60]}, "inputs": [{"id": "69d8b033-5601-446e-9634-f5cafbd373e2", "name": "start_image", "type": "IMAGE", "linkIds": [186], "localized_name": "start_image", "label": "start image", "shape": 7, "pos": [-138.564453125, 590]}, {"id": "88ae2af6-63c1-41be-90e8-6359f4d5f133", "name": "text", "type": "STRING", "linkIds": [222], "label": "prompt", "pos": [-138.564453125, 610]}, {"id": "fad9d346-653e-4be5-9e52-38cef6fa59f3", "name": "width", "type": "INT", "linkIds": [223], "pos": [-138.564453125, 630]}, {"id": "a4f34897-8063-4613-a2eb-6c2503167eb1", "name": "height", "type": "INT", "linkIds": [224], "pos": [-138.564453125, 650]}, {"id": "dc4d4472-cff7-41e0-9a4a-d118fcd4a21a", "name": "length", "type": "INT", "linkIds": [225], "pos": [-138.564453125, 670]}, {"id": "f7317e79-4a52-460b-9d71-89ec450dc333", "name": "unet_name", "type": "COMBO", "linkIds": [226], "label": "low_noise_unet", "pos": [-138.564453125, 690]}, {"id": "7a470f86-503a-474f-9571-830c8eb99231", "name": "lora_name", "type": "COMBO", "linkIds": [227], "label": "low_noise_lora", "pos": [-138.564453125, 710]}, {"id": 
"1d88c531-f68e-41b9-95c5-16f944a55b7d", "name": "unet_name_1", "type": "COMBO", "linkIds": [228], "label": "high_noise_unet", "pos": [-138.564453125, 730]}, {"id": "67a79742-33e5-4c38-89d8-ecb021d067c8", "name": "lora_name_1", "type": "COMBO", "linkIds": [229], "label": "high_noise_lora", "pos": [-138.564453125, 750]}, {"id": "9d184b83-37c6-4891-bbdf-ffcdf5ab2016", "name": "clip_name", "type": "COMBO", "linkIds": [230], "pos": [-138.564453125, 770]}, {"id": "24c568ec-aeb2-4c31-9f87-54ee9099d55f", "name": "vae_name", "type": "COMBO", "linkIds": [231], "pos": [-138.564453125, 790]}], "outputs": [{"id": "994c9c48-5f35-48ed-8c9d-0f2b21990cb6", "name": "VIDEO", "type": "VIDEO", "linkIds": [221], "pos": [1743.4786916118696, 736.3650158766799]}], "widgets": [], "nodes": [{"id": 84, "type": "CLIPLoader", "pos": [59.999957705045404, 29.99977085410412], "size": [346.38020833333337, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 230}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [178, 181]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", "directory": "text_encoders"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 90, "type": "VAELoader", "pos": [59.999957705045404, 189.9997708925786], "size": 
[344.7265625, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 231}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [176, 185]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 95, "type": "UNETLoader", "pos": [49.99996468306838, -230.00013148243067], "size": [346.7447916666667, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 226}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 96, "type": "UNETLoader", "pos": [49.99996468306838, -100.00008258817711], "size": [346.7447916666667, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": 
"COMBO", "widget": {"name": "unet_name"}, "link": 228}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [196]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 103, "type": "ModelSamplingSD3", "pos": [739.9998741034308, -100.00008258817711], "size": [210, 58], "flags": {"collapsed": false}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 189}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [192]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [5.000000000000001]}, {"id": 93, "type": "CLIPTextEncode", "pos": [439.99997175727736, 89.99984067280784], "size": [510, 88], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 181}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 222}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [183]}], "title": 
"CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 89, "type": "CLIPTextEncode", "pos": [439.99997175727736, 289.99986864261126], "size": [510, 88], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 178}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [184]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"], "color": "#322", "bgcolor": "#533"}, {"id": 101, "type": "LoraLoaderModelOnly", "pos": [449.99996477925447, -230.00013148243067], "size": [280, 82], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 194}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 227}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [190]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "url": 
"https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "directory": "loras"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", 1.0000000000000002]}, {"id": 102, "type": "LoraLoaderModelOnly", "pos": [449.99996477925447, -100.00008258817711], "size": [280, 82], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 196}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 229}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [189]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "directory": "loras"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", 1.0000000000000002]}, {"id": 104, "type": "ModelSamplingSD3", "pos": [739.9998741034308, -230.00013148243067], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 190}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [195]}], "properties": {"cnr_id": 
"comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [5.000000000000001]}, {"id": 98, "type": "WanImageToVideo", "pos": [530.0000206419123, 529.9999245437435], "size": [342.59114583333337, 210], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 183}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 184}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 185}, {"localized_name": "clip_vision_output", "name": "clip_vision_output", "shape": 7, "type": "CLIP_VISION_OUTPUT", "link": null}, {"localized_name": "start_image", "name": "start_image", "shape": 7, "type": "IMAGE", "link": 186}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 223}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 224}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 225}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "slot_index": 0, "links": [168, 172]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "slot_index": 1, "links": [169, 173]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [174]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "WanImageToVideo", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [640, 640, 81, 1]}, {"id": 86, "type": "KSamplerAdvanced", "pos": [989.9999230265402, -250.00014544809514], "size": [304.73958333333337, 334], "flags": {}, 
"order": 14, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 195}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 172}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 173}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 174}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["enable", 0, "randomize", 4, 1, "euler", "simple", 0, 2, "enable"]}, {"id": 85, "type": "KSamplerAdvanced", "pos": [1336.748028098344, -250.00014544809514], "size": [304.73958333333337, 334], "flags": {}, 
"order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 192}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 168}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 169}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 170}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [175]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["disable", 0, "fixed", 4, 1, "euler", "simple", 2, 4, "disable"]}, {"id": 67, "type": "Note", "pos": [510.0000345979581, 819.9999455547611], "size": [390, 88], "flags": {}, "order": 4, "mode": 0, "inputs": 
[], "outputs": [], "title": "Video Size", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["By default, we set the video to a smaller size for users with low VRAM. If you have enough VRAM, you can change the size"], "color": "#222", "bgcolor": "#000"}, {"id": 105, "type": "MarkdownNote", "pos": [-469.9999795985529, 279.9998197772136], "size": [480, 170.65104166666669], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "VRAM Usage", "properties": {"ue_properties": {"version": "7.1", "widget_ue_connectable": {}, "input_ue_unconnectable": {}}}, "widgets_values": ["## GPU:RTX4090D 24GB\n\n| Model | Size |VRAM Usage | 1st Generation | 2nd Generation |\n|---------------------|-------|-----------|---------------|-----------------|\n| fp8_scaled |640*640| 84% | ≈ 536s | ≈ 513s |\n| fp8_scaled + 4steps LoRA | 640*640 | 83% | ≈ 97s | ≈ 71s |"], "color": "#222", "bgcolor": "#000"}, {"id": 66, "type": "MarkdownNote", "pos": [-469.9999795985529, -320.00012452364496], "size": [480, 572.1354166666667], "flags": {}, "order": 6, "mode": 0, "inputs": [], "outputs": [], "title": "Model Links", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model**\n- [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n- 
[wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors)\n- [wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ ├─── wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors\n│ │ └─── wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n"], "color": "#222", "bgcolor": "#000"}, {"id": 115, "type": "Note", "pos": [29.999978639114225, -470.00010361843204], "size": [360, 88], "flags": {}, "order": 7, "mode": 0, "inputs": [], "outputs": [], "title": "About 4 Steps LoRA", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["Using the Wan2.2 Lighting LoRA will result in the loss of video dynamics, but it will reduce the generation time. 
This template provides two workflows, and you can enable one as needed."], "color": "#222", "bgcolor": "#000"}, {"id": 117, "type": "CreateVideo", "pos": [1030, 650], "size": [270, 78], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 220}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [221]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [16]}, {"id": 87, "type": "VAEDecode", "pos": [1020, 540], "size": [210, 46], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 175}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 176}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [220]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAEDecode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": []}], "groups": [{"id": 15, "title": "fp8_scaled + 4steps LoRA", "bounding": [30, -350, 1630, 1120], "color": "#444", "font_size": 24, "flags": {}}, {"id": 11, "title": "Step1 - Load models", "bounding": [40, -310, 371.0310363769531, 571.3974609375], "color": "#444", "font_size": 24, "flags": {}}, {"id": 13, "title": "Step4 - Prompt", "bounding": [430, 20, 530, 420], "color": "#444", "font_size": 24, "flags": {}}, {"id": 14, "title": "Step3 - Video size & length", "bounding": [430, 460, 530, 290], "color": "#444", "font_size": 24, "flags": {}}, {"id": 16, "title": "Lightx2v 4steps LoRA", "bounding": [430, -310, 530, 310], "color": "#444", "font_size": 24, "flags": {}}], "links": [{"id": 189, 
"origin_id": 102, "origin_slot": 0, "target_id": 103, "target_slot": 0, "type": "MODEL"}, {"id": 181, "origin_id": 84, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "CLIP"}, {"id": 178, "origin_id": 84, "origin_slot": 0, "target_id": 89, "target_slot": 0, "type": "CLIP"}, {"id": 194, "origin_id": 95, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "MODEL"}, {"id": 196, "origin_id": 96, "origin_slot": 0, "target_id": 102, "target_slot": 0, "type": "MODEL"}, {"id": 190, "origin_id": 101, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "MODEL"}, {"id": 183, "origin_id": 93, "origin_slot": 0, "target_id": 98, "target_slot": 0, "type": "CONDITIONING"}, {"id": 184, "origin_id": 89, "origin_slot": 0, "target_id": 98, "target_slot": 1, "type": "CONDITIONING"}, {"id": 185, "origin_id": 90, "origin_slot": 0, "target_id": 98, "target_slot": 2, "type": "VAE"}, {"id": 175, "origin_id": 85, "origin_slot": 0, "target_id": 87, "target_slot": 0, "type": "LATENT"}, {"id": 176, "origin_id": 90, "origin_slot": 0, "target_id": 87, "target_slot": 1, "type": "VAE"}, {"id": 195, "origin_id": 104, "origin_slot": 0, "target_id": 86, "target_slot": 0, "type": "MODEL"}, {"id": 172, "origin_id": 98, "origin_slot": 0, "target_id": 86, "target_slot": 1, "type": "CONDITIONING"}, {"id": 173, "origin_id": 98, "origin_slot": 1, "target_id": 86, "target_slot": 2, "type": "CONDITIONING"}, {"id": 174, "origin_id": 98, "origin_slot": 2, "target_id": 86, "target_slot": 3, "type": "LATENT"}, {"id": 192, "origin_id": 103, "origin_slot": 0, "target_id": 85, "target_slot": 0, "type": "MODEL"}, {"id": 168, "origin_id": 98, "origin_slot": 0, "target_id": 85, "target_slot": 1, "type": "CONDITIONING"}, {"id": 169, "origin_id": 98, "origin_slot": 1, "target_id": 85, "target_slot": 2, "type": "CONDITIONING"}, {"id": 170, "origin_id": 86, "origin_slot": 0, "target_id": 85, "target_slot": 3, "type": "LATENT"}, {"id": 186, "origin_id": -10, "origin_slot": 0, "target_id": 98, 
"target_slot": 4, "type": "IMAGE"}, {"id": 220, "origin_id": 87, "origin_slot": 0, "target_id": 117, "target_slot": 0, "type": "IMAGE"}, {"id": 221, "origin_id": 117, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 222, "origin_id": -10, "origin_slot": 1, "target_id": 93, "target_slot": 1, "type": "STRING"}, {"id": 223, "origin_id": -10, "origin_slot": 2, "target_id": 98, "target_slot": 5, "type": "INT"}, {"id": 224, "origin_id": -10, "origin_slot": 3, "target_id": 98, "target_slot": 6, "type": "INT"}, {"id": 225, "origin_id": -10, "origin_slot": 4, "target_id": 98, "target_slot": 7, "type": "INT"}, {"id": 226, "origin_id": -10, "origin_slot": 5, "target_id": 95, "target_slot": 0, "type": "COMBO"}, {"id": 227, "origin_id": -10, "origin_slot": 6, "target_id": 101, "target_slot": 1, "type": "COMBO"}, {"id": 228, "origin_id": -10, "origin_slot": 7, "target_id": 96, "target_slot": 0, "type": "COMBO"}, {"id": 229, "origin_id": -10, "origin_slot": 8, "target_id": 102, "target_slot": 1, "type": "COMBO"}, {"id": 230, "origin_id": -10, "origin_slot": 9, "target_id": 84, "target_slot": 0, "type": "COMBO"}, {"id": 231, "origin_id": -10, "origin_slot": 10, "target_id": 90, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Image to video"}]}, "config": {}, "extra": {"ds": {"scale": 0.7926047855889957, "offset": [-30.12529469925767, 690.3829855122884]}, "frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ue_links": []}, "version": 0.4} +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 119, + "last_link_id": 231, + "nodes": [ + { + "id": 116, + "type": "296b573f-1e7d-43df-a2df-925fe5e17063", + "pos": [ + 1098.3332694531493, + -268.3334707134305 + ], + "size": [ + 400, + 470 + ], + "flags": { + "collapsed": false + }, + 
"order": 0, + "mode": 0, + "inputs": [ + { + "label": "start image", + "localized_name": "start_image", + "name": "start_image", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "label": "low_noise_unet", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "label": "low_noise_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "high_noise_unet", + "name": "unet_name_1", + "type": "COMBO", + "widget": { + "name": "unet_name_1" + }, + "link": null + }, + { + "label": "high_noise_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "name": "VIDEO", + "type": "VIDEO", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "width" + ], + [ + "-1", + "height" + ], + [ + "-1", + "length" + ], + [ + "86", + "noise_seed" + ], + [ + "86", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "lora_name" + ], + [ + "-1", + "unet_name_1" + ], + [ + "-1", + "lora_name_1" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + 640, + 640, + 81, + null, + null, + 
"wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan_2.1_vae.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "296b573f-1e7d-43df-a2df-925fe5e17063", + "version": 1, + "state": { + "lastGroupId": 16, + "lastNodeId": 119, + "lastLinkId": 231, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Video (Wan 2.2)", + "inputNode": { + "id": -10, + "bounding": [ + -250, + 570, + 131.435546875, + 260 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1723.4786916118696, + 716.3650158766799, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "69d8b033-5601-446e-9634-f5cafbd373e2", + "name": "start_image", + "type": "IMAGE", + "linkIds": [ + 186 + ], + "localized_name": "start_image", + "label": "start image", + "shape": 7, + "pos": [ + -138.564453125, + 590 + ] + }, + { + "id": "88ae2af6-63c1-41be-90e8-6359f4d5f133", + "name": "text", + "type": "STRING", + "linkIds": [ + 222 + ], + "label": "prompt", + "pos": [ + -138.564453125, + 610 + ] + }, + { + "id": "fad9d346-653e-4be5-9e52-38cef6fa59f3", + "name": "width", + "type": "INT", + "linkIds": [ + 223 + ], + "pos": [ + -138.564453125, + 630 + ] + }, + { + "id": "a4f34897-8063-4613-a2eb-6c2503167eb1", + "name": "height", + "type": "INT", + "linkIds": [ + 224 + ], + "pos": [ + -138.564453125, + 650 + ] + }, + { + "id": "dc4d4472-cff7-41e0-9a4a-d118fcd4a21a", + "name": "length", + "type": "INT", + "linkIds": [ + 225 + ], + "pos": [ + -138.564453125, + 670 + ] + }, + { + "id": "f7317e79-4a52-460b-9d71-89ec450dc333", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 226 + ], + "label": "low_noise_unet", + "pos": [ + -138.564453125, + 690 + ] + }, + { + "id": "7a470f86-503a-474f-9571-830c8eb99231", + "name": 
"lora_name", + "type": "COMBO", + "linkIds": [ + 227 + ], + "label": "low_noise_lora", + "pos": [ + -138.564453125, + 710 + ] + }, + { + "id": "1d88c531-f68e-41b9-95c5-16f944a55b7d", + "name": "unet_name_1", + "type": "COMBO", + "linkIds": [ + 228 + ], + "label": "high_noise_unet", + "pos": [ + -138.564453125, + 730 + ] + }, + { + "id": "67a79742-33e5-4c38-89d8-ecb021d067c8", + "name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 229 + ], + "label": "high_noise_lora", + "pos": [ + -138.564453125, + 750 + ] + }, + { + "id": "9d184b83-37c6-4891-bbdf-ffcdf5ab2016", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 230 + ], + "pos": [ + -138.564453125, + 770 + ] + }, + { + "id": "24c568ec-aeb2-4c31-9f87-54ee9099d55f", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 231 + ], + "pos": [ + -138.564453125, + 790 + ] + } + ], + "outputs": [ + { + "id": "994c9c48-5f35-48ed-8c9d-0f2b21990cb6", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 221 + ], + "pos": [ + 1743.4786916118696, + 736.3650158766799 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 84, + "type": "CLIPLoader", + "pos": [ + 59.999957705045404, + 29.99977085410412 + ], + "size": [ + 346.38020833333337, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 230 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 178, + 181 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": 
"umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "directory": "text_encoders" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 90, + "type": "VAELoader", + "pos": [ + 59.999957705045404, + 189.9997708925786 + ], + "size": [ + 344.7265625, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 231 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 176, + 185 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "wan_2.1_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", + "directory": "vae" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 95, + "type": "UNETLoader", + "pos": [ + 49.99996468306838, + -230.00013148243067 + ], + "size": [ + 346.7447916666667, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 226 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 
194 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 96, + "type": "UNETLoader", + "pos": [ + 49.99996468306838, + -100.00008258817711 + ], + "size": [ + 346.7447916666667, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 228 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 196 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 103, + "type": "ModelSamplingSD3", + "pos": [ + 739.9998741034308, + -100.00008258817711 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": false 
+ }, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 189 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 192 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "ModelSamplingSD3", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 5.000000000000001 + ] + }, + { + "id": 93, + "type": "CLIPTextEncode", + "pos": [ + 439.99997175727736, + 89.99984067280784 + ], + "size": [ + 510, + 88 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 181 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 222 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 183 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPTextEncode", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 89, + "type": "CLIPTextEncode", + "pos": [ + 439.99997175727736, + 289.99986864261126 + ], + "size": [ + 510, + 88 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 178 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + 
{ + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 184 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPTextEncode", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 101, + "type": "LoraLoaderModelOnly", + "pos": [ + 449.99996477925447, + -230.00013148243067 + ], + "size": [ + 280, + 82 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 194 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 227 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 190 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "directory": "loras" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + 1.0000000000000002 + ] + }, + { + "id": 102, + "type": "LoraLoaderModelOnly", + "pos": [ 
+ 449.99996477925447, + -100.00008258817711 + ], + "size": [ + 280, + 82 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 196 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 229 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 189 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "directory": "loras" + } + ], + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + 1.0000000000000002 + ] + }, + { + "id": 104, + "type": "ModelSamplingSD3", + "pos": [ + 739.9998741034308, + -230.00013148243067 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 190 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 195 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "ModelSamplingSD3", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + 
"input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 5.000000000000001 + ] + }, + { + "id": 98, + "type": "WanImageToVideo", + "pos": [ + 530.0000206419123, + 529.9999245437435 + ], + "size": [ + 342.59114583333337, + 210 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 183 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 184 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 185 + }, + { + "localized_name": "clip_vision_output", + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": null + }, + { + "localized_name": "start_image", + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 186 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 223 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 224 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 225 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 168, + 172 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 169, + 173 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 174 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "WanImageToVideo", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + 
}, + "widgets_values": [ + 640, + 640, + 81, + 1 + ] + }, + { + "id": 86, + "type": "KSamplerAdvanced", + "pos": [ + 989.9999230265402, + -250.00014544809514 + ], + "size": [ + 304.73958333333337, + 334 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 195 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 172 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 173 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 174 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "COMBO", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "start_at_step", + "name": "start_at_step", + "type": "INT", + "widget": { + "name": "start_at_step" + }, + "link": null + }, + { + "localized_name": "end_at_step", + "name": "end_at_step", + "type": "INT", + "widget": { + "name": "end_at_step" + }, + "link": null + }, + { + "localized_name": "return_with_leftover_noise", + "name": "return_with_leftover_noise", + "type": "COMBO", + "widget": { + "name": "return_with_leftover_noise" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 170 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "KSamplerAdvanced", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "enable", + 0, + "randomize", + 4, + 1, + "euler", + "simple", + 0, + 2, + "enable" + ] + }, + { + "id": 85, + "type": "KSamplerAdvanced", + "pos": [ + 1336.748028098344, + -250.00014544809514 + ], + "size": [ + 304.73958333333337, + 334 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 192 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 168 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 169 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 170 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "COMBO", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "start_at_step", + "name": "start_at_step", + "type": "INT", + "widget": { + "name": "start_at_step" + }, + 
"link": null + }, + { + "localized_name": "end_at_step", + "name": "end_at_step", + "type": "INT", + "widget": { + "name": "end_at_step" + }, + "link": null + }, + { + "localized_name": "return_with_leftover_noise", + "name": "return_with_leftover_noise", + "type": "COMBO", + "widget": { + "name": "return_with_leftover_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 175 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "KSamplerAdvanced", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 4, + 1, + "euler", + "simple", + 2, + 4, + "disable" + ] + }, + { + "id": 67, + "type": "Note", + "pos": [ + 510.0000345979581, + 819.9999455547611 + ], + "size": [ + 390, + 88 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Video Size", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "By default, we set the video to a smaller size for users with low VRAM. 
If you have enough VRAM, you can change the size" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 105, + "type": "MarkdownNote", + "pos": [ + -469.9999795985529, + 279.9998197772136 + ], + "size": [ + 480, + 170.65104166666669 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "VRAM Usage", + "properties": { + "ue_properties": { + "version": "7.1", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "## GPU:RTX4090D 24GB\n\n| Model | Size |VRAM Usage | 1st Generation | 2nd Generation |\n|---------------------|-------|-----------|---------------|-----------------|\n| fp8_scaled |640*640| 84% | ≈ 536s | ≈ 513s |\n| fp8_scaled + 4steps LoRA | 640*640 | 83% | ≈ 97s | ≈ 71s |" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 66, + "type": "MarkdownNote", + "pos": [ + -469.9999795985529, + -320.00012452364496 + ], + "size": [ + 480, + 572.1354166666667 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Model Links", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model**\n- [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n- [wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors)\n- 
[wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ ├─── wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors\n│ │ └─── wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 115, + "type": "Note", + "pos": [ + 29.999978639114225, + -470.00010361843204 + ], + "size": [ + 360, + 88 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "About 4 Steps LoRA", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "Using the Wan2.2 Lightning LoRA will result in the loss of video dynamics, but it will reduce the generation time. This template provides two workflows, and you can enable one as needed."
+ ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 117, + "type": "CreateVideo", + "pos": [ + 1030, + 650 + ], + "size": [ + 270, + 78 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 220 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 221 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "CreateVideo" + }, + "widgets_values": [ + 16 + ] + }, + { + "id": 87, + "type": "VAEDecode", + "pos": [ + 1020, + 540 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 175 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 176 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 220 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "VAEDecode", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.1", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 15, + "title": "fp8_scaled + 4steps LoRA", + "bounding": [ + 30, + -350, + 1630, + 1120 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Step1 - Load models", + "bounding": [ + 40, + -310, + 371.0310363769531, + 571.3974609375 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 13, + "title": "Step4 - Prompt", + "bounding": [ + 430, + 20, + 530, + 420 + ], + "color": "#444", + "font_size": 24, + 
"flags": {} + }, + { + "id": 14, + "title": "Step3 - Video size & length", + "bounding": [ + 430, + 460, + 530, + 290 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 16, + "title": "Lightx2v 4steps LoRA", + "bounding": [ + 430, + -310, + 530, + 310 + ], + "color": "#444", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 189, + "origin_id": 102, + "origin_slot": 0, + "target_id": 103, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 181, + "origin_id": 84, + "origin_slot": 0, + "target_id": 93, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 178, + "origin_id": 84, + "origin_slot": 0, + "target_id": 89, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 194, + "origin_id": 95, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 196, + "origin_id": 96, + "origin_slot": 0, + "target_id": 102, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 190, + "origin_id": 101, + "origin_slot": 0, + "target_id": 104, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 183, + "origin_id": 93, + "origin_slot": 0, + "target_id": 98, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 184, + "origin_id": 89, + "origin_slot": 0, + "target_id": 98, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 185, + "origin_id": 90, + "origin_slot": 0, + "target_id": 98, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 175, + "origin_id": 85, + "origin_slot": 0, + "target_id": 87, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 176, + "origin_id": 90, + "origin_slot": 0, + "target_id": 87, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 195, + "origin_id": 104, + "origin_slot": 0, + "target_id": 86, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 172, + "origin_id": 98, + "origin_slot": 0, + "target_id": 86, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 173, + "origin_id": 98, + "origin_slot": 1, + "target_id": 86, + 
"target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 174, + "origin_id": 98, + "origin_slot": 2, + "target_id": 86, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 192, + "origin_id": 103, + "origin_slot": 0, + "target_id": 85, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 168, + "origin_id": 98, + "origin_slot": 0, + "target_id": 85, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 169, + "origin_id": 98, + "origin_slot": 1, + "target_id": 85, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 170, + "origin_id": 86, + "origin_slot": 0, + "target_id": 85, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 186, + "origin_id": -10, + "origin_slot": 0, + "target_id": 98, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 220, + "origin_id": 87, + "origin_slot": 0, + "target_id": 117, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 221, + "origin_id": 117, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 222, + "origin_id": -10, + "origin_slot": 1, + "target_id": 93, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 223, + "origin_id": -10, + "origin_slot": 2, + "target_id": 98, + "target_slot": 5, + "type": "INT" + }, + { + "id": 224, + "origin_id": -10, + "origin_slot": 3, + "target_id": 98, + "target_slot": 6, + "type": "INT" + }, + { + "id": 225, + "origin_id": -10, + "origin_slot": 4, + "target_id": 98, + "target_slot": 7, + "type": "INT" + }, + { + "id": 226, + "origin_id": -10, + "origin_slot": 5, + "target_id": 95, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 227, + "origin_id": -10, + "origin_slot": 6, + "target_id": 101, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 228, + "origin_id": -10, + "origin_slot": 7, + "target_id": 96, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 229, + "origin_id": -10, + "origin_slot": 8, + "target_id": 102, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 230, + "origin_id": -10, 
+ "origin_slot": 9, + "target_id": 84, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 231, + "origin_id": -10, + "origin_slot": 10, + "target_id": 90, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Image to video", + "description": "Image-to-video with Wan 2.2 using a start image plus text prompt to extend motion from the still frame." + } + ] + }, + "config": {}, + "extra": { + "ds": { + "scale": 0.7926047855889957, + "offset": [ + -30.12529469925767, + 690.3829855122884 + ] + }, + "frontendVersion": "1.37.11", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true, + "ue_links": [] + }, + "version": 0.4 +} diff --git a/blueprints/Pose to Image (Z-Image-Turbo).json b/blueprints/Pose to Image (Z-Image-Turbo).json index f4c224249..5c2749efe 100644 --- a/blueprints/Pose to Image (Z-Image-Turbo).json +++ b/blueprints/Pose to Image (Z-Image-Turbo).json @@ -1 +1,1323 @@ -{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 26, "last_link_id": 46, "nodes": [{"id": 13, "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "pos": [400, 3630], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["19", "seed"], ["19", 
"control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 26, "lastLinkId": 46, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Pose to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [27.60368520069494, 4936.043696127976, 120, 160]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [41, 42], "label": "image", "pos": [127.60368520069494, 4956.043696127976]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [37], "label": "prompt", "pos": [127.60368520069494, 4976.043696127976]}, {"id": "9f23df20-75de-4782-8ff7-225bc7976bbe", "name": "unet_name", "type": "COMBO", "linkIds": [43], "pos": [127.60368520069494, 4996.043696127976]}, {"id": "fc8aa3eb-a537-4976-8b5f-666f0dc5af4b", "name": "clip_name", "type": "COMBO", "linkIds": [44], "pos": [127.60368520069494, 5016.043696127976]}, {"id": "ed2c5269-91ac-4f93-b68d-6b546cef20d8", "name": "vae_name", "type": "COMBO", "linkIds": [45], "pos": [127.60368520069494, 5036.043696127976]}, {"id": "560ba519-ec0c-4ca4-b8f0-f02174012475", "name": "name", "type": "COMBO", "linkIds": [46], "pos": [127.60368520069494, 5056.043696127976]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [35], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 14, "type": "CLIPLoader", "pos": [340, 4820], "size": [269.9609375, 106], "flags": {}, 
"order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 44}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 15, "type": "UNETLoader", "pos": [340, 4670], "size": [269.9609375, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 43}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 16, "type": "VAELoader", 
"pos": [340, 5000], "size": [269.9609375, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 45}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [21, 30]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 17, "type": "ModelPatchLoader", "pos": [340, 5130], "size": [269.9609375, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 46}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [29]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 18, "type": "ModelSamplingAuraFlow", "pos": [1110, 4610], "size": [289.97395833333337, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 22}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": 
[{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [23]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 19, "type": "KSampler", "pos": [1110, 4720], "size": [300, 309.9609375], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 23}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 24}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 25}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 26}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [20]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 20, "type": "ConditioningZeroOut", "pos": [860, 5160], 
"size": [204.134765625, 26], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 27}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [25]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 21, "type": "QwenImageDiffsynthControlnet", "pos": [720, 5320], "size": [289.97395833333337, 138], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 28}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 29}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 30}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 42}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [22]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 23, "type": "CLIPTextEncode", "pos": [660, 4660], "size": [400, 179.9609375], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 33}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 37}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": 
"CONDITIONING", "links": [24, 27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 24, "type": "VAEDecode", "pos": [1450, 4620], "size": [200, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 20}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 21}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 25, "type": "GetImageSize", "pos": [330, 5540], "size": [140, 66], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 41}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [31]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [32]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 22, "type": "EmptySD3LatentImage", "pos": [1110, 5540], "size": [259.9609375, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 31}, {"localized_name": "height", 
"name": "height", "type": "INT", "widget": {"name": "height"}, "link": 32}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [26]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}], "groups": [{"id": 1, "title": "Prompt", "bounding": [640, 4590, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [320, 4590, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [640, 5240, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 20, "origin_id": 19, "origin_slot": 0, "target_id": 24, "target_slot": 0, "type": "LATENT"}, {"id": 21, "origin_id": 16, "origin_slot": 0, "target_id": 24, "target_slot": 1, "type": "VAE"}, {"id": 22, "origin_id": 21, "origin_slot": 0, "target_id": 18, "target_slot": 0, "type": "MODEL"}, {"id": 23, "origin_id": 18, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 24, "origin_id": 23, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 25, "origin_id": 20, "origin_slot": 0, "target_id": 19, "target_slot": 2, "type": "CONDITIONING"}, {"id": 26, "origin_id": 22, "origin_slot": 0, "target_id": 19, "target_slot": 3, "type": "LATENT"}, {"id": 27, "origin_id": 23, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "CONDITIONING"}, {"id": 28, "origin_id": 15, "origin_slot": 0, "target_id": 21, "target_slot": 0, "type": "MODEL"}, {"id": 29, "origin_id": 17, "origin_slot": 0, "target_id": 21, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 
30, "origin_id": 16, "origin_slot": 0, "target_id": 21, "target_slot": 2, "type": "VAE"}, {"id": 31, "origin_id": 25, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "INT"}, {"id": 32, "origin_id": 25, "origin_slot": 1, "target_id": 22, "target_slot": 1, "type": "INT"}, {"id": 33, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "CLIP"}, {"id": 35, "origin_id": 24, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 37, "origin_id": -10, "origin_slot": 1, "target_id": 23, "target_slot": 1, "type": "STRING"}, {"id": 41, "origin_id": -10, "origin_slot": 0, "target_id": 25, "target_slot": 0, "type": "IMAGE"}, {"id": 42, "origin_id": -10, "origin_slot": 0, "target_id": 21, "target_slot": 3, "type": "IMAGE"}, {"id": 43, "origin_id": -10, "origin_slot": 2, "target_id": 15, "target_slot": 0, "type": "COMBO"}, {"id": 44, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}, {"id": 45, "origin_id": -10, "origin_slot": 4, "target_id": 16, "target_slot": 0, "type": "COMBO"}, {"id": 46, "origin_id": -10, "origin_slot": 5, "target_id": 17, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Pose to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.6479518372239997, "offset": [852.9773200429215, -3036.34291480022]}}, "version": 0.4} +{ + "id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", + "revision": 0, + "last_node_id": 26, + "last_link_id": 46, + "nodes": [ + { + "id": 13, + "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", + "pos": [ + 400, + 3630 + ], + "size": [ + 400, + 470 + ], + "flags": 
{}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": null + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "19", + "seed" + ], + [ + "19", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ], + [ + "-1", + "name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + null, + null, + "z_image_turbo_bf16.safetensors", + "qwen_3_4b.safetensors", + "ae.safetensors", + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 26, + "lastLinkId": 46, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Pose to Image (Z-Image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + 27.60368520069494, + 4936.043696127976, + 120, + 160 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1598.6038576146689, + 4936.043696127976, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 41, + 42 + ], + "label": "image", + "pos": [ + 127.60368520069494, + 4956.043696127976 + ] + }, + { + 
"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", + "name": "text", + "type": "STRING", + "linkIds": [ + 37 + ], + "label": "prompt", + "pos": [ + 127.60368520069494, + 4976.043696127976 + ] + }, + { + "id": "9f23df20-75de-4782-8ff7-225bc7976bbe", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 43 + ], + "pos": [ + 127.60368520069494, + 4996.043696127976 + ] + }, + { + "id": "fc8aa3eb-a537-4976-8b5f-666f0dc5af4b", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 44 + ], + "pos": [ + 127.60368520069494, + 5016.043696127976 + ] + }, + { + "id": "ed2c5269-91ac-4f93-b68d-6b546cef20d8", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 45 + ], + "pos": [ + 127.60368520069494, + 5036.043696127976 + ] + }, + { + "id": "560ba519-ec0c-4ca4-b8f0-f02174012475", + "name": "name", + "type": "COMBO", + "linkIds": [ + 46 + ], + "pos": [ + 127.60368520069494, + 5056.043696127976 + ] + } + ], + "outputs": [ + { + "id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 35 + ], + "pos": [ + 1618.6038576146689, + 4956.043696127976 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 14, + "type": "CLIPLoader", + "pos": [ + 340, + 4820 + ], + "size": [ + 269.9609375, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 44 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 15, + "type": "UNETLoader", + "pos": [ + 340, + 4670 + ], + "size": [ + 269.9609375, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 43 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 28 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 16, + "type": "VAELoader", + "pos": [ + 340, + 5000 + ], + "size": [ + 269.9609375, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 45 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 21, + 30 + ] + } + ], + "properties": { 
+ "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 17, + "type": "ModelPatchLoader", + "pos": [ + 340, + 5130 + ], + "size": [ + 269.9609375, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "name", + "name": "name", + "type": "COMBO", + "widget": { + "name": "name" + }, + "link": 46 + } + ], + "outputs": [ + { + "localized_name": "MODEL_PATCH", + "name": "MODEL_PATCH", + "type": "MODEL_PATCH", + "links": [ + 29 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.51", + "Node name for S&R": "ModelPatchLoader", + "models": [ + { + "name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", + "directory": "model_patches" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" + ] + }, + { + "id": 18, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1110, + 4610 + ], + "size": [ + 289.97395833333337, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 22 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": 
"MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 23 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 19, + "type": "KSampler", + "pos": [ + 1110, + 4720 + ], + "size": [ + 300, + 309.9609375 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 23 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 24 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 25 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 26 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 20 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler", + 
"enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 9, + 1, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 20, + "type": "ConditioningZeroOut", + "pos": [ + 860, + 5160 + ], + "size": [ + 204.134765625, + 26 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 27 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 25 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 21, + "type": "QwenImageDiffsynthControlnet", + "pos": [ + 720, + 5320 + ], + "size": [ + 289.97395833333337, + 138 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 28 + }, + { + "localized_name": "model_patch", + "name": "model_patch", + "type": "MODEL_PATCH", + "link": 29 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 30 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 42 + }, + { + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 22 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for 
S&R": "QwenImageDiffsynthControlnet", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 23, + "type": "CLIPTextEncode", + "pos": [ + 660, + 4660 + ], + "size": [ + 400, + 179.9609375 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 33 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 37 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 24, + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 24, + "type": "VAEDecode", + "pos": [ + 1450, + 4620 + ], + "size": [ + 200, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 20 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 21 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 25, + "type": "GetImageSize", + "pos": [ + 330, + 5540 + ], + "size": [ + 140, + 66 + ], + 
"flags": { + "collapsed": false + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 41 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 31 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 32 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.76", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 22, + "type": "EmptySD3LatentImage", + "pos": [ + 1110, + 5540 + ], + "size": [ + 259.9609375, + 106 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 31 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 32 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 26 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Prompt", + "bounding": [ + 640, + 4590, + 440, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 
2, + "title": "Models", + "bounding": [ + 320, + 4590, + 300, + 640 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Apple ControlNet", + "bounding": [ + 640, + 5240, + 440, + 260 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 20, + "origin_id": 19, + "origin_slot": 0, + "target_id": 24, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 21, + "origin_id": 16, + "origin_slot": 0, + "target_id": 24, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 22, + "origin_id": 21, + "origin_slot": 0, + "target_id": 18, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 23, + "origin_id": 18, + "origin_slot": 0, + "target_id": 19, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 24, + "origin_id": 23, + "origin_slot": 0, + "target_id": 19, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 25, + "origin_id": 20, + "origin_slot": 0, + "target_id": 19, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 26, + "origin_id": 22, + "origin_slot": 0, + "target_id": 19, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 27, + "origin_id": 23, + "origin_slot": 0, + "target_id": 20, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 28, + "origin_id": 15, + "origin_slot": 0, + "target_id": 21, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 29, + "origin_id": 17, + "origin_slot": 0, + "target_id": 21, + "target_slot": 1, + "type": "MODEL_PATCH" + }, + { + "id": 30, + "origin_id": 16, + "origin_slot": 0, + "target_id": 21, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 31, + "origin_id": 25, + "origin_slot": 0, + "target_id": 22, + "target_slot": 0, + "type": "INT" + }, + { + "id": 32, + "origin_id": 25, + "origin_slot": 1, + "target_id": 22, + "target_slot": 1, + "type": "INT" + }, + { + "id": 33, + "origin_id": 14, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 35, + "origin_id": 24, + 
"origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 1, + "target_id": 23, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 41, + "origin_id": -10, + "origin_slot": 0, + "target_id": 25, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 42, + "origin_id": -10, + "origin_slot": 0, + "target_id": 21, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 43, + "origin_id": -10, + "origin_slot": 2, + "target_id": 15, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 44, + "origin_id": -10, + "origin_slot": 3, + "target_id": 14, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 45, + "origin_id": -10, + "origin_slot": 4, + "target_id": 16, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 46, + "origin_id": -10, + "origin_slot": 5, + "target_id": 17, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "category": "Image generation and editing/Pose to image", + "description": "Generates an image from pose keypoints using Z-Image-Turbo with text conditioning." 
+ } + ] + }, + "config": {}, + "extra": { + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true, + "ds": { + "scale": 0.6479518372239997, + "offset": [ + 852.9773200429215, + -3036.34291480022 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Pose to Video (LTX 2.0).json b/blueprints/Pose to Video (LTX 2.0).json index 78c098798..1ce49351a 100644 --- a/blueprints/Pose to Video (LTX 2.0).json +++ b/blueprints/Pose to Video (LTX 2.0).json @@ -1 +1,3881 @@ -{"id": "01cd475b-52df-43bf-aafa-484a5976d2d2", "revision": 0, "last_node_id": 160, "last_link_id": 410, "nodes": [{"id": 1, "type": "f0e58a6b-7246-4103-9fec-73b423634b1f", "pos": [210, 3830], "size": [420, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "first_frame_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "first frame", "name": "image", "type": "IMAGE", "link": null}, {"label": "control image", "name": "input", "type": "IMAGE,MASK", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"label": "distll_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": null}, {"name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": null}, 
{"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "resize_type.width"], ["-1", "resize_type.height"], ["-1", "length"], ["-1", "strength"], ["-1", "bypass"], ["126", "noise_seed"], ["126", "control_after_generate"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "model_name"], ["-1", "lora_name_1"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1280, 720, 97, 1, false, null, null, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-pose-control.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors", "ltx-2-19b-distilled-lora-384.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "f0e58a6b-7246-4103-9fec-73b423634b1f", "version": 1, "state": {"lastGroupId": 11, "lastNodeId": 160, "lastLinkId": 410, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Pose to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2220, 4180, 153.3203125, 280]}, "outputNode": {"id": -20, "bounding": [1750.2777777777776, 4091.1111111111113, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "label": "prompt", "pos": [-2086.6796875, 4200]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "first_frame_strength", "pos": [-2086.6796875, 4220]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2086.6796875, 4240]}, {"id": "f7aa8c12-bdba-4bbd-84cf-b49cfc32a1dd", "name": "image", "type": "IMAGE", "linkIds": [398, 399], "label": "first frame", "pos": [-2086.6796875, 4260]}, 
{"id": "da40a4c0-cd19-46c6-8eb3-62d0026fbe85", "name": "input", "type": "IMAGE,MASK", "linkIds": [400], "label": "control image", "pos": [-2086.6796875, 4280]}, {"id": "8005344b-99d6-4829-a619-c4e8ef640eb9", "name": "ckpt_name", "type": "COMBO", "linkIds": [401, 402, 403], "pos": [-2086.6796875, 4300]}, {"id": "25e7c4e8-850c-4f37-bc14-e3f4b5f228c0", "name": "lora_name", "type": "COMBO", "linkIds": [404, 405], "pos": [-2086.6796875, 4320]}, {"id": "f16a18dd-947e-400a-8889-02cf998f760a", "name": "lora_name_1", "type": "COMBO", "linkIds": [406], "label": "distll_lora", "pos": [-2086.6796875, 4340]}, {"id": "1abf156c-4c85-4ee5-8671-62df3177d835", "name": "model_name", "type": "COMBO", "linkIds": [407], "label": "upscale_model", "pos": [-2086.6796875, 4360]}, {"id": "203402cf-4253-4daf-bf78-5def9496e0af", "name": "resize_type.width", "type": "INT", "linkIds": [408], "pos": [-2086.6796875, 4380]}, {"id": "e6d8ac4a-34d4-46c6-bcb2-4e66a696438c", "name": "resize_type.height", "type": "INT", "linkIds": [409], "pos": [-2086.6796875, 4400]}, {"id": "6aa6cf2c-bc4f-4f8b-be62-aa15793375dc", "name": "length", "type": "INT", "linkIds": [410], "pos": [-2086.6796875, 4420]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770.2777777777776, 4111.111111111111]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-697.721823660531, 3671.1105325465196], "size": [269.97395833333337, 98], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": 
[261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-697.721823660531, 3841.1107362825187], "size": [269.97395833333337, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [410.27824286284044, 3851.110970278795], "size": [269.97395833333337, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-69.72208571196083, 3701.1104657166875], "size": [389.97395833333337, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 407}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": 
"LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [410.27824286284044, 4101.110949206838], "size": [269.97395833333337, 46], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [410.27824286284044, 3701.1104657166875], "size": [269.97395833333337, 98], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": 
"0.3.71", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-387.72197839215096, 3521.1103425011374], "size": [213.09895833333334, 106], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1133.7215420073496, 4141.110347554622], "size": [269.97395833333337, 78], "flags": {}, "order": 27, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": 
"0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1163.7218246405453, 3881.1109034489627], "size": [400, 88], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": "#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-519.7217122979332, 4701.110031965835], "size": [187.5, 46], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-393.72183921949465, 3801.1107787938904], "size": [239.97395833333334, 46], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": 
"av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.7214530029996, 3515.1112854387566], "size": [409.97395833333337, 88], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [410.27824286284044, 3981.1101681370833], "size": [269.97395833333337, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": 
["gradient_estimation"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-69.72208571196083, 3571.110499039739], "size": [389.97395833333337, 82], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 406}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 95, "type": "LTXVScheduler", "pos": [-699.7218704597861, 3981.1101681370833], "size": [269.97395833333337, 154], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": 
"SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-697.721823660531, 3521.1103425011374], "size": [269.97395833333337, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [710.2782734905775, 3571.110499039739], "size": [212.36979166666669, 106], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send 
Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [410.27824286284044, 3571.110499039739], "size": [269.97395833333337, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1130.2783163694094, 3841.1107362825187], "size": [239.97395833333334, 46], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 383}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1420.2783925712918, 3761.1104019496292], "size": [269.97395833333337, 78], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [304]}], "properties": 
{"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1649.721454901846, 3761.1104019496292], "size": [419.97395833333337, 82], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 404}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-pose-control.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Pose-Control/resolve/main/ltx-2-19b-ic-lora-pose-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-pose-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730.2784619127078, 3731.1109580277], "size": [193.2916015625, 46], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", 
"enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1120.2783619435547, 3641.110599376351], "size": [269.97395833333337, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, {"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1130.2783163694094, 3531.1113453160738], "size": [239.97395833333334, 46], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, 
"secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1600, 4940], "size": [269.97395833333337, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1600, 5070], "size": [269.97395833333337, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 118, "type": "Reroute", "pos": [-229.7217758812614, 4211.111007032079], "size": [75, 26], "flags": {}, "order": 14, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-19.72161465663438, 4071.1107364662485], "size": [269.97395833333337, 122], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 398}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 366}, 
{"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-9.721939801202097, 3841.1107362825187], "size": [239.97395833333334, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-9.721939801202097, 3961.111517352274], "size": [259.97395833333337, 66], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": 
"vae", "type": "VAE", "link": 289}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1649.721454901846, 4041.1110828665023], "size": [419.97395833333337, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 405}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 403}, {"localized_name": "device", "name": "device", "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-pose-control.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1649.721454901846, 3591.1104777840524], "size": [419.97395833333337, 98], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": 
"ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 401}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 156, "type": "LTXVAudioVAELoader", "pos": [-1636.9543279290153, 3911.095334870057], "size": [399.0494791666667, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 402}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [382, 383]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "LTXVAudioVAELoader"}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1089.7215608128167, 4401.110560478942], "size": [269.97395833333337, 122], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 399}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, 
"link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 132, "type": "LTXVAddGuide", "pos": [-599.7217670603999, 4421.110609115862], "size": [269.97395833333337, 162], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 395}, {"localized_name": "frame_idx", "name": "frame_idx", "type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 154, "type": "MarkdownNote", "pos": [-1630, 5190], "size": [350, 88], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make 
sure the frame rate value is the same in both boxes"], "color": "#432", "bgcolor": "#653"}, {"id": 159, "type": "ResizeImageMaskNode", "pos": [-1610, 4580], "size": [284.375, 154], "flags": {}, "order": 39, "mode": 0, "inputs": [{"localized_name": "input", "name": "input", "type": "IMAGE,MASK", "link": 400}, {"localized_name": "resize_type", "name": "resize_type", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "resize_type"}, "link": null}, {"localized_name": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": 408}, {"localized_name": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": 409}, {"localized_name": "crop", "name": "resize_type.crop", "type": "COMBO", "widget": {"name": "resize_type.crop"}, "link": null}, {"localized_name": "scale_method", "name": "scale_method", "type": "COMBO", "widget": {"name": "scale_method"}, "link": null}], "outputs": [{"localized_name": "resized", "name": "resized", "type": "IMAGE,MASK", "links": [391, 392, 395]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ResizeImageMaskNode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["scale dimensions", 1280, 720, "center", "lanczos"]}, {"id": 110, "type": "GetImageSize", "pos": [-1600, 4780], "size": [259.97395833333337, 66], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 391}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, 
"tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1099.721794809093, 4611.11072170357], "size": [269.97395833333337, 130], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 410}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1099.721794809093, 4811.110229576288], "size": [269.97395833333337, 106], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 382}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": null}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, 
"tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1610, 4860, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 
93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, "origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 
118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, "target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": 
"LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, "type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 1, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 1, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 382, "origin_id": 156, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 383, "origin_id": 156, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 391, "origin_id": 159, "origin_slot": 0, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 395, "origin_id": 159, "origin_slot": 0, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 398, "origin_id": -10, "origin_slot": 3, 
"target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 399, "origin_id": -10, "origin_slot": 3, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 400, "origin_id": -10, "origin_slot": 4, "target_id": 159, "target_slot": 0, "type": "IMAGE,MASK"}, {"id": 401, "origin_id": -10, "origin_slot": 5, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 402, "origin_id": -10, "origin_slot": 5, "target_id": 156, "target_slot": 0, "type": "COMBO"}, {"id": 403, "origin_id": -10, "origin_slot": 5, "target_id": 97, "target_slot": 1, "type": "COMBO"}, {"id": 404, "origin_id": -10, "origin_slot": 6, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 405, "origin_id": -10, "origin_slot": 6, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 406, "origin_id": -10, "origin_slot": 7, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 407, "origin_id": -10, "origin_slot": 8, "target_id": 100, "target_slot": 0, "type": "COMBO"}, {"id": 408, "origin_id": -10, "origin_slot": 9, "target_id": 159, "target_slot": 2, "type": "INT"}, {"id": 409, "origin_id": -10, "origin_slot": 10, "target_id": 159, "target_slot": 3, "type": "INT"}, {"id": 410, "origin_id": -10, "origin_slot": 11, "target_id": 115, "target_slot": 2, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Pose to video"}]}, "config": {}, "extra": {"ds": {"scale": 1.3889423076923078, "offset": [217.0560747663551, -3703.3333333333335]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} +{ + "revision": 0, + "last_node_id": 143, + "last_link_id": 0, + "nodes": [ + { + "id": 143, + "type": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", + "pos": [ + 290, + 3960 + ], + "size": [ + 400, + 500 + ], + "flags": { + "collapsed": false + }, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "text", + 
"type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "control_images", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "first_frame", + "name": "image_1", + "type": "IMAGE", + "link": null + }, + { + "label": "image_strength", + "name": "strength_1", + "type": "FLOAT", + "widget": { + "name": "strength_1" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "label": "control_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "distill_lora", + "name": "lora_name_1", + "type": "COMBO", + "widget": { + "name": "lora_name_1" + }, + "link": null + }, + { + "label": "upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "124", + "text" + ], + [ + "149", + "strength" + ], + [ + "126", + "noise_seed" + ], + [ + "103", + "ckpt_name" + ], + [ + "134", + "lora_name" + ], + [ + "97", + "text_encoder" + ], + [ + "105", + "lora_name" + ], + [ + "100", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": { + "lora_name": true, + "strength": true, + "bypass": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Pose to Video (LTX 2.0)" + } + ], + 
"links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", + "version": 1, + "state": { + "lastGroupId": 14, + "lastNodeId": 701, + "lastLinkId": 1774, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Pose to Video (LTX 2.0)", + "inputNode": { + "id": -10, + "bounding": [ + -2050, + 4100, + 127.029296875, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1750, + 4090, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", + "name": "text", + "type": "STRING", + "linkIds": [ + 345 + ], + "pos": [ + -1942.970703125, + 4120 + ] + }, + { + "id": "35a07084-3ecf-482a-a330-b40278770ca3", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 348, + 380 + ], + "label": "control_images", + "pos": [ + -1942.970703125, + 4140 + ] + }, + { + "id": "bea20802-d654-4287-a8ef-0f834314bcf9", + "name": "image_1", + "type": "IMAGE", + "linkIds": [ + 364, + 379 + ], + "label": "first_frame", + "pos": [ + -1942.970703125, + 4160 + ] + }, + { + "id": "b9b4151d-df88-40c0-a2bd-6e35b94557fe", + "name": "strength_1", + "type": "FLOAT", + "linkIds": [ + 1758, + 1759 + ], + "label": "image_strength", + "pos": [ + -1942.970703125, + 4180 + ] + }, + { + "id": "b51f6a12-9152-4526-b115-443cfd23003f", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 1767 + ], + "pos": [ + -1942.970703125, + 4200 + ] + }, + { + "id": "47248f12-f174-4e35-854c-fa5eebea2903", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 1768, + 1770, + 1771 + ], + "pos": [ + -1942.970703125, + 4220 + ] + }, + { + "id": "6feb34cf-7972-4d3a-91fc-11070a84dc5f", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 1769 + ], + "label": "control_lora", + "pos": [ + -1942.970703125, + 4240 + ] + }, + { + "id": "6b423a3e-6c0e-445d-93c0-2cc3945400d1", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 1772 + ], + "pos": [ + -1942.970703125, + 4260 + ] + }, + { + "id": 
"ffd38c52-cc57-4e68-b140-94e7b03499b1", + "name": "lora_name_1", + "type": "COMBO", + "linkIds": [ + 1773 + ], + "label": "distill_lora", + "pos": [ + -1942.970703125, + 4280 + ] + }, + { + "id": "6d8b9605-acf0-4dd7-8d45-f824c2fd5895", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 1774 + ], + "label": "upscale_model", + "pos": [ + -1942.970703125, + 4300 + ] + } + ], + "outputs": [ + { + "id": "4e837941-de2d-4df8-8f94-686e24036897", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 304 + ], + "localized_name": "VIDEO", + "pos": [ + 1770, + 4110 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 93, + "type": "CFGGuider", + "pos": [ + -690, + 3710 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 326 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 309 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 311 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 261 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 94, + "type": "KSamplerSelect", + "pos": [ + -690, + 3940 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": 
"sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 262 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 99, + "type": "ManualSigmas", + "pos": [ + 450, + 3910 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 278 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 100, + "type": "LatentUpscaleModelLoader", + "pos": [ + -70, + 3790 + ], + "size": [ + 390, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 1774 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 288 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + 
"ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LatentUpscaleModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", + "directory": "latent_upscale_models" + } + ] + }, + "widgets_values": [ + "ltx-2-spatial-upscaler-x2-1.0.safetensors" + ] + }, + { + "id": 101, + "type": "LTXVConcatAVLatent", + "pos": [ + 450, + 4220 + ], + "size": [ + 270, + 120 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 365 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 279 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 108, + "type": "CFGGuider", + "pos": [ + 450, + 3720 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 280 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 281 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 282 + }, + { + 
"localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 276 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 111, + "type": "LTXVEmptyLatentAudio", + "pos": [ + -1100, + 4940 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 285 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 329 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 354 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 300 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 123, + "type": "SamplerCustomAdvanced", + "pos": [ + -380, + 3530 + ], + 
"size": [ + 230, + 170 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 260 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 261 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 262 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 263 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 272 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 114, + "type": "LTXVConditioning", + "pos": [ + -1130, + 4140 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 292 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 293 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 355 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 313 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 314 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + 
"ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 119, + "type": "CLIPTextEncode", + "pos": [ + -1160, + 3880 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 294 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 293 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 116, + "type": "LTXVConcatAVLatent", + "pos": [ + -520, + 4830 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 324 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 300 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 322, + 323 + ] + } + ], + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 122, + "type": "LTXVSeparateAVLatent", + "pos": [ + -380, + 3810 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 270 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 266 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 124, + "type": "CLIPTextEncode", + "pos": [ + -1170, + 3510 + ], + "size": [ + 410, + 320 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 295 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 345 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 292 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": 
"CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 98, + "type": "KSamplerSelect", + "pos": [ + 450, + 4070 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 277 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "gradient_estimation" + ] + }, + { + "id": 105, + "type": "LoraLoaderModelOnly", + "pos": [ + -70, + 3570 + ], + "size": [ + 390, + 140 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 327 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 1773 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 280 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name 
for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2-19b-distilled-lora-384.safetensors", + 1 + ] + }, + { + "id": 95, + "type": "LTXVScheduler", + "pos": [ + -690, + 4130 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "latent", + "name": "latent", + "shape": 7, + "type": "LATENT", + "link": 322 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "max_shift", + "name": "max_shift", + "type": "FLOAT", + "widget": { + "name": "max_shift" + }, + "link": null + }, + { + "localized_name": "base_shift", + "name": "base_shift", + "type": "FLOAT", + "widget": { + "name": "base_shift" + }, + "link": null + }, + { + "localized_name": "stretch", + "name": "stretch", + "type": "BOOLEAN", + "widget": { + "name": "stretch" + }, + "link": null + }, + { + "localized_name": "terminal", + "name": "terminal", + "type": "FLOAT", + "widget": { + "name": "terminal" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 263 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVScheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 2.05, + 0.95, + 
true, + 0.1 + ] + }, + { + "id": 126, + "type": "RandomNoise", + "pos": [ + -690, + 3520 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 1767 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 260 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 107, + "type": "SamplerCustomAdvanced", + "pos": [ + 730, + 3570 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 347 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 276 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 277 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 278 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 279 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 336 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 187, + "type": "RandomNoise", + "pos": [ + 450, + 3570 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 347 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "fixed" + ] + }, + { + "id": 139, + "type": "LTXVAudioVAEDecode", + "pos": [ + 1130, + 3840 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 338 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 340 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 339 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 106, + "type": "CreateVideo", + "pos": [ + 1420, + 3760 + ], + "size": [ + 270, + 130 + ], + 
"flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 352 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 339 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 356 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 304 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 134, + "type": "LoraLoaderModelOnly", + "pos": [ + -1650, + 3750 + ], + "size": [ + 420, + 140 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 325 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 1769 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 326, + 327 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + 
"name": "ltx-2-19b-ic-lora-pose-control.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Pose-Control/resolve/main/ltx-2-19b-ic-lora-pose-control.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2-19b-ic-lora-pose-control.safetensors", + 1 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 138, + "type": "LTXVSeparateAVLatent", + "pos": [ + 740, + 3810 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 337, + 351 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 338 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 188, + "type": "VAEDecodeTiled", + "pos": [ + 1120, + 3640 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 351 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 353 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" 
+ }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 352 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 512, + 64, + 4096, + 8 + ] + }, + { + "id": 113, + "type": "VAEDecode", + "pos": [ + 1130, + 3530 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 337 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 291 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 145, + "type": "PrimitiveInt", + "pos": [ + -1610, + 4800 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 354 + ] + } + ], + "properties": { + 
"cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24, + "fixed" + ] + }, + { + "id": 148, + "type": "PrimitiveFloat", + "pos": [ + -1610, + 4930 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 355, + 356 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 115, + "type": "EmptyLTXVLatentVideo", + "pos": [ + -1100, + 4740 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 296 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 297 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 330 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": 
"LATENT", + "type": "LATENT", + "links": [ + 360 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 118, + "type": "Reroute", + "pos": [ + -350, + 3980 + ], + "size": [ + 230, + 40 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 303 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 289, + 291, + 367 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 189, + "type": "LTXVImgToVideoInplace", + "pos": [ + 180, + 4040 + ], + "size": [ + 260, + 190 + ], + "flags": { + "collapsed": false + }, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 367 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 379 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 366 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 1759 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 368 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 365 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 104, + "type": "LTXVCropGuides", + "pos": [ + -90, + 4210 + ], + "size": [ + 240, + 120 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 310 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 312 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 281 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 282 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 287 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 112, + "type": "LTXVLatentUpsampler", + "pos": [ + -90, + 4030 + ], + "size": [ + 260, + 120 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 287 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 288 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 289 + } + ], + "outputs": [ 
+ { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 366 + ] + } + ], + "title": "spatial", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 154, + "type": "MarkdownNote", + "pos": [ + -1640, + 5050 + ], + "size": [ + 350, + 170 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Frame Rate Note", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "Please make sure the frame rate value is the same in both boxes" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 96, + "type": "LTXVAudioVAELoader", + "pos": [ + -1650, + 3970 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 1770 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 285, + 340 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": 
"https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + }, + { + "id": 97, + "type": "LTXAVTextEncoderLoader", + "pos": [ + -1650, + 4160 + ], + "size": [ + 420, + 150 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 1772 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 1771 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 294, + 295 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2-19b-dev-fp8.safetensors", + "default" + ] + }, + { + "id": 103, + "type": "CheckpointLoaderSimple", + "pos": [ + -1650, + 3520 + ], + "size": [ + 420, + 160 + ], + "flags": {}, + "order": 13, + "mode": 0, + 
"inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 1768 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 303, + 328, + 353, + 359 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] + }, + { + "id": 110, + "type": "GetImageSize", + "pos": [ + -1610, + 4630 + ], + "size": [ + 260, + 120 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 381 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 296 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 297 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 329, + 330 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": 
false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 132, + "type": "LTXVAddGuide", + "pos": [ + -600, + 4550 + ], + "size": [ + 270, + 240 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 313 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 314 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 328 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 357 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 348 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 309, + 310 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 311, + 312 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 324 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 149, + "type": "LTXVImgToVideoInplace", + "pos": [ + -1090, + 4530 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + 
"name": "vae", + "type": "VAE", + "link": 359 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 364 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 360 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": 1758 + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 357 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 155, + "type": "ImageScaleBy", + "pos": [ + -1620, + 4440 + ], + "size": [ + 280, + 140 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 380 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "scale_by", + "name": "scale_by", + "type": "FLOAT", + "widget": { + "name": "scale_by" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 381 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ImageScaleBy", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "lanczos", + 0.5 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -1660, + 3440, + 450, + 940 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Basic Sampling", + "bounding": [ + -700, + 3440, + 580, + 940 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -1180, + 3440, + 450, + 940 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Latent", + "bounding": [ + -1180, + 4420, + 1050, + 680 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 9, + "title": "Upscale Sampling(2x)", + "bounding": [ + -100, + 3440, + 1110, + 940 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Sampler", + "bounding": [ + 410, + 3480, + 590, + 880 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Model", + "bounding": [ + -90, + 3480, + 450, + 480 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Frame rate", + "bounding": [ + -1620, + 4730, + 290, + 271.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 326, + "origin_id": 134, + "origin_slot": 0, + "target_id": 93, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 309, + "origin_id": 132, + "origin_slot": 0, + "target_id": 93, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 311, + "origin_id": 132, + "origin_slot": 1, + "target_id": 93, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 266, + "origin_id": 122, + "origin_slot": 1, + "target_id": 101, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 280, + "origin_id": 105, + "origin_slot": 0, + "target_id": 108, + "target_slot": 0, + "type": "MODEL" 
+ }, + { + "id": 281, + "origin_id": 104, + "origin_slot": 0, + "target_id": 108, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 282, + "origin_id": 104, + "origin_slot": 1, + "target_id": 108, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 285, + "origin_id": 96, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 329, + "origin_id": 110, + "origin_slot": 2, + "target_id": 111, + "target_slot": 1, + "type": "INT" + }, + { + "id": 260, + "origin_id": 126, + "origin_slot": 0, + "target_id": 123, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 261, + "origin_id": 93, + "origin_slot": 0, + "target_id": 123, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 262, + "origin_id": 94, + "origin_slot": 0, + "target_id": 123, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 263, + "origin_id": 95, + "origin_slot": 0, + "target_id": 123, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 323, + "origin_id": 116, + "origin_slot": 0, + "target_id": 123, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 296, + "origin_id": 110, + "origin_slot": 0, + "target_id": 115, + "target_slot": 0, + "type": "INT" + }, + { + "id": 297, + "origin_id": 110, + "origin_slot": 1, + "target_id": 115, + "target_slot": 1, + "type": "INT" + }, + { + "id": 330, + "origin_id": 110, + "origin_slot": 2, + "target_id": 115, + "target_slot": 2, + "type": "INT" + }, + { + "id": 325, + "origin_id": 103, + "origin_slot": 0, + "target_id": 134, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 292, + "origin_id": 124, + "origin_slot": 0, + "target_id": 114, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 293, + "origin_id": 119, + "origin_slot": 0, + "target_id": 114, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 294, + "origin_id": 97, + "origin_slot": 0, + "target_id": 119, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 324, + "origin_id": 132, + "origin_slot": 2, 
+ "target_id": 116, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 300, + "origin_id": 111, + "origin_slot": 0, + "target_id": 116, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 313, + "origin_id": 114, + "origin_slot": 0, + "target_id": 132, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 314, + "origin_id": 114, + "origin_slot": 1, + "target_id": 132, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 328, + "origin_id": 103, + "origin_slot": 2, + "target_id": 132, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 272, + "origin_id": 123, + "origin_slot": 0, + "target_id": 122, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 336, + "origin_id": 107, + "origin_slot": 1, + "target_id": 138, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 339, + "origin_id": 139, + "origin_slot": 0, + "target_id": 106, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 295, + "origin_id": 97, + "origin_slot": 0, + "target_id": 124, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 303, + "origin_id": 103, + "origin_slot": 2, + "target_id": 118, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 338, + "origin_id": 138, + "origin_slot": 1, + "target_id": 139, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 340, + "origin_id": 96, + "origin_slot": 0, + "target_id": 139, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 337, + "origin_id": 138, + "origin_slot": 0, + "target_id": 113, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 291, + "origin_id": 118, + "origin_slot": 0, + "target_id": 113, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 276, + "origin_id": 108, + "origin_slot": 0, + "target_id": 107, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 277, + "origin_id": 98, + "origin_slot": 0, + "target_id": 107, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 278, + "origin_id": 99, + "origin_slot": 0, + "target_id": 107, + "target_slot": 3, + "type": "SIGMAS" + }, + { + 
"id": 279, + "origin_id": 101, + "origin_slot": 0, + "target_id": 107, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 327, + "origin_id": 134, + "origin_slot": 0, + "target_id": 105, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 310, + "origin_id": 132, + "origin_slot": 0, + "target_id": 104, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 312, + "origin_id": 132, + "origin_slot": 1, + "target_id": 104, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 270, + "origin_id": 122, + "origin_slot": 0, + "target_id": 104, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 287, + "origin_id": 104, + "origin_slot": 2, + "target_id": 112, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 288, + "origin_id": 100, + "origin_slot": 0, + "target_id": 112, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 289, + "origin_id": 118, + "origin_slot": 0, + "target_id": 112, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 322, + "origin_id": 116, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 304, + "origin_id": 106, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 345, + "origin_id": -10, + "origin_slot": 0, + "target_id": 124, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 347, + "origin_id": 187, + "origin_slot": 0, + "target_id": 107, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 348, + "origin_id": -10, + "origin_slot": 1, + "target_id": 132, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 351, + "origin_id": 138, + "origin_slot": 0, + "target_id": 188, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 352, + "origin_id": 188, + "origin_slot": 0, + "target_id": 106, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 353, + "origin_id": 103, + "origin_slot": 2, + "target_id": 188, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 354, + "origin_id": 145, + "origin_slot": 
0, + "target_id": 111, + "target_slot": 2, + "type": "INT" + }, + { + "id": 355, + "origin_id": 148, + "origin_slot": 0, + "target_id": 114, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 356, + "origin_id": 148, + "origin_slot": 0, + "target_id": 106, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 357, + "origin_id": 149, + "origin_slot": 0, + "target_id": 132, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 359, + "origin_id": 103, + "origin_slot": 2, + "target_id": 149, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 360, + "origin_id": 115, + "origin_slot": 0, + "target_id": 149, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 364, + "origin_id": -10, + "origin_slot": 2, + "target_id": 149, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 365, + "origin_id": 189, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 366, + "origin_id": 112, + "origin_slot": 0, + "target_id": 189, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 367, + "origin_id": 118, + "origin_slot": 0, + "target_id": 189, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 368, + "origin_id": -10, + "origin_slot": 4, + "target_id": 189, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 2, + "target_id": 189, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 380, + "origin_id": -10, + "origin_slot": 1, + "target_id": 155, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 381, + "origin_id": 155, + "origin_slot": 0, + "target_id": 110, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 1758, + "origin_id": -10, + "origin_slot": 3, + "target_id": 149, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 1759, + "origin_id": -10, + "origin_slot": 3, + "target_id": 189, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 1767, + "origin_id": -10, + "origin_slot": 4, + "target_id": 126, + "target_slot": 0, + "type": "INT" + }, + { + "id": 
1768, + "origin_id": -10, + "origin_slot": 5, + "target_id": 103, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 1769, + "origin_id": -10, + "origin_slot": 6, + "target_id": 134, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 1770, + "origin_id": -10, + "origin_slot": 5, + "target_id": 96, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 1771, + "origin_id": -10, + "origin_slot": 5, + "target_id": 97, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 1772, + "origin_id": -10, + "origin_slot": 7, + "target_id": 97, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 1773, + "origin_id": -10, + "origin_slot": 8, + "target_id": 105, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 1774, + "origin_id": -10, + "origin_slot": 9, + "target_id": 100, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Pose to video", + "description": "Generates video from pose reference frames using LTX-2, with optional synchronized audio." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Prompt Enhance.json b/blueprints/Prompt Enhance.json index 2612f66db..e260b1203 100644 --- a/blueprints/Prompt Enhance.json +++ b/blueprints/Prompt Enhance.json @@ -1 +1,279 @@ -{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, 
"type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}} +{ + "revision": 0, + "last_node_id": 15, + "last_link_id": 0, + "nodes": [ + { + "id": 15, + "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", + "pos": [ + -1490, + 2040 + ], + "size": [ + 400, + 260 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "label": "reference images", + 
"name": "images", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": null + } + ], + "title": "Prompt Enhance", + "properties": { + "proxyWidgets": [ + [ + "-1", + "prompt" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.14.1" + }, + "widgets_values": [ + "" + ] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 15, + "lastLinkId": 14, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Prompt Enhance", + "inputNode": { + "id": -10, + "bounding": [ + -2170, + 2110, + 138.876953125, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -640, + 2110, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "aeab7216-00e0-4528-a09b-bba50845c5a6", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 11 + ], + "pos": [ + -2051.123046875, + 2130 + ] + }, + { + "id": "7b73fd36-aa31-4771-9066-f6c83879994b", + "name": "images", + "type": "IMAGE", + "linkIds": [ + 14 + ], + "label": "reference images", + "pos": [ + -2051.123046875, + 2150 + ] + } + ], + "outputs": [ + { + "id": "c7b0d930-68a1-48d1-b496-0519e5837064", + "name": "STRING", + "type": "STRING", + "linkIds": [ + 13 + ], + "pos": [ + -620, + 2130 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 11, + "type": "GeminiNode", + "pos": [ + -1560, + 1990 + ], + "size": [ + 470, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "shape": 7, + "type": "IMAGE", + "link": 14 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "localized_name": "video", + "name": "video", + "shape": 7, + "type": "VIDEO", + "link": null + }, + { + "localized_name": "files", + "name": "files", + "shape": 7, + "type": "GEMINI_INPUT_FILES", + "link": null + }, + { + "localized_name": 
"prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 11 + }, + { + "localized_name": "model", + "name": "model", + "type": "COMBO", + "widget": { + "name": "model" + }, + "link": null + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "system_prompt", + "name": "system_prompt", + "shape": 7, + "type": "STRING", + "widget": { + "name": "system_prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "GeminiNode" + }, + "widgets_values": [ + "", + "gemini-3-pro-preview", + 42, + "randomize", + "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only" + ], + "color": "#432", + "bgcolor": "#653" + } + ], + "groups": [], + "links": [ + { + "id": 11, + "origin_id": -10, + "origin_slot": 0, + "target_id": 11, + "target_slot": 4, + "type": "STRING" + }, + { + "id": 13, + "origin_id": 11, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 14, + "origin_id": -10, + "origin_slot": 1, + "target_id": 11, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Text generation/Prompt enhance", + "description": "Expands short text prompts into detailed descriptions using a text generation model for better generation quality." 
+ } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Remove Background (BiRefNet).json b/blueprints/Remove Background (BiRefNet).json new file mode 100644 index 000000000..732a4adc4 --- /dev/null +++ b/blueprints/Remove Background (BiRefNet).json @@ -0,0 +1,397 @@ +{ + "revision": 0, + "last_node_id": 19, + "last_link_id": 0, + "nodes": [ + { + "id": 19, + "type": "5b40ca21-ba1a-41d5-b403-4d2d7acdc195", + "pos": [ + -6411.330578108367, + 1940.2638932730042 + ], + "size": [ + 349.609375, + 145.9375 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "bg_removal_name", + "type": "COMBO", + "widget": { + "name": "bg_removal_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "name": "mask", + "type": "MASK", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "14", + "bg_removal_name" + ] + ] + }, + "widgets_values": [], + "title": "Remove Background (BiRefNet)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "5b40ca21-ba1a-41d5-b403-4d2d7acdc195", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 21, + "lastLinkId": 16, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Remove Background (BiRefNet)", + "description": "Removes or replaces image backgrounds using BiRefNet segmentation and alpha compositing.", + "inputNode": { + "id": -10, + "bounding": [ + -6728.534070722246, + 1475.2619799128663, + 150.9140625, + 88 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -6169.049695722246, + 1475.2619799128663, + 128, + 88 + ] + }, + "inputs": [ + { + "id": "7bc321cd-df31-4c39-aaf7-7f0d01326189", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 5, + 7 + ], + "localized_name": "image", + "pos": [ + -6601.620008222246, + 1499.2619799128663 
+ ] + }, + { + "id": "e89d2cd8-daa3-4e29-8a69-851db85072cb", + "name": "bg_removal_name", + "type": "COMBO", + "linkIds": [ + 12 + ], + "pos": [ + -6601.620008222246, + 1519.2619799128663 + ] + } + ], + "outputs": [ + { + "id": "16e7863c-4c38-46c2-aa74-e82991fbfe8d", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 8 + ], + "localized_name": "IMAGE", + "pos": [ + -6145.049695722246, + 1499.2619799128663 + ] + }, + { + "id": "f7240c19-5b80-406e-a8e2-9b12440ee2d6", + "name": "mask", + "type": "MASK", + "linkIds": [ + 11 + ], + "pos": [ + -6145.049695722246, + 1519.2619799128663 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 13, + "type": "RemoveBackground", + "pos": [ + -6536.764823982709, + 1444.9963409012412 + ], + "size": [ + 302.25, + 72 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 5 + }, + { + "localized_name": "bg_removal_model", + "name": "bg_removal_model", + "type": "BACKGROUND_REMOVAL", + "link": 3 + } + ], + "outputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "links": [ + 4, + 11 + ] + } + ], + "properties": { + "Node name for S&R": "RemoveBackground" + } + }, + { + "id": 14, + "type": "LoadBackgroundRemovalModel", + "pos": [ + -6540.534070722246, + 1302.223464635445 + ], + "size": [ + 311.484375, + 85.515625 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "bg_removal_name", + "name": "bg_removal_name", + "type": "COMBO", + "widget": { + "name": "bg_removal_name" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "bg_model", + "name": "bg_model", + "type": "BACKGROUND_REMOVAL", + "links": [ + 3 + ] + } + ], + "properties": { + "Node name for S&R": "LoadBackgroundRemovalModel", + "models": [ + { + "name": "birefnet.safetensors", + "url": "https://huggingface.co/Comfy-Org/BiRefNet/resolve/main/background_removal/birefnet.safetensors", + "directory": 
"background_removal" + } + ] + }, + "widgets_values": [ + "birefnet.safetensors" + ] + }, + { + "id": 15, + "type": "InvertMask", + "pos": [ + -6532.446160529669, + 1571.1111286839914 + ], + "size": [ + 285.984375, + 48 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 4 + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "InvertMask" + } + }, + { + "id": 16, + "type": "JoinImageWithAlpha", + "pos": [ + -6527.4370171636665, + 1674.3004951902876 + ], + "size": [ + 284.96875, + 72 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 7 + }, + { + "localized_name": "alpha", + "name": "alpha", + "type": "MASK", + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 8 + ] + } + ], + "properties": { + "Node name for S&R": "JoinImageWithAlpha" + } + } + ], + "groups": [], + "links": [ + { + "id": 3, + "origin_id": 14, + "origin_slot": 0, + "target_id": 13, + "target_slot": 1, + "type": "BACKGROUND_REMOVAL" + }, + { + "id": 4, + "origin_id": 13, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 6, + "origin_id": 15, + "origin_slot": 0, + "target_id": 16, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 5, + "origin_id": -10, + "origin_slot": 0, + "target_id": 13, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 7, + "origin_id": -10, + "origin_slot": 0, + "target_id": 16, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 8, + "origin_id": 16, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 11, + "origin_id": 13, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "MASK" + }, + { + "id": 12, + 
"origin_id": -10, + "origin_slot": 1, + "target_id": 14, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Image generation and editing/Background Removal" + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Sharpen.json b/blueprints/Sharpen.json index a4accaf59..3c4099c6b 100644 --- a/blueprints/Sharpen.json +++ b/blueprints/Sharpen.json @@ -1 +1,310 @@ -{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": 
"FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, 
v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}} +{ + "revision": 0, + "last_node_id": 25, + "last_link_id": 0, + "nodes": [ + { + "id": 25, + "type": "621ba4e2-22a8-482d-a369-023753198b7b", + "pos": [ + 4610, + -790 + ], + "size": [ + 230, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "IMAGE", + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "title": "Sharpen", + "properties": { + "proxyWidgets": [ + [ + "24", + "value" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "621ba4e2-22a8-482d-a369-023753198b7b", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 24, + "lastLinkId": 36, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Sharpen", + "inputNode": { + "id": -10, + "bounding": [ + 4090, + -825, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 5150, + 
-825, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 34 + ], + "localized_name": "images.image0", + "label": "image", + "pos": [ + 4190, + -805 + ] + } + ], + "outputs": [ + { + "id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 35 + ], + "localized_name": "IMAGE0", + "label": "IMAGE", + "pos": [ + 5170, + -805 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 24, + "type": "PrimitiveFloat", + "pos": [ + 4280, + -1240 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "strength", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 36 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 3, + "precision": 2, + "step": 0.05 + }, + "widgets_values": [ + 0.5 + ] + }, + { + "id": 23, + "type": "GLSLShader", + "pos": [ + 4570, + -1240 + ], + "size": [ + 370, + 192 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 34 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 36 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", 
+ "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 35 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 36, + "origin_id": 24, + "origin_slot": 0, + "target_id": 23, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 34, + "origin_id": -10, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 35, + "origin_id": 23, + "origin_slot": 0, + 
"target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Sharpen", + "description": "Sharpens image details using a GPU fragment shader for enhanced clarity." + } + ] + } +} \ No newline at end of file diff --git a/blueprints/Text to Audio (ACE-Step 1.5).json b/blueprints/Text to Audio (ACE-Step 1.5).json index 51e3bbed3..5b8b8626f 100644 --- a/blueprints/Text to Audio (ACE-Step 1.5).json +++ b/blueprints/Text to Audio (ACE-Step 1.5).json @@ -1 +1,1522 @@ -{"id": "67979fed-a490-450a-83f4-c7c0105d450e", "revision": 0, "last_node_id": 110, "last_link_id": 288, "nodes": [{"id": 21, "type": "510f6b52-34ee-40dd-b532-475497dee41b", "pos": [1810, -560], "size": [390, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "tags", "type": "STRING", "widget": {"name": "tags"}, "link": null}, {"name": "lyrics", "type": "STRING", "widget": {"name": "lyrics"}, "link": null}, {"name": "timesignature", "type": "COMBO", "widget": {"name": "timesignature"}, "link": null}, {"name": "language", "type": "COMBO", "widget": {"name": "language"}, "link": null}, {"name": "keyscale", "type": "COMBO", "widget": {"name": "keyscale"}, "link": null}, {"name": "generate_audio_codes", "type": "BOOLEAN", "widget": {"name": "generate_audio_codes"}, "link": null}, {"name": "cfg_scale", "type": "FLOAT", "widget": {"name": "cfg_scale"}, "link": null}, {"label": "duration", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name1", "type": "COMBO", "widget": {"name": "clip_name1"}, "link": null}, {"name": "clip_name2", "type": "COMBO", "widget": {"name": "clip_name2"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "AUDIO", "name": "AUDIO", "type": "AUDIO", "links": []}], "properties": 
{"proxyWidgets": [["-1", "tags"], ["-1", "lyrics"], ["-1", "language"], ["-1", "timesignature"], ["-1", "keyscale"], ["-1", "generate_audio_codes"], ["-1", "cfg_scale"], ["102", "value"], ["102", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name1"], ["-1", "clip_name2"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.12.3", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", "", "en", "4", "E minor", true, 2, null, null, "acestep_v1.5_turbo.safetensors", "qwen_0.6b_ace15.safetensors", "qwen_4b_ace15.safetensors", "ace_1.5_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "510f6b52-34ee-40dd-b532-475497dee41b", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 110, "lastLinkId": 288, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Text to Audio (ACE-Step 1.5)", "inputNode": {"id": -10, "bounding": [-660, -560, 167.458984375, 280]}, "outputNode": {"id": -20, "bounding": [1504.8375, -410, 120, 60]}, "inputs": [{"id": "ebc79d17-2e65-4e0f-855a-c9f2466a5fbf", "name": "tags", "type": "STRING", "linkIds": [264], "pos": [-512.541015625, -540]}, {"id": "230afdb4-a647-4fb7-a68c-a2204fd5d570", "name": "lyrics", "type": "STRING", "linkIds": [265], "pos": [-512.541015625, -520]}, {"id": "efdcbb48-231c-4757-b343-4458c011a283", "name": "timesignature", "type": "COMBO", "linkIds": [266], "pos": [-512.541015625, -500]}, {"id": "811579c1-2979-4721-a1e1-7d9352616e7b", "name": "language", "type": "COMBO", "linkIds": [267], "pos": [-512.541015625, -480]}, {"id": "76a68b0d-7a5f-43dc-873d-d78adf32895f", "name": "keyscale", "type": "COMBO", "linkIds": [268], "pos": [-512.541015625, -460]}, {"id": "11bb3297-272d-4c56-873a-2c974581e838", "name": "generate_audio_codes", "type": "BOOLEAN", "linkIds": [269], "pos": [-512.541015625, -440]}, {"id": "e5a30400-a8b0-422a-a0f3-21739727ab03", 
"name": "cfg_scale", "type": "FLOAT", "linkIds": [270], "pos": [-512.541015625, -420]}, {"id": "91a37ca5-e0d1-42c5-8248-419b850661a0", "name": "value", "type": "FLOAT", "linkIds": [284], "label": "duration", "pos": [-512.541015625, -400]}, {"id": "30f69f59-e916-48ab-9a5d-ae445b8d8a63", "name": "unet_name", "type": "COMBO", "linkIds": [285], "pos": [-512.541015625, -380]}, {"id": "1af0e8df-6fa7-4df2-b1b4-9c356a8f30a6", "name": "clip_name1", "type": "COMBO", "linkIds": [286], "pos": [-512.541015625, -360]}, {"id": "c7195505-9e83-4f87-b8d7-7747d808577d", "name": "clip_name2", "type": "COMBO", "linkIds": [287], "pos": [-512.541015625, -340]}, {"id": "ca4bd68f-e7c1-4d87-9914-cfe15c63b96e", "name": "vae_name", "type": "COMBO", "linkIds": [288], "pos": [-512.541015625, -320]}], "outputs": [{"id": "bfd748f6-f9ac-4588-81fa-41bde07a58fa", "name": "AUDIO", "type": "AUDIO", "linkIds": [263], "localized_name": "AUDIO", "pos": [1524.8375, -390]}], "widgets": [], "nodes": [{"id": 105, "type": "DualCLIPLoader", "pos": [-165, -660], "size": [380, 130], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name1", "name": "clip_name1", "type": "COMBO", "widget": {"name": "clip_name1"}, "link": 286}, {"localized_name": "clip_name2", "name": "clip_name2", "type": "COMBO", "widget": {"name": "clip_name2"}, "link": 287}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "DualCLIPLoader", "models": [{"name": "qwen_0.6b_ace15.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_0.6b_ace15.safetensors", "directory": "text_encoders"}, {"name": 
"qwen_4b_ace15.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_4b_ace15.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_0.6b_ace15.safetensors", "qwen_4b_ace15.safetensors", "ace", "default"]}, {"id": 106, "type": "VAELoader", "pos": [-165, -470], "size": [380, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 288}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "VAELoader", "models": [{"name": "ace_1.5_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/vae/ace_1.5_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ace_1.5_vae.safetensors"]}, {"id": 98, "type": "EmptyAceStep1.5LatentAudio", "pos": [-150, 10], "size": [314.90390625, 82], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "seconds", "name": "seconds", "type": "FLOAT", "widget": {"name": "seconds"}, "link": 279}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [249]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "EmptyAceStep1.5LatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, 
"widgets_values": [120, 1]}, {"id": 47, "type": "ConditioningZeroOut", "pos": [670, 50], "size": [204.75, 26], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 255}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [119]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 3, "type": "KSampler", "pos": [930, -680], "size": [329.39477481889753, 262], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 175}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 254}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 119}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 249}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": 258}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [256]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "KSampler", "enableTabs": 
false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed", 8, 1, "euler", "simple", 1]}, {"id": 78, "type": "ModelSamplingAuraFlow", "pos": [930, -810], "size": [329.39477481889753, 60], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 260}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [175]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 18, "type": "VAEDecodeAudio", "pos": [1280, -800], "size": [164.8375, 46], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 256}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 262}], "outputs": [{"localized_name": "AUDIO", "name": "AUDIO", "type": "AUDIO", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "VAEDecodeAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 94, "type": "TextEncodeAceStepAudio1.5", "pos": [270, -790], "size": [611.9184354063266, 679.7643386829468], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 261}, {"localized_name": "tags", "name": "tags", "type": "STRING", "widget": {"name": "tags"}, "link": 264}, {"localized_name": "lyrics", "name": "lyrics", "type": "STRING", "widget": {"name": "lyrics"}, "link": 265}, 
{"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": 257}, {"localized_name": "bpm", "name": "bpm", "type": "INT", "widget": {"name": "bpm"}, "link": null}, {"localized_name": "duration", "name": "duration", "type": "FLOAT", "widget": {"name": "duration"}, "link": 280}, {"localized_name": "timesignature", "name": "timesignature", "type": "COMBO", "widget": {"name": "timesignature"}, "link": 266}, {"localized_name": "language", "name": "language", "type": "COMBO", "widget": {"name": "language"}, "link": 267}, {"localized_name": "keyscale", "name": "keyscale", "type": "COMBO", "widget": {"name": "keyscale"}, "link": 268}, {"localized_name": "generate_audio_codes", "name": "generate_audio_codes", "type": "BOOLEAN", "widget": {"name": "generate_audio_codes"}, "link": 269}, {"localized_name": "cfg_scale", "name": "cfg_scale", "type": "FLOAT", "widget": {"name": "cfg_scale"}, "link": 270}, {"localized_name": "temperature", "name": "temperature", "type": "FLOAT", "widget": {"name": "temperature"}, "link": null}, {"localized_name": "top_p", "name": "top_p", "type": "FLOAT", "widget": {"name": "top_p"}, "link": null}, {"localized_name": "top_k", "name": "top_k", "type": "INT", "widget": {"name": "top_k"}, "link": null}, {"localized_name": "min_p", "name": "min_p", "type": "FLOAT", "widget": {"name": "min_p"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [254, 255]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "TextEncodeAceStepAudio1.5", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", "", 0, "fixed", 190, 120, "4", "en", "E minor", true, 2, 0.85, 0.9, 0, 0]}, {"id": 104, "type": "UNETLoader", "pos": [-170, -790], "size": [380, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": 
"unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 285}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "UNETLoader", "models": [{"name": "acestep_v1.5_turbo.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/diffusion_models/acestep_v1.5_turbo.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["acestep_v1.5_turbo.safetensors", "default"]}, {"id": 102, "type": "PrimitiveNode", "pos": [-120, -130], "size": [268.39945903485034, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [], "outputs": [{"name": "INT", "type": "INT", "widget": {"name": "seed"}, "links": [257, 258]}], "title": "seed", "properties": {"Run widget replace on values": false}, "widgets_values": [0, "randomize"]}, {"id": 110, "type": "PrimitiveFloat", "pos": [-120, -280], "size": [270, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": 284}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [279, 280]}], "title": "Song Duration", "properties": {"cnr_id": "comfy-core", "ver": "0.12.3", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [120]}], "groups": [{"id": 1, "title": "Step 1 - Load Models", "bounding": [-180, -860, 405, 461.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": 
"Step 2 - Duration", "bounding": [-180, -370, 400, 170], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Step3 - Prompt", "bounding": [260, -860, 640, 960], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 255, "origin_id": 94, "origin_slot": 0, "target_id": 47, "target_slot": 0, "type": "CONDITIONING"}, {"id": 175, "origin_id": 78, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 254, "origin_id": 94, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 119, "origin_id": 47, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 249, "origin_id": 98, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 258, "origin_id": 102, "origin_slot": 0, "target_id": 3, "target_slot": 4, "type": "INT"}, {"id": 260, "origin_id": 104, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "MODEL"}, {"id": 256, "origin_id": 3, "origin_slot": 0, "target_id": 18, "target_slot": 0, "type": "LATENT"}, {"id": 262, "origin_id": 106, "origin_slot": 0, "target_id": 18, "target_slot": 1, "type": "VAE"}, {"id": 261, "origin_id": 105, "origin_slot": 0, "target_id": 94, "target_slot": 0, "type": "CLIP"}, {"id": 257, "origin_id": 102, "origin_slot": 0, "target_id": 94, "target_slot": 3, "type": "INT"}, {"id": 263, "origin_id": 18, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "AUDIO"}, {"id": 264, "origin_id": -10, "origin_slot": 0, "target_id": 94, "target_slot": 1, "type": "STRING"}, {"id": 265, "origin_id": -10, "origin_slot": 1, "target_id": 94, "target_slot": 2, "type": "STRING"}, {"id": 266, "origin_id": -10, "origin_slot": 2, "target_id": 94, "target_slot": 6, "type": "COMBO"}, {"id": 267, "origin_id": -10, "origin_slot": 3, "target_id": 94, "target_slot": 7, "type": "COMBO"}, {"id": 268, "origin_id": -10, "origin_slot": 4, "target_id": 94, "target_slot": 8, "type": "COMBO"}, {"id": 269, "origin_id": -10, 
"origin_slot": 5, "target_id": 94, "target_slot": 9, "type": "BOOLEAN"}, {"id": 270, "origin_id": -10, "origin_slot": 6, "target_id": 94, "target_slot": 10, "type": "FLOAT"}, {"id": 279, "origin_id": 110, "origin_slot": 0, "target_id": 98, "target_slot": 0, "type": "FLOAT"}, {"id": 280, "origin_id": 110, "origin_slot": 0, "target_id": 94, "target_slot": 5, "type": "FLOAT"}, {"id": 284, "origin_id": -10, "origin_slot": 7, "target_id": 110, "target_slot": 0, "type": "FLOAT"}, {"id": 285, "origin_id": -10, "origin_slot": 8, "target_id": 104, "target_slot": 0, "type": "COMBO"}, {"id": 286, "origin_id": -10, "origin_slot": 9, "target_id": 105, "target_slot": 0, "type": "COMBO"}, {"id": 287, "origin_id": -10, "origin_slot": 10, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 288, "origin_id": -10, "origin_slot": 11, "target_id": 106, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Audio/Music generation"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 0.9575633843910519, "offset": [-950.8014851321678, 872.1540230582457]}}, "version": 0.4} +{ + "id": "67979fed-a490-450a-83f4-c7c0105d450e", + "revision": 0, + "last_node_id": 110, + "last_link_id": 288, + "nodes": [ + { + "id": 21, + "type": "510f6b52-34ee-40dd-b532-475497dee41b", + "pos": [ + 1810, + -560 + ], + "size": [ + 390, + 460 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "name": "tags", + "type": "STRING", + "widget": { + "name": "tags" + }, + "link": null + }, + { + "name": "lyrics", + "type": "STRING", + "widget": { + "name": "lyrics" + }, + "link": null + }, + { + "name": "timesignature", + "type": "COMBO", + "widget": { + "name": "timesignature" + }, + "link": null + }, + { + "name": "language", + "type": "COMBO", + "widget": { + "name": "language" + }, + "link": null + }, + { + "name": "keyscale", + "type": "COMBO", + "widget": { + "name": "keyscale" + }, + "link": null + }, + { + "name": 
"generate_audio_codes", + "type": "BOOLEAN", + "widget": { + "name": "generate_audio_codes" + }, + "link": null + }, + { + "name": "cfg_scale", + "type": "FLOAT", + "widget": { + "name": "cfg_scale" + }, + "link": null + }, + { + "label": "duration", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "AUDIO", + "name": "AUDIO", + "type": "AUDIO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "tags" + ], + [ + "-1", + "lyrics" + ], + [ + "-1", + "language" + ], + [ + "-1", + "timesignature" + ], + [ + "-1", + "keyscale" + ], + [ + "-1", + "generate_audio_codes" + ], + [ + "-1", + "cfg_scale" + ], + [ + "102", + "value" + ], + [ + "102", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "clip_name1" + ], + [ + "-1", + "clip_name2" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.12.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "", + "en", + "4", + "E minor", + true, + 2, + null, + null, + "acestep_v1.5_turbo.safetensors", + "qwen_0.6b_ace15.safetensors", + "qwen_4b_ace15.safetensors", + "ace_1.5_vae.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "510f6b52-34ee-40dd-b532-475497dee41b", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 110, + 
"lastLinkId": 288, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Audio (ACE-Step 1.5)", + "inputNode": { + "id": -10, + "bounding": [ + -660, + -560, + 167.458984375, + 280 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1504.8375, + -410, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "ebc79d17-2e65-4e0f-855a-c9f2466a5fbf", + "name": "tags", + "type": "STRING", + "linkIds": [ + 264 + ], + "pos": [ + -512.541015625, + -540 + ] + }, + { + "id": "230afdb4-a647-4fb7-a68c-a2204fd5d570", + "name": "lyrics", + "type": "STRING", + "linkIds": [ + 265 + ], + "pos": [ + -512.541015625, + -520 + ] + }, + { + "id": "efdcbb48-231c-4757-b343-4458c011a283", + "name": "timesignature", + "type": "COMBO", + "linkIds": [ + 266 + ], + "pos": [ + -512.541015625, + -500 + ] + }, + { + "id": "811579c1-2979-4721-a1e1-7d9352616e7b", + "name": "language", + "type": "COMBO", + "linkIds": [ + 267 + ], + "pos": [ + -512.541015625, + -480 + ] + }, + { + "id": "76a68b0d-7a5f-43dc-873d-d78adf32895f", + "name": "keyscale", + "type": "COMBO", + "linkIds": [ + 268 + ], + "pos": [ + -512.541015625, + -460 + ] + }, + { + "id": "11bb3297-272d-4c56-873a-2c974581e838", + "name": "generate_audio_codes", + "type": "BOOLEAN", + "linkIds": [ + 269 + ], + "pos": [ + -512.541015625, + -440 + ] + }, + { + "id": "e5a30400-a8b0-422a-a0f3-21739727ab03", + "name": "cfg_scale", + "type": "FLOAT", + "linkIds": [ + 270 + ], + "pos": [ + -512.541015625, + -420 + ] + }, + { + "id": "91a37ca5-e0d1-42c5-8248-419b850661a0", + "name": "value", + "type": "FLOAT", + "linkIds": [ + 284 + ], + "label": "duration", + "pos": [ + -512.541015625, + -400 + ] + }, + { + "id": "30f69f59-e916-48ab-9a5d-ae445b8d8a63", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 285 + ], + "pos": [ + -512.541015625, + -380 + ] + }, + { + "id": "1af0e8df-6fa7-4df2-b1b4-9c356a8f30a6", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 286 + ], + "pos": [ + -512.541015625, + -360 + ] + }, + { 
+ "id": "c7195505-9e83-4f87-b8d7-7747d808577d", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 287 + ], + "pos": [ + -512.541015625, + -340 + ] + }, + { + "id": "ca4bd68f-e7c1-4d87-9914-cfe15c63b96e", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 288 + ], + "pos": [ + -512.541015625, + -320 + ] + } + ], + "outputs": [ + { + "id": "bfd748f6-f9ac-4588-81fa-41bde07a58fa", + "name": "AUDIO", + "type": "AUDIO", + "linkIds": [ + 263 + ], + "localized_name": "AUDIO", + "pos": [ + 1524.8375, + -390 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 105, + "type": "DualCLIPLoader", + "pos": [ + -165, + -660 + ], + "size": [ + 380, + 130 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 286 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 287 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 261 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "qwen_0.6b_ace15.safetensors", + "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_0.6b_ace15.safetensors", + "directory": "text_encoders" + }, + { + "name": "qwen_4b_ace15.safetensors", + "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_4b_ace15.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_0.6b_ace15.safetensors", + "qwen_4b_ace15.safetensors", + "ace", + "default" + ] + }, + { + "id": 106, + "type": "VAELoader", + "pos": [ + -165, + -470 + ], + "size": [ + 380, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 288 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 262 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ace_1.5_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/vae/ace_1.5_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ace_1.5_vae.safetensors" + ] + }, + { + "id": 98, + "type": "EmptyAceStep1.5LatentAudio", + "pos": [ + -150, + 10 + ], + "size": [ + 314.90390625, + 82 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "seconds", + "name": "seconds", + "type": "FLOAT", + "widget": { + "name": "seconds" + }, + "link": 279 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 249 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "EmptyAceStep1.5LatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send 
Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 120, + 1 + ] + }, + { + "id": 47, + "type": "ConditioningZeroOut", + "pos": [ + 670, + 50 + ], + "size": [ + 204.75, + 26 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 255 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 119 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 930, + -680 + ], + "size": [ + 329.39477481889753, + 262 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 175 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 254 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 249 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 258 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + 
"widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 256 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "fixed", + 8, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 78, + "type": "ModelSamplingAuraFlow", + "pos": [ + 930, + -810 + ], + "size": [ + 329.39477481889753, + 60 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 260 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 175 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 18, + "type": "VAEDecodeAudio", + "pos": [ + 1280, + -800 + ], + "size": [ + 164.8375, + 46 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 256 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 262 + } + ], + "outputs": [ + { + "localized_name": "AUDIO", + "name": "AUDIO", + "type": "AUDIO", + "links": [ + 263 + ] + } + ], + 
"properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "VAEDecodeAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + }, + { + "id": 94, + "type": "TextEncodeAceStepAudio1.5", + "pos": [ + 270, + -790 + ], + "size": [ + 611.9184354063266, + 679.7643386829468 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 261 + }, + { + "localized_name": "tags", + "name": "tags", + "type": "STRING", + "widget": { + "name": "tags" + }, + "link": 264 + }, + { + "localized_name": "lyrics", + "name": "lyrics", + "type": "STRING", + "widget": { + "name": "lyrics" + }, + "link": 265 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 257 + }, + { + "localized_name": "bpm", + "name": "bpm", + "type": "INT", + "widget": { + "name": "bpm" + }, + "link": null + }, + { + "localized_name": "duration", + "name": "duration", + "type": "FLOAT", + "widget": { + "name": "duration" + }, + "link": 280 + }, + { + "localized_name": "timesignature", + "name": "timesignature", + "type": "COMBO", + "widget": { + "name": "timesignature" + }, + "link": 266 + }, + { + "localized_name": "language", + "name": "language", + "type": "COMBO", + "widget": { + "name": "language" + }, + "link": 267 + }, + { + "localized_name": "keyscale", + "name": "keyscale", + "type": "COMBO", + "widget": { + "name": "keyscale" + }, + "link": 268 + }, + { + "localized_name": "generate_audio_codes", + "name": "generate_audio_codes", + "type": "BOOLEAN", + "widget": { + "name": "generate_audio_codes" + }, + "link": 269 + }, + { + "localized_name": "cfg_scale", + "name": "cfg_scale", + "type": "FLOAT", + "widget": { + "name": "cfg_scale" + }, + "link": 270 + }, + { + "localized_name": "temperature", + 
"name": "temperature", + "type": "FLOAT", + "widget": { + "name": "temperature" + }, + "link": null + }, + { + "localized_name": "top_p", + "name": "top_p", + "type": "FLOAT", + "widget": { + "name": "top_p" + }, + "link": null + }, + { + "localized_name": "top_k", + "name": "top_k", + "type": "INT", + "widget": { + "name": "top_k" + }, + "link": null + }, + { + "localized_name": "min_p", + "name": "min_p", + "type": "FLOAT", + "widget": { + "name": "min_p" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 254, + 255 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "TextEncodeAceStepAudio1.5", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "", + "", + 0, + "fixed", + 190, + 120, + "4", + "en", + "E minor", + true, + 2, + 0.85, + 0.9, + 0, + 0 + ] + }, + { + "id": 104, + "type": "UNETLoader", + "pos": [ + -170, + -790 + ], + "size": [ + 380, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 285 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 260 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.1", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "acestep_v1.5_turbo.safetensors", + "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/diffusion_models/acestep_v1.5_turbo.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": 
false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "acestep_v1.5_turbo.safetensors", + "default" + ] + }, + { + "id": 102, + "type": "PrimitiveNode", + "pos": [ + -120, + -130 + ], + "size": [ + 268.39945903485034, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "widget": { + "name": "seed" + }, + "links": [ + 257, + 258 + ] + } + ], + "title": "seed", + "properties": { + "Run widget replace on values": false + }, + "widgets_values": [ + 0, + "randomize" + ] + }, + { + "id": 110, + "type": "PrimitiveFloat", + "pos": [ + -120, + -280 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": 284 + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 279, + 280 + ] + } + ], + "title": "Song Duration", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 120 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Step 1 - Load Models", + "bounding": [ + -180, + -860, + 405, + 461.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Step 2 - Duration", + "bounding": [ + -180, + -370, + 400, + 170 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 260, + -860, + 640, + 960 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 255, + "origin_id": 94, + "origin_slot": 0, + 
"target_id": 47, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 175, + "origin_id": 78, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 254, + "origin_id": 94, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 119, + "origin_id": 47, + "origin_slot": 0, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 249, + "origin_id": 98, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 258, + "origin_id": 102, + "origin_slot": 0, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 260, + "origin_id": 104, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 256, + "origin_id": 3, + "origin_slot": 0, + "target_id": 18, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 262, + "origin_id": 106, + "origin_slot": 0, + "target_id": 18, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 261, + "origin_id": 105, + "origin_slot": 0, + "target_id": 94, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 257, + "origin_id": 102, + "origin_slot": 0, + "target_id": 94, + "target_slot": 3, + "type": "INT" + }, + { + "id": 263, + "origin_id": 18, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "AUDIO" + }, + { + "id": 264, + "origin_id": -10, + "origin_slot": 0, + "target_id": 94, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 265, + "origin_id": -10, + "origin_slot": 1, + "target_id": 94, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 266, + "origin_id": -10, + "origin_slot": 2, + "target_id": 94, + "target_slot": 6, + "type": "COMBO" + }, + { + "id": 267, + "origin_id": -10, + "origin_slot": 3, + "target_id": 94, + "target_slot": 7, + "type": "COMBO" + }, + { + "id": 268, + "origin_id": -10, + "origin_slot": 4, + "target_id": 94, + "target_slot": 8, + "type": "COMBO" + }, + { + "id": 269, + 
"origin_id": -10, + "origin_slot": 5, + "target_id": 94, + "target_slot": 9, + "type": "BOOLEAN" + }, + { + "id": 270, + "origin_id": -10, + "origin_slot": 6, + "target_id": 94, + "target_slot": 10, + "type": "FLOAT" + }, + { + "id": 279, + "origin_id": 110, + "origin_slot": 0, + "target_id": 98, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 280, + "origin_id": 110, + "origin_slot": 0, + "target_id": 94, + "target_slot": 5, + "type": "FLOAT" + }, + { + "id": 284, + "origin_id": -10, + "origin_slot": 7, + "target_id": 110, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 285, + "origin_id": -10, + "origin_slot": 8, + "target_id": 104, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 286, + "origin_id": -10, + "origin_slot": 9, + "target_id": 105, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 287, + "origin_id": -10, + "origin_slot": 10, + "target_id": 105, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 288, + "origin_id": -10, + "origin_slot": 11, + "target_id": 106, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Audio/Music generation", + "description": "Generates audio/music from text prompts using ACE-Step 1.5, a diffusion-based audio generation model." 
+ } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG", + "ds": { + "scale": 0.9575633843910519, + "offset": [ + -950.8014851321678, + 872.1540230582457 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Text to Image (Ernie Image Turbo).json b/blueprints/Text to Image (Ernie Image Turbo).json new file mode 100644 index 000000000..4ecdd1883 --- /dev/null +++ b/blueprints/Text to Image (Ernie Image Turbo).json @@ -0,0 +1,2112 @@ +{ + "revision": 0, + "last_node_id": 88, + "last_link_id": 0, + "nodes": [ + { + "id": 88, + "type": "2a4f0815-c4d2-4e8b-9bdf-991a8403889d", + "pos": [ + -120, + 240 + ], + "size": [ + 400, + 540 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "prompt_enhancement", + "name": "value_1", + "type": "BOOLEAN", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "label": "prompt_enhancer", + "name": "clip_name_1", + "type": "COMBO", + "widget": { + "name": "clip_name_1" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "94", + "value" + ], + [ + "96", + "value" + ], + [ + "71", + "width" + ], + [ + "71", 
+ "height" + ], + [ + "70", + "seed" + ], + [ + "66", + "unet_name" + ], + [ + "62", + "clip_name" + ], + [ + "98", + "clip_name" + ], + [ + "63", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": { + "value": true, + "value_1": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (Ernie Image Turbo)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2a4f0815-c4d2-4e8b-9bdf-991a8403889d", + "version": 1, + "state": { + "lastGroupId": 7, + "lastNodeId": 103, + "lastLinkId": 134, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Ernie Image Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + -1350, + 370, + 163.50390625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1110, + 260, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "74a4609c-67df-4ae9-ab96-9ff4e3a1c3b1", + "name": "value", + "type": "STRING", + "linkIds": [ + 128 + ], + "label": "prompt", + "pos": [ + -1206.49609375, + 390 + ] + }, + { + "id": "996f1854-7ae3-450e-821c-a9b5b7c310f9", + "name": "value_1", + "type": "BOOLEAN", + "linkIds": [ + 127 + ], + "label": "prompt_enhancement", + "pos": [ + -1206.49609375, + 410 + ] + }, + { + "id": "71e9c6e8-4285-4543-b1d3-81520088f6a4", + "name": "width", + "type": "INT", + "linkIds": [ + 104, + 129 + ], + "pos": [ + -1206.49609375, + 430 + ] + }, + { + "id": "bdb6cd97-67d9-440c-8c4c-9b7a7540edd0", + "name": "height", + "type": "INT", + "linkIds": [ + 105, + 130 + ], + "pos": [ + -1206.49609375, + 450 + ] + }, + { + "id": "18abb56c-30bf-4de5-83c1-c12376e8d14e", + "name": "seed", + "type": "INT", + "linkIds": [ + 108 + ], + "pos": [ + -1206.49609375, + 470 + ] + }, + { + "id": 
"e5cd06f9-64ed-4778-97ba-b165f7a79c4e", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 109 + ], + "pos": [ + -1206.49609375, + 490 + ] + }, + { + "id": "06480e4c-4043-489b-ae68-1cf2b4246260", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 110 + ], + "pos": [ + -1206.49609375, + 510 + ] + }, + { + "id": "8d65d01b-16b2-420d-8b7b-42077c2e4976", + "name": "clip_name_1", + "type": "COMBO", + "linkIds": [ + 132 + ], + "label": "prompt_enhancer", + "pos": [ + -1206.49609375, + 530 + ] + }, + { + "id": "697f2fdb-0fd9-4008-a895-0f9ce9e8fd88", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 133 + ], + "pos": [ + -1206.49609375, + 550 + ] + } + ], + "outputs": [ + { + "id": "21d5fbe0-9f91-4d93-8ea8-5bbf2cd5b698", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 84 + ], + "localized_name": "IMAGE", + "pos": [ + 1130, + 280 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 71, + "type": "EmptyFlux2LatentImage", + "pos": [ + -470, + 1050 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 104 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 105 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 80 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyFlux2LatentImage", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + 
"widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 66, + "type": "UNETLoader", + "pos": [ + -470, + 320 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 109 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 85 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ernie-image-turbo.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image-turbo.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ernie-image-turbo.safetensors", + "default" + ] + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 710, + 280 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 73 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 74 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 84 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + 
"secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 70, + "type": "KSampler", + "pos": [ + 350, + 280 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 85 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 76 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 113 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 80 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 108 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 73 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 423299999918804, + "randomize", + 8, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + -140, + 320 + ], + "size": [ + 410, + 370 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 79 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 131 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 76, + 112 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 62, + "type": "CLIPLoader", + "pos": [ + -470, + 530 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 110 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 79 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ministral-3-3b.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ministral-3-3b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ministral-3-3b.safetensors", + "flux2", + "default" + ] + }, + { + "id": 63, + "type": "VAELoader", + "pos": [ + -470, + 780 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 133 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 74 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "flux2-vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/vae/flux2-vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "flux2-vae.safetensors" + ] + }, + { + "id": 91, + "type": "ConditioningZeroOut", + "pos": [ + 30, + 760 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 112 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": 
"ConditioningZeroOut", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 93, + "type": "StringReplace", + "pos": [ + -500, + -650 + ], + "size": [ + 430, + 450 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": null + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 115 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 121 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "[SYSTEM_PROMPT]你是一个专业的文生图 Prompt 增强助手。你将收到用户的简短图片描述及目标生成分辨率,请据此扩写为一段内容丰富、细节充分的视觉描述,以帮助文生图模型生成高质量的图片。仅输出增强后的描述,不要包含任何解释或前缀。[/SYSTEM_PROMPT][INST]{\"prompt\": \"{prompt}\", \"width\": {width}, \"height\": {height}}[/INST]", + "{prompt}", + "" + ] + }, + { + "id": 94, + "type": "PrimitiveStringMultiline", + "pos": [ + -950, + -660 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 128 + } + ], + 
"outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 115, + 118 + ] + } + ], + "title": "String (Multiline - Prompt)", + "properties": { + "Node name for S&R": "PrimitiveStringMultiline", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "" + ] + }, + { + "id": 95, + "type": "TextGenerate", + "pos": [ + 530, + -660 + ], + "size": [ + 400, + 380 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 116 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 117 + }, + { + "localized_name": "max_length", + "name": "max_length", + "type": "INT", + "widget": { + "name": "max_length" + }, + "link": null + }, + { + "localized_name": "sampling_mode", + "name": "sampling_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "sampling_mode" + }, + "link": null + }, + { + "localized_name": "temperature", + "name": "sampling_mode.temperature", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.temperature" + }, + "link": null + }, + { + "localized_name": "top_k", + "name": "sampling_mode.top_k", + "type": "INT", + "widget": { + "name": "sampling_mode.top_k" + }, + "link": null + }, + { + "localized_name": "top_p", + "name": "sampling_mode.top_p", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.top_p" + }, + "link": null + }, + { + "localized_name": "min_p", + "name": "sampling_mode.min_p", + "type": "FLOAT", + "widget": { + "name": 
"sampling_mode.min_p" + }, + "link": null + }, + { + "localized_name": "repetition_penalty", + "name": "sampling_mode.repetition_penalty", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.repetition_penalty" + }, + "link": null + }, + { + "localized_name": "seed", + "name": "sampling_mode.seed", + "type": "INT", + "widget": { + "name": "sampling_mode.seed" + }, + "link": null + }, + { + "localized_name": "sampling_mode.presence_penalty", + "name": "sampling_mode.presence_penalty", + "shape": 7, + "type": "FLOAT", + "widget": { + "name": "sampling_mode.presence_penalty" + }, + "link": null + }, + { + "localized_name": "thinking", + "name": "thinking", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "thinking" + }, + "link": null + }, + { + "localized_name": "use_default_template", + "name": "use_default_template", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "use_default_template" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "generated_text", + "name": "generated_text", + "type": "STRING", + "links": [ + 119 + ] + } + ], + "properties": { + "Node name for S&R": "TextGenerate", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + 2048, + "on", + 0.6, + 64, + 0.8, + 0.05, + 1.05, + 0, + 0, + false, + true + ] + }, + { + "id": 96, + "type": "PrimitiveBoolean", + "pos": [ + -490, + 60 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 127 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 120 + ] 
+ } + ], + "title": "Enable prompt enhancement?", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + true + ] + }, + { + "id": 97, + "type": "ComfySwitchNode", + "pos": [ + 550, + -10 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 118 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 119 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 120 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 131, + 134 + ] + } + ], + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + false + ] + }, + { + "id": 98, + "type": "CLIPLoader", + "pos": [ + -490, + -150 + ], + "size": [ + 510, + 150 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 132 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": 
"COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 116 + ] + } + ], + "title": "Load CLIP (PE)", + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "models": [ + { + "name": "ernie-image-prompt-enhancer.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ernie-image-prompt-enhancer.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ernie-image-prompt-enhancer.safetensors", + "flux2", + "default" + ] + }, + { + "id": 99, + "type": "PreviewAny", + "pos": [ + -950, + -410 + ], + "size": [ + 400, + 180 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 129 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 122 + ] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 100, + "type": "PreviewAny", + "pos": [ + -950, + -190 + ], + "size": [ + 400, + 180 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + 
"type": "*", + "link": 130 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 124 + ] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 101, + "type": "StringReplace", + "pos": [ + -30, + -650 + ], + "size": [ + 230, + 450 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 121 + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 122 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + "{width}", + "" + ] + }, + { + "id": 102, + "type": "StringReplace", + "pos": [ + 220, + -650 + ], + "size": [ + 250, + 450 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { 
+ "name": "string" + }, + "link": 123 + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 124 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 117 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + "{height}", + "" + ] + }, + { + "id": 103, + "type": "PreviewAny", + "pos": [ + 970, + -660 + ], + "size": [ + 570, + 790 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 134 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + } + ], + "groups": [ + { + "id": 6, + "title": "Text to Image", + "bounding": [ + -510, + 200, + 1450, + 1060 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -490, + 950, + 300, + 290 + ], + "color": "#3f789e", + 
"font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -160, + 250, + 470, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Model", + "bounding": [ + -490, + 250, + 300, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Prompt Enhancement", + "bounding": [ + -510, + -720, + 1450, + 890 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 73, + "origin_id": 70, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 74, + "origin_id": 63, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 85, + "origin_id": 66, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 76, + "origin_id": 67, + "origin_slot": 0, + "target_id": 70, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 80, + "origin_id": 71, + "origin_slot": 0, + "target_id": 70, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 79, + "origin_id": 62, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 84, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 104, + "origin_id": -10, + "origin_slot": 2, + "target_id": 71, + "target_slot": 0, + "type": "INT" + }, + { + "id": 105, + "origin_id": -10, + "origin_slot": 3, + "target_id": 71, + "target_slot": 1, + "type": "INT" + }, + { + "id": 108, + "origin_id": -10, + "origin_slot": 4, + "target_id": 70, + "target_slot": 4, + "type": "INT" + }, + { + "id": 109, + "origin_id": -10, + "origin_slot": 5, + "target_id": 66, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 110, + "origin_id": -10, + "origin_slot": 6, + "target_id": 62, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 112, + "origin_id": 67, + "origin_slot": 0, + "target_id": 91, + 
"target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 113, + "origin_id": 91, + "origin_slot": 0, + "target_id": 70, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 115, + "origin_id": 94, + "origin_slot": 0, + "target_id": 93, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 116, + "origin_id": 98, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 117, + "origin_id": 102, + "origin_slot": 0, + "target_id": 95, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 118, + "origin_id": 94, + "origin_slot": 0, + "target_id": 97, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 119, + "origin_id": 95, + "origin_slot": 0, + "target_id": 97, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 120, + "origin_id": 96, + "origin_slot": 0, + "target_id": 97, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 121, + "origin_id": 93, + "origin_slot": 0, + "target_id": 101, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 122, + "origin_id": 99, + "origin_slot": 0, + "target_id": 101, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 123, + "origin_id": 101, + "origin_slot": 0, + "target_id": 102, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 124, + "origin_id": 100, + "origin_slot": 0, + "target_id": 102, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 127, + "origin_id": -10, + "origin_slot": 1, + "target_id": 96, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 128, + "origin_id": -10, + "origin_slot": 0, + "target_id": 94, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 129, + "origin_id": -10, + "origin_slot": 2, + "target_id": 99, + "target_slot": 0, + "type": "*" + }, + { + "id": 130, + "origin_id": -10, + "origin_slot": 3, + "target_id": 100, + "target_slot": 0, + "type": "*" + }, + { + "id": 131, + "origin_id": 97, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 132, + "origin_id": -10, + 
"origin_slot": 7, + "target_id": 98, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 133, + "origin_id": -10, + "origin_slot": 8, + "target_id": 63, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 134, + "origin_id": 97, + "origin_slot": 0, + "target_id": 103, + "target_slot": 0, + "type": "STRING" + } + ], + "extra": {}, + "category": "Image generation and editing/Text to image", + "description": "Faster ERNIE Image Turbo variant (~8B DiT, distilled for fewer sampling steps): same strengths in Chinese/English on-image text and layout-heavy graphics as the base ERNIE Image lineup, with bundled encoders and VAE." + } + ] + }, + "extra": { + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (Ernie Image).json b/blueprints/Text to Image (Ernie Image).json new file mode 100644 index 000000000..2bab20d69 --- /dev/null +++ b/blueprints/Text to Image (Ernie Image).json @@ -0,0 +1,2190 @@ +{ + "revision": 0, + "last_node_id": 88, + "last_link_id": 0, + "nodes": [ + { + "id": 88, + "type": "03921aea-a70e-44b4-bc77-f6bda10f2120", + "pos": [ + -120, + 240 + ], + "size": [ + 400, + 540 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "prompt_enhancement", + "name": "value_1", + "type": "BOOLEAN", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" 
+ }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "label": "prompt_enhancer", + "name": "clip_name_1", + "type": "COMBO", + "widget": { + "name": "clip_name_1" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "78", + "value" + ], + [ + "76", + "value" + ], + [ + "71", + "width" + ], + [ + "71", + "height" + ], + [ + "70", + "steps" + ], + [ + "70", + "cfg" + ], + [ + "70", + "seed" + ], + [ + "66", + "unet_name" + ], + [ + "62", + "clip_name" + ], + [ + "91", + "clip_name" + ], + [ + "63", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": { + "value": true, + "value_1": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (Ernie Image)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "03921aea-a70e-44b4-bc77-f6bda10f2120", + "version": 1, + "state": { + "lastGroupId": 6, + "lastNodeId": 99, + "lastLinkId": 124, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Ernie Image)", + "inputNode": { + "id": -10, + "bounding": [ + -1350, + 370, + 163.50390625, + 260 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1110, + 260, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "504de359-52a4-49aa-b6be-23c1cdb0cbde", + "name": "value", + "type": "STRING", + "linkIds": [ + 102 + ], + "label": "prompt", + "pos": [ + -1206.49609375, + 390 + ] + }, + { + "id": 
"29f699c6-9263-41f6-b37d-69b9fc3913dd", + "name": "value_1", + "type": "BOOLEAN", + "linkIds": [ + 103 + ], + "label": "prompt_enhancement", + "pos": [ + -1206.49609375, + 410 + ] + }, + { + "id": "968e6213-d1e9-4268-8f47-1d6b9a39a43e", + "name": "width", + "type": "INT", + "linkIds": [ + 104, + 113 + ], + "pos": [ + -1206.49609375, + 430 + ] + }, + { + "id": "181c49ef-740d-4385-aa11-79718951ccb9", + "name": "height", + "type": "INT", + "linkIds": [ + 105, + 114 + ], + "pos": [ + -1206.49609375, + 450 + ] + }, + { + "id": "1e85f808-66a1-41df-be52-334142b35419", + "name": "steps", + "type": "INT", + "linkIds": [ + 106 + ], + "pos": [ + -1206.49609375, + 470 + ] + }, + { + "id": "2806addf-a252-4aa3-a5b7-397ab36dccec", + "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 107 + ], + "pos": [ + -1206.49609375, + 490 + ] + }, + { + "id": "5d036a66-5dc0-4d7c-b9a9-349e454738aa", + "name": "seed", + "type": "INT", + "linkIds": [ + 108 + ], + "pos": [ + -1206.49609375, + 510 + ] + }, + { + "id": "360f9a40-aac5-4e9c-bc98-9d55a4a58be2", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 109 + ], + "pos": [ + -1206.49609375, + 530 + ] + }, + { + "id": "886301c7-6e88-4cec-96fa-8ae20e8340c5", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 110 + ], + "pos": [ + -1206.49609375, + 550 + ] + }, + { + "id": "1d73a545-6d01-462f-bc61-966d4b918ff2", + "name": "clip_name_1", + "type": "COMBO", + "linkIds": [ + 120 + ], + "label": "prompt_enhancer", + "pos": [ + -1206.49609375, + 570 + ] + }, + { + "id": "8c61dc8c-e260-4b36-b73e-d36f90a0bbe3", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 121 + ], + "pos": [ + -1206.49609375, + 590 + ] + } + ], + "outputs": [ + { + "id": "f4cb34c8-4090-4281-b428-7338a339d274", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 84 + ], + "localized_name": "IMAGE", + "pos": [ + 1130, + 280 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 71, + "type": "EmptyFlux2LatentImage", + "pos": [ + -460, + 1040 + ], + "size": 
[ + 270, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 104 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 105 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 80 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyFlux2LatentImage", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 66, + "type": "UNETLoader", + "pos": [ + -470, + 320 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 109 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 85 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ernie-image.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/diffusion_models/ernie-image.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ernie-image.safetensors", + "default" + ] + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 710, + 280 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 73 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 74 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 84 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 70, + "type": "KSampler", + "pos": [ + 350, + 280 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 85 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 76 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 83 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 80 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 108 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 106 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + 
"name": "cfg" + }, + "link": 107 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 73 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + 182596410725960, + "randomize", + 20, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + -140, + 320 + ], + "size": [ + 410, + 370 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 79 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 100 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 76 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": 
[ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 72, + "type": "CLIPTextEncode", + "pos": [ + -130, + 770 + ], + "size": [ + 390, + 140 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 82 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 83 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 83, + "type": "StringReplace", + "pos": [ + -500, + -640 + ], + "size": [ + 430, + 450 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": null + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 92 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 115 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "[SYSTEM_PROMPT]你是一个专业的文生图 Prompt 增强助手。你将收到用户的简短图片描述及目标生成分辨率,请据此扩写为一段内容丰富、细节充分的视觉描述,以帮助文生图模型生成高质量的图片。仅输出增强后的描述,不要包含任何解释或前缀。[/SYSTEM_PROMPT][INST]{\"prompt\": \"{prompt}\", \"width\": {width}, \"height\": {height}}[/INST]", + "{prompt}", + "" + ] + }, + { + "id": 78, + "type": "PrimitiveStringMultiline", + "pos": [ + -950, + -650 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 102 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 87, + 92 + ] + } + ], + "title": "String (Multiline - Prompt)", + "properties": { + "Node name for S&R": "PrimitiveStringMultiline", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "" + ] + }, + { + "id": 74, + "type": "TextGenerate", + "pos": [ + 530, + -650 + ], + "size": [ + 400, + 380 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 112 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 119 + }, + { + "localized_name": "max_length", + "name": "max_length", + "type": "INT", + "widget": { + "name": "max_length" + }, + "link": null + }, + { + "localized_name": "sampling_mode", + "name": "sampling_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + 
"widget": { + "name": "sampling_mode" + }, + "link": null + }, + { + "localized_name": "temperature", + "name": "sampling_mode.temperature", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.temperature" + }, + "link": null + }, + { + "localized_name": "top_k", + "name": "sampling_mode.top_k", + "type": "INT", + "widget": { + "name": "sampling_mode.top_k" + }, + "link": null + }, + { + "localized_name": "top_p", + "name": "sampling_mode.top_p", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.top_p" + }, + "link": null + }, + { + "localized_name": "min_p", + "name": "sampling_mode.min_p", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.min_p" + }, + "link": null + }, + { + "localized_name": "repetition_penalty", + "name": "sampling_mode.repetition_penalty", + "type": "FLOAT", + "widget": { + "name": "sampling_mode.repetition_penalty" + }, + "link": null + }, + { + "localized_name": "seed", + "name": "sampling_mode.seed", + "type": "INT", + "widget": { + "name": "sampling_mode.seed" + }, + "link": null + }, + { + "localized_name": "sampling_mode.presence_penalty", + "name": "sampling_mode.presence_penalty", + "shape": 7, + "type": "FLOAT", + "widget": { + "name": "sampling_mode.presence_penalty" + }, + "link": null + }, + { + "localized_name": "thinking", + "name": "thinking", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "thinking" + }, + "link": null + }, + { + "localized_name": "use_default_template", + "name": "use_default_template", + "shape": 7, + "type": "BOOLEAN", + "widget": { + "name": "use_default_template" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "generated_text", + "name": "generated_text", + "type": "STRING", + "links": [ + 89 + ] + } + ], + "properties": { + "Node name for S&R": "TextGenerate", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + 2048, + "on", + 0.6, + 64, + 0.8, + 0.05, + 1.05, + 0, + 0, + false, + true + ] + }, + { + "id": 76, + "type": "PrimitiveBoolean", + "pos": [ + -500, + 60 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 103 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 88 + ] + } + ], + "title": "Enable prompt enhancement?", + "properties": { + "Node name for S&R": "PrimitiveBoolean", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + true + ] + }, + { + "id": 75, + "type": "ComfySwitchNode", + "pos": [ + 530, + 20 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 87 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 89 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 88 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 100, + 124 + ] + } + ], + "properties": { + "Node name for S&R": "ComfySwitchNode", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, 
+ "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + false + ] + }, + { + "id": 62, + "type": "CLIPLoader", + "pos": [ + -460, + 520 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 110 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 79, + 82 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ministral-3-3b.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ministral-3-3b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ministral-3-3b.safetensors", + "flux2", + "default" + ] + }, + { + "id": 63, + "type": "VAELoader", + "pos": [ + -460, + 770 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 121 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 74 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", 
+ "ver": "0.3.73", + "models": [ + { + "name": "flux2-vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/vae/flux2-vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "flux2-vae.safetensors" + ] + }, + { + "id": 91, + "type": "CLIPLoader", + "pos": [ + -500, + -150 + ], + "size": [ + 510, + 150 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 120 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 112 + ] + } + ], + "title": "Load CLIP (PE)", + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "models": [ + { + "name": "ernie-image-prompt-enhancer.safetensors", + "url": "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ernie-image-prompt-enhancer.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "ernie-image-prompt-enhancer.safetensors", + "flux2", + "default" + ] + }, + { + "id": 92, + "type": 
"PreviewAny", + "pos": [ + -950, + -400 + ], + "size": [ + 400, + 180 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 113 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 116 + ] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 93, + "type": "PreviewAny", + "pos": [ + -950, + -180 + ], + "size": [ + 400, + 180 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 114 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 118 + ] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + }, + { + "id": 94, + "type": "StringReplace", + "pos": [ + -30, + -640 + ], + "size": [ + 230, + 450 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 115 + }, + { + "localized_name": "find", + 
"name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 116 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 117 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + "{width}", + "" + ] + }, + { + "id": 95, + "type": "StringReplace", + "pos": [ + 220, + -640 + ], + "size": [ + 250, + 450 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "string", + "name": "string", + "type": "STRING", + "widget": { + "name": "string" + }, + "link": 117 + }, + { + "localized_name": "find", + "name": "find", + "type": "STRING", + "widget": { + "name": "find" + }, + "link": null + }, + { + "localized_name": "replace", + "name": "replace", + "type": "STRING", + "widget": { + "name": "replace" + }, + "link": 118 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 119 + ] + } + ], + "properties": { + "Node name for S&R": "StringReplace", + "cnr_id": "comfy-core", + "ver": "0.18.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "", + "{height}", + "" + ] + }, + { + "id": 97, + "type": "PreviewAny", + "pos": [ + 970, + -650 + ], + "size": [ + 570, + 790 + 
], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "source", + "name": "source", + "type": "*", + "link": 124 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [] + } + ], + "title": "Preview as Text (Int to String)", + "properties": { + "Node name for S&R": "PreviewAny", + "cnr_id": "comfy-core", + "ver": "0.19.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + null, + null, + null + ] + } + ], + "groups": [ + { + "id": 6, + "title": "Text to Image", + "bounding": [ + -510, + 200, + 1450, + 1060 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -480, + 940, + 310, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -160, + 250, + 470, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Model", + "bounding": [ + -490, + 250, + 320, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Prompt Enhancement", + "bounding": [ + -510, + -720, + 1450, + 890 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 73, + "origin_id": 70, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 74, + "origin_id": 63, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 85, + "origin_id": 66, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 76, + "origin_id": 67, + "origin_slot": 0, + "target_id": 70, + "target_slot": 1, + "type": "CONDITIONING" + 
}, + { + "id": 83, + "origin_id": 72, + "origin_slot": 0, + "target_id": 70, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 80, + "origin_id": 71, + "origin_slot": 0, + "target_id": 70, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 79, + "origin_id": 62, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 100, + "origin_id": 75, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 82, + "origin_id": 62, + "origin_slot": 0, + "target_id": 72, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 92, + "origin_id": 78, + "origin_slot": 0, + "target_id": 83, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 87, + "origin_id": 78, + "origin_slot": 0, + "target_id": 75, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 89, + "origin_id": 74, + "origin_slot": 0, + "target_id": 75, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 88, + "origin_id": 76, + "origin_slot": 0, + "target_id": 75, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 84, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 102, + "origin_id": -10, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 103, + "origin_id": -10, + "origin_slot": 1, + "target_id": 76, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 104, + "origin_id": -10, + "origin_slot": 2, + "target_id": 71, + "target_slot": 0, + "type": "INT" + }, + { + "id": 105, + "origin_id": -10, + "origin_slot": 3, + "target_id": 71, + "target_slot": 1, + "type": "INT" + }, + { + "id": 106, + "origin_id": -10, + "origin_slot": 4, + "target_id": 70, + "target_slot": 5, + "type": "INT" + }, + { + "id": 107, + "origin_id": -10, + "origin_slot": 5, + "target_id": 70, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 108, + "origin_id": -10, + "origin_slot": 6, + "target_id": 70, + "target_slot": 4, + 
"type": "INT" + }, + { + "id": 109, + "origin_id": -10, + "origin_slot": 7, + "target_id": 66, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 110, + "origin_id": -10, + "origin_slot": 8, + "target_id": 62, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 112, + "origin_id": 91, + "origin_slot": 0, + "target_id": 74, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 113, + "origin_id": -10, + "origin_slot": 2, + "target_id": 92, + "target_slot": 0, + "type": "*" + }, + { + "id": 114, + "origin_id": -10, + "origin_slot": 3, + "target_id": 93, + "target_slot": 0, + "type": "*" + }, + { + "id": 115, + "origin_id": 83, + "origin_slot": 0, + "target_id": 94, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 116, + "origin_id": 92, + "origin_slot": 0, + "target_id": 94, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 117, + "origin_id": 94, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 118, + "origin_id": 93, + "origin_slot": 0, + "target_id": 95, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 119, + "origin_id": 95, + "origin_slot": 0, + "target_id": 74, + "target_slot": 2, + "type": "STRING" + }, + { + "id": 120, + "origin_id": -10, + "origin_slot": 9, + "target_id": 91, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 121, + "origin_id": -10, + "origin_slot": 10, + "target_id": 63, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 124, + "origin_id": 75, + "origin_slot": 0, + "target_id": 97, + "target_slot": 0, + "type": "STRING" + } + ], + "extra": {}, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using Baidu’s open ERNIE Image (~8B DiT): bilingual in-image typography and layouts (posters, infographics, multi-panel compositions) alongside general scenes, with bundled encoders and VAE." 
+ } + ] + }, + "extra": { + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (Flux.1 Dev).json b/blueprints/Text to Image (Flux.1 Dev).json new file mode 100644 index 000000000..6d8446e81 --- /dev/null +++ b/blueprints/Text to Image (Flux.1 Dev).json @@ -0,0 +1,1047 @@ +{ + "revision": 0, + "last_node_id": 193, + "last_link_id": 0, + "nodes": [ + { + "id": 193, + "type": "1fd98b34-59ef-4d8d-afbf-58bdd7a1cd35", + "pos": [ + -1210, + -1770 + ], + "size": [ + 400, + 380 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "45", + "text" + ], + [ + "27", + "width" + ], + [ + "27", + "height" + ], + [ + "31", + "seed" + ], + [ + "38", + "unet_name" + ], + [ + "40", + "clip_name1" + ], + [ + "40", + "clip_name2" + ], + [ + "39", + "vae_name" + ], + [ + "31", + "control_after_generate" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Text to 
Image (Flux.1 Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "1fd98b34-59ef-4d8d-afbf-58bdd7a1cd35", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 193, + "lastLinkId": 388, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Flux.1 Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1090, + 411, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 540, + 100, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "669e384e-5e26-4291-9bac-e1d1f04b4a16", + "name": "text", + "type": "STRING", + "linkIds": [ + 68 + ], + "label": "prompt", + "pos": [ + -990, + 431 + ] + }, + { + "id": "5a5c0b01-5836-4ca6-a24f-68c0a4fb9802", + "name": "width", + "type": "INT", + "linkIds": [ + 69 + ], + "pos": [ + -990, + 451 + ] + }, + { + "id": "5e01104a-ed7f-457b-aaee-934e8ecc088d", + "name": "height", + "type": "INT", + "linkIds": [ + 70 + ], + "pos": [ + -990, + 471 + ] + }, + { + "id": "ea5ea317-a484-4605-8138-8628a4b8e502", + "name": "seed", + "type": "INT", + "linkIds": [ + 382 + ], + "pos": [ + -990, + 491 + ] + }, + { + "id": "ea2332f5-bd49-4e2e-8c7a-95817dc56ed6", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 385 + ], + "pos": [ + -990, + 511 + ] + }, + { + "id": "4fca3f43-c05f-4337-bf84-2afe67e43739", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 386 + ], + "pos": [ + -990, + 531 + ] + }, + { + "id": "357a679f-1370-4cd5-9269-0d5ae1986b49", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 387 + ], + "pos": [ + -990, + 551 + ] + }, + { + "id": "924ffec5-81f8-4585-8761-5a80d5d775bc", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 388 + ], + "pos": [ + -990, + 571 + ] + } + ], + "outputs": [ + { + "id": "2185cb4d-8689-4cf8-b345-75319fb46a8e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 560, + 120 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 39, + 
"type": "VAELoader", + "pos": [ + -800, + 670 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 388 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 58 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 38, + "type": "UNETLoader", + "pos": [ + -800, + 160 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 385 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 61 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-dev.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux1-dev/resolve/main/flux1-dev.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-dev.safetensors", + "default" + ] + }, + { + "id": 40, + "type": "DualCLIPLoader", + "pos": [ + -800, + 380 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { 
+ "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 386 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 387 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 64 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 27, + "type": "EmptySD3LatentImage", + "pos": [ + -420, + 640 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 69 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 70 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + 
"slot_index": 0, + "links": [ + 51 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 45, + "type": "CLIPTextEncode", + "pos": [ + -460, + 150 + ], + "size": [ + 330, + 220 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 64 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 68 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65, + 66 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 31, + "type": "KSampler", + "pos": [ + -50, + 260 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 61 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 65 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 63 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 51 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 382 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": 
"sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 20, + 120 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 52 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 58 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 42, + "type": "ConditioningZeroOut", + "pos": [ + -350, + 420 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 66 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "ConditioningZeroOut" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -820, + 70, + 320, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -470, + 570, + 380, + 250 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -470, + 70, + 380, + 470 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 52, + "origin_id": 31, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 58, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 61, + "origin_id": 38, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 63, + "origin_id": 42, + "origin_slot": 0, + "target_id": 31, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 51, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 9, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 64, + "origin_id": 40, + "origin_slot": 0, + "target_id": 45, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 65, + "origin_id": 45, + "origin_slot": 0, + "target_id": 31, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 66, + "origin_id": 45, + "origin_slot": 0, + "target_id": 42, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 68, + "origin_id": -10, + "origin_slot": 0, + "target_id": 45, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 69, + "origin_id": -10, + "origin_slot": 1, + "target_id": 27, + "target_slot": 0, + "type": "INT" + }, + { + "id": 70, + "origin_id": -10, + "origin_slot": 2, + 
"target_id": 27, + "target_slot": 1, + "type": "INT" + }, + { + "id": 382, + "origin_id": -10, + "origin_slot": 3, + "target_id": 31, + "target_slot": 4, + "type": "INT" + }, + { + "id": 385, + "origin_id": -10, + "origin_slot": 4, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 386, + "origin_id": -10, + "origin_slot": 5, + "target_id": 40, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 387, + "origin_id": -10, + "origin_slot": 6, + "target_id": 40, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 388, + "origin_id": -10, + "origin_slot": 7, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from prompts using FLUX.1 [dev]: a 12B rectified-flow MMDiT with dual CLIP plus T5-XXL text encoders and guidance-distilled sampling for sharp prompt following versus classic DDPM diffusion." + } + ] + }, + "extra": { + "ds": { + "scale": 0.7513148009015777, + "offset": [ + 1726.1426909346173, + 146.66925047394233 + ] + }, + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (Flux.1 Krea Dev).json b/blueprints/Text to Image (Flux.1 Krea Dev).json new file mode 100644 index 000000000..0d7fa03c4 --- /dev/null +++ b/blueprints/Text to Image (Flux.1 Krea Dev).json @@ -0,0 +1,1041 @@ +{ + "revision": 0, + "last_node_id": 196, + "last_link_id": 0, + "nodes": [ + { + "id": 196, + "type": "aa0a207e-bf0e-477c-a87f-f58fcf5f7749", + "pos": [ + 1010, + 130 + ], + "size": [ + 410, + 460 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + 
"widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "195", + "text" + ], + [ + "27", + "width" + ], + [ + "27", + "height" + ], + [ + "31", + "seed" + ], + [ + "38", + "unet_name" + ], + [ + "40", + "clip_name1" + ], + [ + "40", + "clip_name2" + ], + [ + "39", + "vae_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Text to Image (Flux.1 Krea Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "aa0a207e-bf0e-477c-a87f-f58fcf5f7749", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 196, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Flux.1 Krea Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1050, + 426, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 620, + 140, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "c2515318-6e10-4ad9-9466-e6aa855bc849", + "name": "text", + "type": "STRING", + "linkIds": [ + 71 + ], + "pos": [ + -950, + 446 + ] + }, + { + "id": "09f20672-c8a3-4180-823a-5a6af0113e4f", + "name": "width", + "type": "INT", + "linkIds": [ + 72 + ], + "pos": [ + -950, + 466 + ] + }, + { + "id": "7f54c952-896e-4356-bfb2-970e1c8f2eb7", + "name": "height", + "type": "INT", + "linkIds": [ + 73 + ], + 
"pos": [ + -950, + 486 + ] + }, + { + "id": "e2dc1c86-2fb4-4b80-b560-f30560af1897", + "name": "seed", + "type": "INT", + "linkIds": [ + 391 + ], + "pos": [ + -950, + 506 + ] + }, + { + "id": "34b172e7-85b2-444a-9a4d-1221f272c46e", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 392 + ], + "pos": [ + -950, + 526 + ] + }, + { + "id": "073b7440-d943-4a2f-a3a1-fbdb8fcda9f9", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 393 + ], + "pos": [ + -950, + 546 + ] + }, + { + "id": "55c1286a-4aca-41fc-b967-ae3d3fa7bc85", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 394 + ], + "pos": [ + -950, + 566 + ] + }, + { + "id": "2241e4fc-9219-4be7-bf6d-3493b579ab5a", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 395 + ], + "pos": [ + -950, + 586 + ] + } + ], + "outputs": [ + { + "id": "5310184a-f0a2-405f-9917-dd2a352a4fac", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 640, + 160 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 40, + "type": "DualCLIPLoader", + "pos": [ + -780, + 360 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 393 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 394 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 64 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": 
"0.3.40", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + -770, + 630 + ], + "size": [ + 240, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 395 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 58 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 38, + "type": "UNETLoader", + "pos": [ + -780, + 170 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 392 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 61 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-krea-dev_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/FLUX.1-Krea-dev_ComfyUI/resolve/main/split_files/diffusion_models/flux1-krea-dev_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-krea-dev_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 195, + "type": "CLIPTextEncode", + "pos": [ + -440, + 180 + ], + "size": [ + 330, + 210 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 64 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 71 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65, + 66 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.47", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 27, + "type": "EmptySD3LatentImage", + "pos": [ + -390, + 650 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 72 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 73 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 51 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 31, + "type": "KSampler", + "pos": [ + 0, + 130 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 61 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 65 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 63 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 51 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 391 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 8, + "type": 
"VAEDecode", + "pos": [ + 340, + 140 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 52 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 58 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 42, + "type": "ConditioningZeroOut", + "pos": [ + -340, + 430 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 66 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "ConditioningZeroOut" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -800, + 90, + 310, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -460, + 560, + 400, + 280 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -460, + 90, + 400, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 66, + "origin_id": 195, + "origin_slot": 0, + "target_id": 42, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 52, + "origin_id": 31, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + 
"type": "LATENT" + }, + { + "id": 58, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 61, + "origin_id": 38, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 65, + "origin_id": 195, + "origin_slot": 0, + "target_id": 31, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 63, + "origin_id": 42, + "origin_slot": 0, + "target_id": 31, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 51, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 64, + "origin_id": 40, + "origin_slot": 0, + "target_id": 195, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 9, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 71, + "origin_id": -10, + "origin_slot": 0, + "target_id": 195, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 72, + "origin_id": -10, + "origin_slot": 1, + "target_id": 27, + "target_slot": 0, + "type": "INT" + }, + { + "id": 73, + "origin_id": -10, + "origin_slot": 2, + "target_id": 27, + "target_slot": 1, + "type": "INT" + }, + { + "id": 391, + "origin_id": -10, + "origin_slot": 3, + "target_id": 31, + "target_slot": 4, + "type": "INT" + }, + { + "id": 392, + "origin_id": -10, + "origin_slot": 4, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 393, + "origin_id": -10, + "origin_slot": 5, + "target_id": 40, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 394, + "origin_id": -10, + "origin_slot": 6, + "target_id": 40, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 395, + "origin_id": -10, + "origin_slot": 7, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "FLUX.1 Krea [dev] (Black Forest Labs × Krea): open-weight 12B 
rectified-flow text-to-image drop-in alongside FLUX.1 [dev], tuned away from overcooked saturation toward more natural diversity in people, realism, and style while keeping ecosystem compatibility." + } + ] + }, + "extra": { + "ds": { + "scale": 0.735584459955559, + "offset": [ + 1936.5815687336737, + 303.78330847702625 + ] + }, + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (Flux.2 Dev).json b/blueprints/Text to Image (Flux.2 Dev).json new file mode 100644 index 000000000..d5ca3077d --- /dev/null +++ b/blueprints/Text to Image (Flux.2 Dev).json @@ -0,0 +1,1870 @@ +{ + "revision": 0, + "last_node_id": 123, + "last_link_id": 0, + "nodes": [ + { + "id": 123, + "type": "85066daf-feda-4c7b-bbc3-d4797e8ccf0f", + "pos": [ + -800, + 640 + ], + "size": [ + 400, + 0 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "turbo_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": 
[ + [ + "115", + "text" + ], + [ + "113", + "width" + ], + [ + "113", + "height" + ], + [ + "122", + "unet_name" + ], + [ + "111", + "clip_name" + ], + [ + "108", + "vae_name" + ], + [ + "116", + "lora_name" + ], + [ + "121", + "value" + ], + [ + "114", + "noise_seed" + ], + [ + "114", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": { + "value": true, + "lora_name": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Flux.2 Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "85066daf-feda-4c7b-bbc3-d4797e8ccf0f", + "version": 1, + "state": { + "lastGroupId": 6, + "lastNodeId": 123, + "lastLinkId": 232, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Flux.2 Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1500, + 250, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1560, + -20, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "1f4f1091-3f97-41d8-8ed8-e8b02260cf3c", + "name": "text", + "type": "STRING", + "linkIds": [ + 206 + ], + "label": "prompt", + "pos": [ + -1368.255859375, + 270 + ] + }, + { + "id": "b9b59411-4f5f-4482-8f78-369e6d50e71c", + "name": "width", + "type": "INT", + "linkIds": [ + 222, + 231 + ], + "pos": [ + -1368.255859375, + 290 + ] + }, + { + "id": "c6de9a28-3bf6-40d0-be16-f75ec517a766", + "name": "height", + "type": "INT", + "linkIds": [ + 223, + 232 + ], + "pos": [ + -1368.255859375, + 310 + ] + }, + { + "id": "8f1b1c75-e47c-45f5-af57-74abcfe8967c", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 225 + ], + "pos": [ + -1368.255859375, + 330 + ] + }, + { + "id": "6ac27631-1bf0-4161-9670-a662f6180b94", + 
"name": "clip_name", + "type": "COMBO", + "linkIds": [ + 226 + ], + "pos": [ + -1368.255859375, + 350 + ] + }, + { + "id": "932e6cbe-f716-4905-ae54-d2b3543497bd", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 227 + ], + "pos": [ + -1368.255859375, + 370 + ] + }, + { + "id": "37400048-5e7b-427b-8b79-ea35841d5306", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 228 + ], + "label": "turbo_lora", + "pos": [ + -1368.255859375, + 390 + ] + }, + { + "id": "333212d0-f027-476f-8b97-a921e20e340a", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 229 + ], + "label": "enable_turbo_mode", + "pos": [ + -1368.255859375, + 410 + ] + }, + { + "id": "e7e73fad-ce6e-48d5-b719-e2abed685185", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 230 + ], + "pos": [ + -1368.255859375, + 430 + ] + } + ], + "outputs": [ + { + "id": "ed3c0a0f-a39f-453e-907f-8249c8e3335d", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 1580, + 0 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 105, + "type": "BasicGuider", + "pos": [ + 570, + 170 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 210 + }, + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 165 + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 30 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "BasicGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 106, + "type": "FluxGuidance", + "pos": [ + -510, 
+ 470 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 41 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 165 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 107, + "type": "KSamplerSelect", + "pos": [ + 570, + 350 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 19 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 108, + "type": "VAELoader", + "pos": [ + -1000, + 460 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + 
"localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 227 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 159 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "full_encoder_small_decoder.safetensors", + "url": "https://huggingface.co/black-forest-labs/FLUX.2-small-decoder/resolve/main/full_encoder_small_decoder.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "full_encoder_small_decoder.safetensors" + ] + }, + { + "id": 109, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + -20 + ], + "size": [ + 280, + 330 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 37 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 30 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 19 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 132 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 161 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 24 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + 
"input_ue_unconnectable": {} + }, + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 110, + "type": "VAEDecode", + "pos": [ + 1220, + -20 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 24 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 159 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 111, + "type": "CLIPLoader", + "pos": [ + -1000, + 200 + ], + "size": [ + 300, + 150 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 226 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 117 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": 
"CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "mistral_3_small_flux2_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/text_encoders/mistral_3_small_flux2_bf16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "mistral_3_small_flux2_bf16.safetensors", + "flux2", + "default" + ] + }, + { + "id": 112, + "type": "Flux2Scheduler", + "pos": [ + 570, + 550 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 213 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 231 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 232 + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 132 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "Flux2Scheduler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + 1024, + 1024 + ] + }, + { + "id": 113, + "type": "EmptyFlux2LatentImage", + "pos": [ + -980, + 660 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 222 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + 
"widget": { + "name": "height" + }, + "link": 223 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 161 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptyFlux2LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 114, + "type": "RandomNoise", + "pos": [ + 570, + -20 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 230 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 37 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1027111520328378, + "randomize" + ] + }, + { + "id": 115, + "type": "CLIPTextEncode", + "pos": [ + -630, + -40 + ], + "size": [ + 440, + 450 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 117 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 206 + } 
+ ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 41 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 116, + "type": "LoraLoaderModelOnly", + "pos": [ + -150, + 220 + ], + "size": [ + 300, + 140 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 221 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 228 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 209 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "Flux_2-Turbo-LoRA_comfyui.safetensors", + "url": "https://huggingface.co/ByteZSzn/Flux.2-Turbo-ComfyUI/resolve/main/Flux_2-Turbo-LoRA_comfyui.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + 
"Flux_2-Turbo-LoRA_comfyui.safetensors", + 1 + ] + }, + { + "id": 117, + "type": "ComfySwitchNode", + "pos": [ + 220, + -30 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 208 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 209 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 215 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 210 + ] + } + ], + "title": "Switch(model)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 118, + "type": "PrimitiveInt", + "pos": [ + -140, + -30 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 211 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 119, + "type": "PrimitiveInt", + 
"pos": [ + -150, + 460 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 212 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 120, + "type": "ComfySwitchNode", + "pos": [ + 220, + 260 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 211 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 212 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 214 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 213 + ] + } + ], + "title": "Switch(steps)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 121, + "type": "PrimitiveBoolean", + "pos": [ + -110, + 690 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 16, + "mode": 0, 
+ "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 229 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 214, + 215 + ] + } + ], + "title": "Enable Turbo LoRA", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.15.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 122, + "type": "UNETLoader", + "pos": [ + -1000, + -30 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 225 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 208, + 221 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "flux2_dev_fp8mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/diffusion_models/flux2_dev_fp8mixed.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux2_dev_fp8mixed.safetensors", 
+ "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Step 1 - Upload models", + "bounding": [ + -1040, + -110, + 380, + 710 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Custom sampler", + "bounding": [ + 540, + -110, + 640, + 870 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Step2 - Prompt", + "bounding": [ + -640, + -110, + 460, + 710 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Original", + "bounding": [ + -160, + -110, + 320, + 230 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "8 Steps LoRA", + "bounding": [ + -160, + 140, + 320, + 460 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 165, + "origin_id": 106, + "origin_slot": 0, + "target_id": 105, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 41, + "origin_id": 115, + "origin_slot": 0, + "target_id": 106, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 37, + "origin_id": 114, + "origin_slot": 0, + "target_id": 109, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 30, + "origin_id": 105, + "origin_slot": 0, + "target_id": 109, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 19, + "origin_id": 107, + "origin_slot": 0, + "target_id": 109, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 132, + "origin_id": 112, + "origin_slot": 0, + "target_id": 109, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 161, + "origin_id": 113, + "origin_slot": 0, + "target_id": 109, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 117, + "origin_id": 111, + "origin_slot": 0, + "target_id": 115, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 24, + "origin_id": 109, + "origin_slot": 0, + "target_id": 110, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 159, + "origin_id": 108, + "origin_slot": 0, + "target_id": 110, + 
"target_slot": 1, + "type": "VAE" + }, + { + "id": 9, + "origin_id": 110, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 206, + "origin_id": -10, + "origin_slot": 0, + "target_id": 115, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 208, + "origin_id": 122, + "origin_slot": 0, + "target_id": 117, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 209, + "origin_id": 116, + "origin_slot": 0, + "target_id": 117, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 210, + "origin_id": 117, + "origin_slot": 0, + "target_id": 105, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 211, + "origin_id": 118, + "origin_slot": 0, + "target_id": 120, + "target_slot": 0, + "type": "INT" + }, + { + "id": 212, + "origin_id": 119, + "origin_slot": 0, + "target_id": 120, + "target_slot": 1, + "type": "INT" + }, + { + "id": 213, + "origin_id": 120, + "origin_slot": 0, + "target_id": 112, + "target_slot": 0, + "type": "INT" + }, + { + "id": 214, + "origin_id": 121, + "origin_slot": 0, + "target_id": 120, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 215, + "origin_id": 121, + "origin_slot": 0, + "target_id": 117, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 221, + "origin_id": 122, + "origin_slot": 0, + "target_id": 116, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 222, + "origin_id": -10, + "origin_slot": 1, + "target_id": 113, + "target_slot": 0, + "type": "INT" + }, + { + "id": 223, + "origin_id": -10, + "origin_slot": 2, + "target_id": 113, + "target_slot": 1, + "type": "INT" + }, + { + "id": 225, + "origin_id": -10, + "origin_slot": 3, + "target_id": 122, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 226, + "origin_id": -10, + "origin_slot": 4, + "target_id": 111, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 227, + "origin_id": -10, + "origin_slot": 5, + "target_id": 108, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 228, + "origin_id": -10, + 
"origin_slot": 6, + "target_id": 116, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 229, + "origin_id": -10, + "origin_slot": 7, + "target_id": 121, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 230, + "origin_id": -10, + "origin_slot": 8, + "target_id": 114, + "target_slot": 0, + "type": "INT" + }, + { + "id": 231, + "origin_id": -10, + "origin_slot": 1, + "target_id": 112, + "target_slot": 1, + "type": "INT" + }, + { + "id": 232, + "origin_id": -10, + "origin_slot": 2, + "target_id": 112, + "target_slot": 2, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from prompts using FLUX.2 [dev]: a newer 32B rectified-flow stack with distilled guidance plus a stronger long-context multimodal encoder for complex scenes, sharper typography/UI text, anatomy, lighting, and high-resolution latent decoding." + } + ] + }, + "extra": { + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (NetaYume Lumina).json b/blueprints/Text to Image (NetaYume Lumina).json new file mode 100644 index 000000000..9e11b7a86 --- /dev/null +++ b/blueprints/Text to Image (NetaYume Lumina).json @@ -0,0 +1,1470 @@ +{ + "revision": 0, + "last_node_id": 219, + "last_link_id": 0, + "nodes": [ + { + "id": 219, + "type": "fc9485c9-2acd-482e-94f1-b5fa702f2536", + "pos": [ + -1900, + 2330 + ], + "size": [ + 400, + 540 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 
null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "62", + "value" + ], + [ + "53", + "width" + ], + [ + "53", + "height" + ], + [ + "55", + "seed" + ], + [ + "56", + "ckpt_name" + ], + [ + "55", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (NetaYume Lumina)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "fc9485c9-2acd-482e-94f1-b5fa702f2536", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 219, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (NetaYume Lumina)", + "inputNode": { + "id": -10, + "bounding": [ + -600, + 90, + 120, + 140 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1740.333330193419, + 286.3333328495138, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "b80a1e0c-e8a6-4c4f-8eb1-825cb7e4fdcf", + "name": "value", + "type": "STRING", + "linkIds": [ + 36 + ], + "pos": [ + -500, + 110 + ] + }, + { + "id": "6583bb32-7cff-4921-a771-1f0dcdf779e6", + "name": "width", + "type": "INT", + "linkIds": [ + 39 + ], + "pos": [ + -500, + 130 + ] + }, + { + "id": "c486937a-46c0-431b-8775-057897843cbd", + "name": "height", + "type": "INT", + "linkIds": [ + 40 + ], + "pos": [ + -500, + 150 + ] + }, + { + "id": "9c85c0cc-c906-405a-a4d9-43b93c47cb53", + "name": "seed", + "type": "INT", + "linkIds": [ + 42 + ], + "pos": [ + -500, + 170 + ] + }, + { + "id": "f7e288ec-fa1f-4a1d-b721-6b605de9cb51", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 43 + ], + "pos": [ + -500, + 190 + ] + } + ], + "outputs": [ + { + "id": "ea4b872b-a294-4cbf-99a9-70e55c0f8b3e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 16 + ], + "localized_name": "IMAGE", + 
"pos": [ + 1760.333330193419, + 306.3333328495138 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 53, + "type": "EmptySD3LatentImage", + "pos": [ + -220, + 370 + ], + "size": [ + 320, + 170 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 39 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 40 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 54, + "type": "ModelSamplingAuraFlow", + "pos": [ + 650, + 40 + ], + "size": [ + 310, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 12 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow" + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 55, + "type": "KSampler", + "pos": [ + 650, + 200 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + 
"name": "model", + "type": "MODEL", + "link": 13 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 32 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 23 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 17 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 42 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 14 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 30, + 4, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 56, + "type": "CheckpointLoaderSimple", + "pos": [ + -220, + 70 + ], + "size": [ + 320, + 160 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 43 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + 
"links": [ + 12 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 1, + "links": [ + 22, + 35 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 2, + "links": [ + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CheckpointLoaderSimple", + "models": [ + { + "name": "NetaYumev35_pretrained_all_in_one.safetensors", + "url": "https://huggingface.co/duongve/NetaYume-Lumina-Image-2.0/resolve/main/NetaYumev35_pretrained_all_in_one.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "NetaYumev35_pretrained_all_in_one.safetensors" + ] + }, + { + "id": 57, + "type": "a07fdf06-1bda-4dac-bdbd-63ee8ebca1c9", + "pos": [ + 180, + 360 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 22 + }, + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 23 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "218", + "value" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 217, + "type": "VAEDecode", + "pos": [ + 1040, + 210 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 14 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", 
+ "type": "IMAGE", + "slot_index": 0, + "links": [ + 16 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 59, + "type": "MarkdownNote", + "pos": [ + 640, + -390 + ], + "size": [ + 370, + 280 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Note: Prompt", + "properties": {}, + "widgets_values": [ + "Check the prompt book [here](https://nieta-art.feishu.cn/wiki/RY3GwpT59icIQlkWXEfcCqIMnQd)\n\nYou should keep the prefix part fixed until the **Prompt Start** tag\n\n@whatever in the prompt is for artist tags, such as @comfyanonymous\n\nYou can find more artist tags [here](https://gumgum10.github.io/gumgum.github.io/)\n" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 60, + "type": "StringConcatenate", + "pos": [ + 170, + -370 + ], + "size": [ + 400, + 250 + ], + "flags": { + "collapsed": true + }, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 30 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 31 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 34 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 61, + "type": "CLIPTextEncode", + "pos": [ + 170, + 60 + ], + "size": [ + 430, + 190 + ], + "flags": {}, + "order": 8, + "mode": 0, 
+ "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 35 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 32 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 62, + "type": "PrimitiveStringMultiline", + "pos": [ + -240, + -210 + ], + "size": [ + 370, + 140 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 31 + ] + } + ], + "title": "Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 63, + "type": "PrimitiveStringMultiline", + "pos": [ + -240, + -390 + ], + "size": [ + 370, + 140 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 30 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + 
"Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "You are an assistant designed to generate high quality anime images based on textual prompts. " + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -250, + -30, + 370, + 280 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -250, + 280, + 370, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 150, + -30, + 460, + 600 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Prompt Builder", + "bounding": [ + -250, + -460, + 840, + 400 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 12, + "origin_id": 56, + "origin_slot": 0, + "target_id": 54, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 13, + "origin_id": 54, + "origin_slot": 0, + "target_id": 55, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 23, + "origin_id": 57, + "origin_slot": 0, + "target_id": 55, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 17, + "origin_id": 53, + "origin_slot": 0, + "target_id": 55, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 14, + "origin_id": 55, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 8, + "origin_id": 56, + "origin_slot": 2, + "target_id": 217, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 22, + "origin_id": 56, + "origin_slot": 1, + "target_id": 57, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": 217, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": 63, + "origin_slot": 0, + "target_id": 60, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 31, + "origin_id": 62, + "origin_slot": 0, + "target_id": 60, + "target_slot": 1, + "type": "STRING" + }, + { + 
"id": 32, + "origin_id": 61, + "origin_slot": 0, + "target_id": 55, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 34, + "origin_id": 60, + "origin_slot": 0, + "target_id": 61, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 35, + "origin_id": 56, + "origin_slot": 1, + "target_id": 61, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 0, + "target_id": 62, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 39, + "origin_id": -10, + "origin_slot": 1, + "target_id": 53, + "target_slot": 0, + "type": "INT" + }, + { + "id": 40, + "origin_id": -10, + "origin_slot": 2, + "target_id": 53, + "target_slot": 1, + "type": "INT" + }, + { + "id": 42, + "origin_id": -10, + "origin_slot": 3, + "target_id": 55, + "target_slot": 4, + "type": "INT" + }, + { + "id": 43, + "origin_id": -10, + "origin_slot": 4, + "target_id": 56, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using NetaYume Lumina, fine-tuned from Neta Lumina for anime-style and illustration generation." 
+ }, + { + "id": "a07fdf06-1bda-4dac-bdbd-63ee8ebca1c9", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 219, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "CLIP Text Encode (Negative Prompt)", + "inputNode": { + "id": -10, + "bounding": [ + -150, + 675, + 120, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 905.2780151367188, + 675, + 128.6640625, + 60 + ] + }, + "inputs": [ + { + "id": "47264a97-6fc9-454d-920f-b8a43fee0489", + "name": "clip", + "type": "CLIP", + "linkIds": [ + 5 + ], + "localized_name": "clip", + "pos": [ + -50, + 695 + ] + }, + { + "id": "7cdb7919-1dad-4bd2-928d-c543c3fd712e", + "name": "value", + "type": "STRING", + "linkIds": [ + 22 + ], + "pos": [ + -50, + 715 + ] + } + ], + "outputs": [ + { + "id": "c3f17ad9-6954-4333-bf8e-e1cf886c351b", + "name": "CONDITIONING", + "type": "CONDITIONING", + "linkIds": [ + 6 + ], + "localized_name": "CONDITIONING", + "pos": [ + 925.2780151367188, + 695 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 64, + "type": "StringConcatenate", + "pos": [ + 420, + 720 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 19 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 20 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 21 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 
65, + "type": "PrimitiveStringMultiline", + "pos": [ + 30, + 720 + ], + "size": [ + 370, + 130 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 19 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "You are an assistant designed to generate low-quality images based on textual prompts " + ] + }, + { + "id": 218, + "type": "PrimitiveStringMultiline", + "pos": [ + 30, + 900 + ], + "size": [ + 370, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 22 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 20 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "blurry, worst quality, low quality, jpeg artifacts, signature, watermark, username, error, deformed hands, bad anatomy, extra limbs, poorly drawn hands, poorly drawn face, mutation, deformed, extra eyes, extra arms, extra legs, malformed limbs, fused fingers, too many fingers, long neck, cross-eyed, bad proportions, missing arms, missing legs, extra digit, fewer digits, cropped" + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 420, + 410 + ], + "size": [ + 430, + 190 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + 
"localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 5 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 21 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 6 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "You are an assistant designed to generate low-quality images based on textual prompts blurry, worst quality, low quality, jpeg artifacts, signature, watermark, username, error, deformed hands, bad anatomy, extra limbs, poorly drawn hands, poorly drawn face, mutation, deformed, extra eyes, extra arms, extra legs, malformed limbs, fused fingers, too many fingers, long neck, cross-eyed, bad proportions, missing arms, missing legs, extra digit, fewer digits, cropped" + ], + "color": "#223", + "bgcolor": "#335" + } + ], + "groups": [], + "links": [ + { + "id": 19, + "origin_id": 65, + "origin_slot": 0, + "target_id": 64, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 20, + "origin_id": 218, + "origin_slot": 0, + "target_id": 64, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 21, + "origin_id": 64, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 5, + "origin_id": -10, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 6, + "origin_id": 67, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 22, + "origin_id": -10, + "origin_slot": 1, + "target_id": 218, + "target_slot": 0, + "type": "STRING" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "description": "Encodes a negative text 
prompt via CLIP for classifier-free guidance in anime-style generation (NetaYume Lumina)." + } + ] + }, + "extra": { + "ue_links": [] + } +} diff --git a/blueprints/Text to Image (Qwen-Image 2512).json b/blueprints/Text to Image (Qwen-Image 2512).json new file mode 100644 index 000000000..09612be8b --- /dev/null +++ b/blueprints/Text to Image (Qwen-Image 2512).json @@ -0,0 +1,1952 @@ +{ + "revision": 0, + "last_node_id": 263, + "last_link_id": 0, + "nodes": [ + { + "id": 263, + "type": "fd6ee5f8-a0a9-487a-8b44-8cb65957532a", + "pos": [ + 750, + 760 + ], + "size": [ + 400, + 0 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "249", + "text" + ], + [ + "252", + "width" + ], + [ + "252", + "height" + ], + [ + "256", + "value" + ], + [ + "253", + "seed" + ], + [ + "248", + "unet_name" + ], + [ + "245", + "clip_name" + ], + [ + "246", + "vae_name" + ], + [ + "259", + 
"lora_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.4", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Qwen-Image 2512)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "fd6ee5f8-a0a9-487a-8b44-8cb65957532a", + "version": 1, + "state": { + "lastGroupId": 7, + "lastNodeId": 263, + "lastLinkId": 375, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Qwen-Image 2512)", + "inputNode": { + "id": -10, + "bounding": [ + -1080, + 1480, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1550, + 1460, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "74d26021-a723-4a90-8e33-5d805a7e5deb", + "name": "text", + "type": "STRING", + "linkIds": [ + 360 + ], + "pos": [ + -948.255859375, + 1500 + ] + }, + { + "id": "b55f69e6-c7cb-4641-9e1f-2cb1c1942ed0", + "name": "width", + "type": "INT", + "linkIds": [ + 361 + ], + "pos": [ + -948.255859375, + 1520 + ] + }, + { + "id": "3e80284d-aba3-43cd-ab7b-ac2a619ef18c", + "name": "height", + "type": "INT", + "linkIds": [ + 362 + ], + "pos": [ + -948.255859375, + 1540 + ] + }, + { + "id": "de06e137-6cec-4cb3-a6bb-737022310a7b", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 370 + ], + "label": "enable_turbo_mode", + "pos": [ + -948.255859375, + 1560 + ] + }, + { + "id": "9e500dee-a5b9-481b-ac46-64bab4bd3530", + "name": "seed", + "type": "INT", + "linkIds": [ + 371 + ], + "pos": [ + -948.255859375, + 1580 + ] + }, + { + "id": "33422b12-24e5-41c6-96fc-f9a8dadd5d94", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 372 + ], + "pos": [ + -948.255859375, + 1600 + ] + }, + { + "id": 
"5cf753e4-236e-468e-9a06-6b8e238badc8", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 373 + ], + "pos": [ + -948.255859375, + 1620 + ] + }, + { + "id": "790e775c-a639-4e5f-9007-e2ee6764dc5e", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 374 + ], + "pos": [ + -948.255859375, + 1640 + ] + }, + { + "id": "3ebed521-3fe9-4922-ae26-2483e03d9305", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 375 + ], + "pos": [ + -948.255859375, + 1660 + ] + } + ], + "outputs": [ + { + "id": "7db1f9e2-40ee-4f9f-bb24-a0db7b96d45e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 333 + ], + "localized_name": "IMAGE", + "pos": [ + 1570, + 1480 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 245, + "type": "CLIPLoader", + "pos": [ + -590, + 1370 + ], + "size": [ + 280, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 373 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 314, + 315 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": 
"https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + }, + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 246, + "type": "VAELoader", + "pos": [ + -580, + 1620 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 374 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 323 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + }, + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 247, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1040, + 1110 + ], + "size": [ + 250, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + 
"type": "MODEL", + "link": 367 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 316 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 248, + "type": "UNETLoader", + "pos": [ + -590, + 1140 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 372 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 312, + 324 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_image_2512_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_2512_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + }, + { + "name": 
"qwen_image_2512_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_2512_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "qwen_image_2512_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 249, + "type": "CLIPTextEncode", + "pos": [ + -200, + 1140 + ], + "size": [ + 360, + 420 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 314 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 360 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 317 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 250, + "type": "CLIPTextEncode", + "pos": [ + -200, + 1610 + ], + "size": [ + 370, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 315 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 318 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "ue_properties": { + 
"version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "低分辨率,低画质,肢体畸形,手指畸形,画面过饱和,蜡像感,人脸无细节,过度光滑,画面具有AI感。构图混乱。文字模糊,扭曲" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 251, + "type": "VAEDecode", + "pos": [ + 1320, + 1120 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 322 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 333 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 252, + "type": "EmptySD3LatentImage", + "pos": [ + -550, + 1930 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 361 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 362 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + 
"type": "LATENT", + "links": [ + 319 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1328, + 1328, + 1 + ] + }, + { + "id": 253, + "type": "KSampler", + "pos": [ + 1040, + 1250 + ], + "size": [ + 250, + 350 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 316 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 317 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 318 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 319 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 371 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 368 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 369 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 322 + ] + } + ], + "properties": { 
+ "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 464857551335368, + "randomize", + 50, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 254, + "type": "PrimitiveInt", + "pos": [ + 300, + 1150 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 355 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 50, + "fixed" + ] + }, + { + "id": 255, + "type": "PrimitiveFloat", + "pos": [ + 300, + 1290 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 357 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 256, + "type": "PrimitiveBoolean", + "pos": [ + 300, + 2060 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 370 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 326, + 358, + 359 + ] + } + ], + "title": "Enable 4 Steps LoRA?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 257, + "type": "PrimitiveInt", + "pos": [ + 290, + 1540 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 347, + 354 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4, + "fixed" + ] + }, + { + "id": 258, + "type": "PrimitiveFloat", + "pos": [ + 290, 
+ 1670 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 356 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 259, + "type": "LoraLoaderModelOnly", + "pos": [ + 240, + 1820 + ], + "size": [ + 330, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 312 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 375 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + "url": 
"https://huggingface.co/lightx2v/Qwen-Image-2512-Lightning/resolve/main/Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + 1 + ] + }, + { + "id": 260, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1170 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 324 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 325 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 326 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 367 + ] + } + ], + "title": "Switch (model)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 261, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1420 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 355 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 354 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 368 + ] + } + ], + "title": "Switch (steps)", + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 262, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1660 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 357 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 356 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 358 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 369 + ] + } + ], + "title": "Switch (cfg)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -640, + 1060, + 390, + 740 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image size", + "bounding": [ + -630, + 1830, + 380, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -220, + 1060, + 400, + 740 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "4-steps LoRA", + "bounding": [ + 210, + 1460, + 410, + 550 + ], + "color": 
"#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Original Settings", + "bounding": [ + 210, + 1060, + 420, + 370 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Swtich", + "bounding": [ + 660, + 1060, + 320, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 312, + "origin_id": 248, + "origin_slot": 0, + "target_id": 259, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 314, + "origin_id": 245, + "origin_slot": 0, + "target_id": 249, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 315, + "origin_id": 245, + "origin_slot": 0, + "target_id": 250, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 322, + "origin_id": 253, + "origin_slot": 0, + "target_id": 251, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 323, + "origin_id": 246, + "origin_slot": 0, + "target_id": 251, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 316, + "origin_id": 247, + "origin_slot": 0, + "target_id": 253, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 317, + "origin_id": 249, + "origin_slot": 0, + "target_id": 253, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 318, + "origin_id": 250, + "origin_slot": 0, + "target_id": 253, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 319, + "origin_id": 252, + "origin_slot": 0, + "target_id": 253, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 324, + "origin_id": 248, + "origin_slot": 0, + "target_id": 260, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 325, + "origin_id": 259, + "origin_slot": 0, + "target_id": 260, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 326, + "origin_id": 256, + "origin_slot": 0, + "target_id": 260, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 333, + "origin_id": 251, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 347, + "origin_id": 257, + "origin_slot": 0, + 
"target_id": 253, + "target_slot": 4, + "type": "INT" + }, + { + "id": 354, + "origin_id": 257, + "origin_slot": 0, + "target_id": 261, + "target_slot": 1, + "type": "INT" + }, + { + "id": 355, + "origin_id": 254, + "origin_slot": 0, + "target_id": 261, + "target_slot": 0, + "type": "INT" + }, + { + "id": 356, + "origin_id": 258, + "origin_slot": 0, + "target_id": 262, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 357, + "origin_id": 255, + "origin_slot": 0, + "target_id": 262, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 358, + "origin_id": 256, + "origin_slot": 0, + "target_id": 262, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 359, + "origin_id": 256, + "origin_slot": 0, + "target_id": 261, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 360, + "origin_id": -10, + "origin_slot": 0, + "target_id": 249, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 361, + "origin_id": -10, + "origin_slot": 1, + "target_id": 252, + "target_slot": 0, + "type": "INT" + }, + { + "id": 362, + "origin_id": -10, + "origin_slot": 2, + "target_id": 252, + "target_slot": 1, + "type": "INT" + }, + { + "id": 367, + "origin_id": 260, + "origin_slot": 0, + "target_id": 247, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 368, + "origin_id": 261, + "origin_slot": 0, + "target_id": 253, + "target_slot": 5, + "type": "INT" + }, + { + "id": 369, + "origin_id": 262, + "origin_slot": 0, + "target_id": 253, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 370, + "origin_id": -10, + "origin_slot": 3, + "target_id": 256, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 4, + "target_id": 253, + "target_slot": 4, + "type": "INT" + }, + { + "id": 372, + "origin_id": -10, + "origin_slot": 5, + "target_id": 248, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 373, + "origin_id": -10, + "origin_slot": 6, + "target_id": 245, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 374, + 
"origin_id": -10, + "origin_slot": 7, + "target_id": 246, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 375, + "origin_id": -10, + "origin_slot": 8, + "target_id": 259, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using Qwen-Image-2512, with enhanced human realism and finer natural detail over the base version." + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Qwen-Image).json b/blueprints/Text to Image (Qwen-Image).json new file mode 100644 index 000000000..e78d5a962 --- /dev/null +++ b/blueprints/Text to Image (Qwen-Image).json @@ -0,0 +1,1882 @@ +{ + "revision": 0, + "last_node_id": 76, + "last_link_id": 0, + "nodes": [ + { + "id": 76, + "type": "e5cfe5ba-2ae0-4bc4-869f-ab2228cb44d3", + "pos": [ + 30, + 10 + ], + "size": [ + 470, + 660 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "lightning_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": 
"BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "6", + "text" + ], + [ + "58", + "width" + ], + [ + "58", + "height" + ], + [ + "3", + "seed" + ], + [ + "37", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "39", + "vae_name" + ], + [ + "73", + "lora_name" + ], + [ + "86", + "value" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": { + "text": true, + "lora_name": true, + "value": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (Qwen-Image)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "e5cfe5ba-2ae0-4bc4-869f-ab2228cb44d3", + "version": 1, + "state": { + "lastGroupId": 5, + "lastNodeId": 87, + "lastLinkId": 153, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Qwen-Image)", + "inputNode": { + "id": -10, + "bounding": [ + -810, + 290, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2580, + 340, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "846fd1a5-9f4a-4e83-af40-27cafe99e5c6", + "name": "text", + "type": "STRING", + "linkIds": [ + 132 + ], + "label": "prompt", + "pos": [ + -678.255859375, + 310 + ] + }, + { + "id": "e941d29f-bb7f-4001-a956-90a9b29ae9f9", + "name": "width", + "type": "INT", + "linkIds": [ + 134 + ], + "pos": [ + -678.255859375, + 330 + ] + }, + { + "id": "df798f50-87ba-481b-b847-ca8b7c7efff3", + "name": "height", + "type": "INT", + "linkIds": [ + 135 + ], + "pos": [ + -678.255859375, + 350 + ] + }, + { + "id": "3fcf7667-f697-43ee-bdee-0d3fed39e777", + "name": "seed", + "type": "INT", + "linkIds": [ + 136 + ], + "pos": [ + -678.255859375, + 370 + ] + }, + { + "id": 
"e8d70f26-d9f5-4633-a39e-0bf6cf93d566", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 137 + ], + "pos": [ + -678.255859375, + 390 + ] + }, + { + "id": "8c9b537a-c6c9-4365-96ad-dbbb82d917e0", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 138 + ], + "pos": [ + -678.255859375, + 410 + ] + }, + { + "id": "7cc2f92b-6e2f-4e4e-a316-b61f58ed1442", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 139 + ], + "pos": [ + -678.255859375, + 430 + ] + }, + { + "id": "3cb1ba7c-583c-4f92-afc1-71463161e2a4", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 140 + ], + "label": "lightning_lora", + "pos": [ + -678.255859375, + 450 + ] + }, + { + "id": "4278102d-766c-4c6b-af2e-0fb9f26bbb27", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 153 + ], + "label": "enable_turbo_mode", + "pos": [ + -678.255859375, + 470 + ] + } + ], + "outputs": [ + { + "id": "2af20250-dc7a-4643-bc84-0a180d9ca62b", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 110 + ], + "localized_name": "IMAGE", + "pos": [ + 2600, + 360 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + -260, + 510 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 139 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, 
+ "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -260, + 280 + ], + "size": [ + 330, + 150 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 138 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 58, + "type": "EmptySD3LatentImage", + "pos": [ + -240, + 810 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 134 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": 
"height" + }, + "link": 135 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 107 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1328, + 1328, + 1 + ] + }, + { + "id": 66, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1780, + 180 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 147 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 125 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -260, + 80 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 137 + }, + { + 
"localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 129, + 142 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 120, + 60 + ], + "size": [ + 440, + 340 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 74 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 132 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + 
"color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 130, + 480 + ], + "size": [ + 430, + 180 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 75 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 2190, + 350 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 128 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 73, + "type": 
"LoraLoaderModelOnly", + "pos": [ + 670, + 500 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 129 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 140 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 141 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "Qwen-Image-Lightning-8steps-V1.0.safetensors", + "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V1.0.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-Lightning-8steps-V1.0.safetensors", + 1 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 1780, + 330 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 107 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 136 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 148 + }, + { + 
"localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 149 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 128 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 50347169638278, + "randomize", + 8, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 78, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 180 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 142 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 141 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 150 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 147 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + 
}, + "widgets_values": [ + false + ] + }, + { + "id": 79, + "type": "PrimitiveInt", + "pos": [ + 680, + 710 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 143 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 81, + "type": "PrimitiveFloat", + "pos": [ + 680, + 870 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 144 + ] + } + ], + "title": "CFG", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 82, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 400 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 146 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 143 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 151 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 148 + ] + } + ], + 
"title": "Switch (Steps)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 83, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 600 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 145 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 144 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 152 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 149 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 84, + "type": "PrimitiveInt", + "pos": [ + 680, + 60 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 146 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 85, + "type": "PrimitiveFloat", + "pos": [ + 680, + 230 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ 
+ { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 145 + ] + } + ], + "title": "CFG", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 86, + "type": "PrimitiveBoolean", + "pos": [ + 710, + 1070 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 153 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 150, + 151, + 152 + ] + } + ], + "title": "Enable Lightning LoRA", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveBoolean" + }, + "widgets_values": [ + false + ] + }, + { + "id": 87, + "type": "MarkdownNote", + "pos": [ + 620, + -160 + ], + "size": [ + 500, + 120 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "Try 50 steps if you want the original [Qwen-Image](https://huggingface.co/Qwen/Qwen-Image) settings, but it will take longer." + ], + "color": "#222", + "bgcolor": "#000" + } + ], + "groups": [ + { + "id": 1, + "title": "Step1 - Load models", + "bounding": [ + -280, + -20, + 360, + 700 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Step2 - Image size", + "bounding": [ + 
-280, + 710, + 360, + 300 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 110, + -20, + 470, + 700 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Lightx2v 8steps LoRA", + "bounding": [ + 610, + 390, + 520, + 620 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Original Settings", + "bounding": [ + 610, + -20, + 520, + 380 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 74, + "origin_id": 38, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 75, + "origin_id": 38, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 129, + "origin_id": 37, + "origin_slot": 0, + "target_id": 73, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 128, + "origin_id": 3, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 125, + "origin_id": 66, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 46, + "origin_id": 6, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 52, + "origin_id": 7, + "origin_slot": 0, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 107, + "origin_id": 58, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 110, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 132, + "origin_id": -10, + "origin_slot": 0, + "target_id": 6, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 134, + "origin_id": -10, + "origin_slot": 1, + "target_id": 58, + "target_slot": 0, + "type": "INT" + }, + { + "id": 
135, + "origin_id": -10, + "origin_slot": 2, + "target_id": 58, + "target_slot": 1, + "type": "INT" + }, + { + "id": 136, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 137, + "origin_id": -10, + "origin_slot": 4, + "target_id": 37, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 138, + "origin_id": -10, + "origin_slot": 5, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 139, + "origin_id": -10, + "origin_slot": 6, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 140, + "origin_id": -10, + "origin_slot": 7, + "target_id": 73, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 141, + "origin_id": 73, + "origin_slot": 0, + "target_id": 78, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 142, + "origin_id": 37, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 143, + "origin_id": 79, + "origin_slot": 0, + "target_id": 82, + "target_slot": 1, + "type": "INT" + }, + { + "id": 144, + "origin_id": 81, + "origin_slot": 0, + "target_id": 83, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 145, + "origin_id": 85, + "origin_slot": 0, + "target_id": 83, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 146, + "origin_id": 84, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "INT" + }, + { + "id": 147, + "origin_id": 78, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 148, + "origin_id": 82, + "origin_slot": 0, + "target_id": 3, + "target_slot": 5, + "type": "INT" + }, + { + "id": 149, + "origin_id": 83, + "origin_slot": 0, + "target_id": 3, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 150, + "origin_id": 86, + "origin_slot": 0, + "target_id": 78, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 151, + "origin_id": 86, + "origin_slot": 0, + "target_id": 82, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + 
"id": 152, + "origin_id": 86, + "origin_slot": 0, + "target_id": 83, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 153, + "origin_id": -10, + "origin_slot": 8, + "target_id": 86, + "target_slot": 0, + "type": "BOOLEAN" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using Qwen-Image, Alibaba's 20B MMDiT model with excellent multilingual text rendering." + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Text to Image (Z-Image-Base).json b/blueprints/Text to Image (Z-Image-Base).json new file mode 100644 index 000000000..169263712 --- /dev/null +++ b/blueprints/Text to Image (Z-Image-Base).json @@ -0,0 +1,1184 @@ +{ + "revision": 0, + "last_node_id": 126, + "last_link_id": 0, + "nodes": [ + { + "id": 126, + "type": "8a2bb267-5858-4aaf-bdcd-61002711af19", + "pos": [ + -2280, + 2850 + ], + "size": [ + 410, + 560 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "67", + "text" + ], + [ + "68", + "width" + ], + [ + "68", + "height" + ], + [ + "69", + "steps" + ], + [ + "69", + "cfg" + ], + [ + "69", + "seed" + ], + [ + "66", + "unet_name" + ], + [ + "62", + "clip_name" + ], + [ + "63", + "vae_name" + ], + [ + "69", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Z-Image-Base)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "8a2bb267-5858-4aaf-bdcd-61002711af19", + "version": 1, + "state": { + "lastGroupId": 16, + "lastNodeId": 126, + "lastLinkId": 229, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Z-Image-Base)", + "description": "Generates images from text prompts using Z-Image base weights with Qwen3 text encoder and bundled VAE.", + "inputNode": { + "id": -10, + "bounding": [ + -220, + 40, + 120, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1840, + -150, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "af36fee5-4f8b-4a8e-bfa8-cb8fe7006cc3", + "name": "text", + "type": "STRING", + "linkIds": [ + 108 + ], + "label": "prompt", + "pos": [ + -120, + 60 + ] + }, + { + "id": "357f0059-e8e6-41f6-a290-c53b0a60c0ed", + "name": "width", + "type": "INT", + "linkIds": [ + 114 + ], + "pos": [ + -120, + 80 + ] + }, + { + "id": "4a442743-a9c2-4aa5-9efd-05d43f3322d3", + "name": "height", + "type": "INT", + "linkIds": [ + 115 + ], + "pos": [ + -120, + 100 + ] + }, + { + "id": "a0fc336b-d349-418e-8415-318653f7b6b3", + "name": "steps", + "type": "INT", + "linkIds": [ + 116 + ], + "pos": [ + -120, + 120 + ] + }, + { + "id": "2f253ace-1e1a-415f-9b95-a10430bd5749", 
+ "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 117 + ], + "pos": [ + -120, + 140 + ] + }, + { + "id": "18a6ad37-23aa-4bf7-a0cd-1d6ca6e2a128", + "name": "seed", + "type": "INT", + "linkIds": [ + 118 + ], + "pos": [ + -120, + 160 + ] + }, + { + "id": "d1fc4937-8505-4ec6-9fc4-a33ef7b45eee", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 119 + ], + "pos": [ + -120, + 180 + ] + }, + { + "id": "db45dd49-d990-4ceb-a849-f96341874cdd", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 120 + ], + "pos": [ + -120, + 200 + ] + }, + { + "id": "37b8eac6-9b1b-452b-81f3-0ba9e34a576a", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 121 + ], + "pos": [ + -120, + 220 + ] + } + ], + "outputs": [ + { + "id": "f2bea309-bfe7-4ccb-9ffe-9475bf1da2ae", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 79 + ], + "localized_name": "IMAGE", + "pos": [ + 1860, + -130 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 600, + -90 + ], + "size": [ + 410, + 320 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 78 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 108 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 75 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 68, + "type": "EmptySD3LatentImage", + "pos": [ + 240, + 620 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + 
"inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 114 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 115 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 77 + ] + } + ], + "properties": { + "Node name for S&R": "EmptySD3LatentImage", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 63, + "type": "VAELoader", + "pos": [ + 230, + 340 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 121 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 73 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 62, + "type": "CLIPLoader", + "pos": [ + 230, + 110 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", 
+ "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 120 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 78, + 82 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 1450, + -150 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 72 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 73 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 79 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 70, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1100, + -150 + ], + "size": [ + 310, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": 
"model", + "type": "MODEL", + "link": 109 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 74 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingAuraFlow", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 66, + "type": "UNETLoader", + "pos": [ + 230, + -90 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 119 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 109 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "z_image_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image/resolve/main/split_files/diffusion_models/z_image_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_bf16.safetensors", + "default" + ] + }, + { + "id": 71, + "type": "CLIPTextEncode", + "pos": [ + 600, + 310 + ], + "size": [ + 390, + 140 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 82 
+ }, + { + "label": "prompt", + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 83 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 69, + "type": "KSampler", + "pos": [ + 1100, + 10 + ], + "size": [ + 310, + 440 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 74 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 75 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 83 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 77 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 118 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 116 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 117 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": 
"denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 72 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 25, + 4, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 87, + "type": "MarkdownNote", + "pos": [ + 1110, + -360 + ], + "size": [ + 300, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "- Steps: 30~50\n- cfg: 3~5" + ], + "color": "#222", + "bgcolor": "#000", + "title": "Original Settings" + } + ], + "groups": [ + { + "id": 2, + "title": "Step2 - Image size", + "bounding": [ + 200, + 530, + 330, + 287.9999544955691 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 570, + -200, + 470, + 700 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 4, + "title": "Step1 - Load models", + "bounding": [ + 200, + -200, + 330, + 700 + ], + "color": "#3f789e", + "flags": {} + } + ], + "links": [ + { + "id": 78, + "origin_id": 62, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 74, + "origin_id": 70, + "origin_slot": 0, + "target_id": 69, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 75, + "origin_id": 67, + "origin_slot": 0, + "target_id": 69, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 83, + "origin_id": 71, + "origin_slot": 0, + "target_id": 69, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 77, + "origin_id": 68, + "origin_slot": 0, + "target_id": 69, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 82, + "origin_id": 62, + "origin_slot": 0, + 
"target_id": 71, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 72, + "origin_id": 69, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 73, + "origin_id": 63, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 79, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 108, + "origin_id": -10, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 109, + "origin_id": 66, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 114, + "origin_id": -10, + "origin_slot": 1, + "target_id": 68, + "target_slot": 0, + "type": "INT" + }, + { + "id": 115, + "origin_id": -10, + "origin_slot": 2, + "target_id": 68, + "target_slot": 1, + "type": "INT" + }, + { + "id": 116, + "origin_id": -10, + "origin_slot": 3, + "target_id": 69, + "target_slot": 5, + "type": "INT" + }, + { + "id": 117, + "origin_id": -10, + "origin_slot": 4, + "target_id": 69, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 118, + "origin_id": -10, + "origin_slot": 5, + "target_id": 69, + "target_slot": 4, + "type": "INT" + }, + { + "id": 119, + "origin_id": -10, + "origin_slot": 6, + "target_id": 66, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 120, + "origin_id": -10, + "origin_slot": 7, + "target_id": 62, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 121, + "origin_id": -10, + "origin_slot": 8, + "target_id": 63, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image" + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Text to Image (Z-Image-Turbo).json b/blueprints/Text to Image (Z-Image-Turbo).json index ce25ce1df..2501486fa 100644 --- a/blueprints/Text to Image (Z-Image-Turbo).json +++ b/blueprints/Text to Image 
(Z-Image-Turbo).json @@ -1 +1,1112 @@ -{"id": "1c3eaa76-5cfa-4dc7-8571-97a570324e01", "revision": 0, "last_node_id": 34, "last_link_id": 40, "nodes": [{"id": 5, "type": "dfe9eb32-97c0-43a5-90d5-4fd37768d91b", "pos": [-2.5766491043910378e-05, 1229.999928629805], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "width"], ["-1", "height"], ["3", "seed"], ["3", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.3.73", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1024, 1024, null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "dfe9eb32-97c0-43a5-90d5-4fd37768d91b", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 34, "lastLinkId": 40, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Text to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [-80, 425, 120, 160]}, "outputNode": {"id": -20, "bounding": [1490, 415, 120, 60]}, "inputs": [{"id": "fb178669-e742-4a53-8a69-7df59834dfd8", "name": "text", "type": "STRING", "linkIds": [34], "label": "prompt", "pos": [20, 445]}, 
{"id": "dd780b3c-23e9-46ff-8469-156008f42e5a", "name": "width", "type": "INT", "linkIds": [35], "pos": [20, 465]}, {"id": "7b08d546-6bb0-4ef9-82e9-ffae5e1ee6bc", "name": "height", "type": "INT", "linkIds": [36], "pos": [20, 485]}, {"id": "23087d15-8412-4fbd-b71e-9b6d7ef76de1", "name": "unet_name", "type": "COMBO", "linkIds": [38], "pos": [20, 505]}, {"id": "0677f5c3-2a3f-43d4-98ac-a4c56d5efdc0", "name": "clip_name", "type": "COMBO", "linkIds": [39], "pos": [20, 525]}, {"id": "c85c0445-2641-48b1-bbca-95057edf2fcf", "name": "vae_name", "type": "COMBO", "linkIds": [40], "pos": [20, 545]}], "outputs": [{"id": "1fa72a21-ce00-4952-814e-1f2ffbe87d1d", "name": "IMAGE", "type": "IMAGE", "linkIds": [16], "localized_name": "IMAGE", "pos": [1510, 435]}], "widgets": [], "nodes": [{"id": 30, "type": "CLIPLoader", "pos": [109.99997264844609, 329.99999029608756], "size": [269.9869791666667, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 39}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 29, "type": "VAELoader", "pos": [109.99997264844609, 479.9999847172637], "size": [269.9869791666667, 58], "flags": 
{}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 40}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 33, "type": "ConditioningZeroOut", "pos": [639.9999103333332, 620.0000271257795], "size": [204.134765625, 26], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 32}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [1219.9999088104782, 160.00009184959066], "size": [209.98697916666669, 46], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 14}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 27}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [16]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, 
"widgets_values": []}, {"id": 28, "type": "UNETLoader", "pos": [109.99997264844609, 200.0000502647102], "size": [269.9869791666667, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 38}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [26]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 27, "type": "CLIPTextEncode", "pos": [429.99997828947767, 200.0000502647102], "size": [409.9869791666667, 319.9869791666667], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 28}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 34}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [30, 32]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""]}, {"id": 13, "type": "EmptySD3LatentImage", "pos": [109.99997264844609, 629.9999791384399], "size": [259.9869791666667, 106], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "width", 
"name": "width", "type": "INT", "widget": {"name": "width"}, "link": 35}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 36}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [17]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 3, "type": "KSampler", "pos": [879.9999615530063, 269.9999774911694], "size": [314.9869791666667, 262], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 13}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 30}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 33}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 17}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [14]}], "properties": {"cnr_id": 
"comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 4, 1, "res_multistep", "simple", 1]}, {"id": 11, "type": "ModelSamplingAuraFlow", "pos": [879.9999615530063, 160.00009184959066], "size": [309.9869791666667, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 26}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}], "groups": [{"id": 2, "title": "Image size", "bounding": [100, 560, 290, 200], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [410, 130, 450, 540], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 4, "title": "Models", "bounding": [100, 130, 290, 413.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 32, "origin_id": 27, "origin_slot": 0, "target_id": 33, "target_slot": 0, "type": "CONDITIONING"}, {"id": 26, "origin_id": 28, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "MODEL"}, {"id": 14, "origin_id": 3, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 27, "origin_id": 29, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 30, "origin_id": 27, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": 
"CONDITIONING"}, {"id": 33, "origin_id": 33, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 17, "origin_id": 13, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 28, "origin_id": 30, "origin_slot": 0, "target_id": 27, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 27, "target_slot": 1, "type": "STRING"}, {"id": 35, "origin_id": -10, "origin_slot": 1, "target_id": 13, "target_slot": 0, "type": "INT"}, {"id": 36, "origin_id": -10, "origin_slot": 2, "target_id": 13, "target_slot": 1, "type": "INT"}, {"id": 38, "origin_id": -10, "origin_slot": 3, "target_id": 28, "target_slot": 0, "type": "COMBO"}, {"id": 39, "origin_id": -10, "origin_slot": 4, "target_id": 30, "target_slot": 0, "type": "COMBO"}, {"id": 40, "origin_id": -10, "origin_slot": 5, "target_id": 29, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Text to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.8401370345180755, "offset": [940.0587067393087, -830.7121087564725]}}, "version": 0.4} +{ + "revision": 0, + "last_node_id": 57, + "last_link_id": 0, + "nodes": [ + { + "id": 57, + "type": "f2fdebf6-dfaf-43b6-9eb2-7f70613cfdc1", + "pos": [ + 130, + 200 + ], + "size": [ + 400, + 470 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + 
{ + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "27", + "text" + ], + [ + "13", + "width" + ], + [ + "13", + "height" + ], + [ + "3", + "seed" + ], + [ + "3", + "steps" + ], + [ + "28", + "unet_name" + ], + [ + "30", + "clip_name" + ], + [ + "29", + "vae_name" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Z-Image-Turbo)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "f2fdebf6-dfaf-43b6-9eb2-7f70613cfdc1", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 61, + "lastLinkId": 75, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Z-Image-Turbo)", + "inputNode": { + "id": -10, + "bounding": [ + -560, + 480, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1670, + 320, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "fb178669-e742-4a53-8a69-7df59834dfd8", + "name": "text", + "type": "STRING", + "linkIds": [ + 34 + ], + "label": "prompt", + "pos": [ + -460, + 500 + ] + }, + { + "id": "dd780b3c-23e9-46ff-8469-156008f42e5a", + "name": "width", + "type": "INT", + "linkIds": [ + 35 + ], + 
"pos": [ + -460, + 520 + ] + }, + { + "id": "7b08d546-6bb0-4ef9-82e9-ffae5e1ee6bc", + "name": "height", + "type": "INT", + "linkIds": [ + 36 + ], + "pos": [ + -460, + 540 + ] + }, + { + "id": "f77677f7-6bf6-4c19-a71f-c4a553d5981e", + "name": "seed", + "type": "INT", + "linkIds": [ + 71 + ], + "pos": [ + -460, + 560 + ] + }, + { + "id": "ef9a9fb1-5983-4bc9-a60b-cf5aec48bff1", + "name": "steps", + "type": "INT", + "linkIds": [ + 72 + ], + "pos": [ + -460, + 580 + ] + }, + { + "id": "a20a1b30-785f-4a04-bb6d-3d61adab9764", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 73 + ], + "pos": [ + -460, + 600 + ] + }, + { + "id": "4af8fc2b-4655-4086-8240-45f8cb38c6f6", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 74 + ], + "pos": [ + -460, + 620 + ] + }, + { + "id": "4d518693-2807-439c-9cb6-cffd23ccba2c", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 75 + ], + "pos": [ + -460, + 640 + ] + } + ], + "outputs": [ + { + "id": "1fa72a21-ce00-4952-814e-1f2ffbe87d1d", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 16 + ], + "localized_name": "IMAGE", + "pos": [ + 1690, + 340 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 30, + "type": "CLIPLoader", + "pos": [ + 30, + 420 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 74 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 28 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "qwen_3_4b.safetensors", + 
"url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 29, + "type": "VAELoader", + "pos": [ + 30, + 650 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 75 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 27 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 33, + "type": "ConditioningZeroOut", + "pos": [ + 630, + 960 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 32 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 33 + ] + } + ], + "properties": { + "Node name for S&R": "ConditioningZeroOut", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + } + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1320, + 230 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 14 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 27 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 16 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 28, + "type": "UNETLoader", + "pos": [ + 30, + 230 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 73 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 26 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 27, + "type": "CLIPTextEncode", + "pos": [ + 400, + 230 + ], + 
"size": [ + 450, + 650 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 28 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 30, + 32 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.3.73", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 13, + "type": "EmptySD3LatentImage", + "pos": [ + 40, + 890 + ], + "size": [ + 260, + 170 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 35 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 36 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 17 + ] + } + ], + "properties": { + "Node name for S&R": "EmptySD3LatentImage", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 11, + "type": "ModelSamplingAuraFlow", + "pos": [ + 950, + 230 + ], + "size": [ + 310, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", 
+ "type": "MODEL", + "link": 26 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingAuraFlow", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 950, + 400 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 13 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 30 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 33 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 17 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 71 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 72 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 14 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler", + "cnr_id": "comfy-core", + "ver": "0.3.64", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + "randomize", + 8, + 1, + "res_multistep", + "simple", + 1 + ] + } + ], + "groups": [ + { + "id": 2, + "title": "Step2 - Image size", + "bounding": [ + 10, + 820, + 320, + 280 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 360, + 130, + 530, + 970 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Step1 - Load models", + "bounding": [ + 0, + 130, + 330, + 660 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 32, + "origin_id": 27, + "origin_slot": 0, + "target_id": 33, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 26, + "origin_id": 28, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 14, + "origin_id": 3, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 27, + "origin_id": 29, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 13, + "origin_id": 11, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 30, + "origin_id": 27, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 33, + "origin_id": 33, + "origin_slot": 0, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 17, + "origin_id": 13, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 28, + "origin_id": 30, + "origin_slot": 0, + "target_id": 
27, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 34, + "origin_id": -10, + "origin_slot": 0, + "target_id": 27, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 35, + "origin_id": -10, + "origin_slot": 1, + "target_id": 13, + "target_slot": 0, + "type": "INT" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 2, + "target_id": 13, + "target_slot": 1, + "type": "INT" + }, + { + "id": 71, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 72, + "origin_id": -10, + "origin_slot": 4, + "target_id": 3, + "target_slot": 5, + "type": "INT" + }, + { + "id": 73, + "origin_id": -10, + "origin_slot": 5, + "target_id": 28, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 74, + "origin_id": -10, + "origin_slot": 6, + "target_id": 30, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 75, + "origin_id": -10, + "origin_slot": 7, + "target_id": 29, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using Z-Image-Turbo, Alibaba's distilled 6B DiT model." 
+ } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Text to Image.json b/blueprints/Text to Image.json new file mode 100644 index 000000000..ffe3682ff --- /dev/null +++ b/blueprints/Text to Image.json @@ -0,0 +1,1132 @@ +{ + "revision": 0, + "last_node_id": 71, + "last_link_id": 0, + "nodes": [ + { + "id": 71, + "type": "2d5985c9-deef-41ae-9c34-6353d3d7d1ef", + "pos": [ + 90, + 800 + ], + "size": [ + 400, + 80 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "title": "Text to Image", + "properties": { + "proxyWidgets": [ + [ + "67", + "text" + ], + [ + "68", + "width" + ], + [ + "68", + "height" + ], + [ + "66", + "unet_name" + ], + [ + "62", + "clip_name" + ], + [ + "63", + "vae_name" + ], + [ + "70", + "steps" + ], + [ + "70", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": { + "text": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 
+ }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2d5985c9-deef-41ae-9c34-6353d3d7d1ef", + "version": 1, + "state": { + "lastGroupId": 4, + "lastNodeId": 71, + "lastLinkId": 70, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image", + "inputNode": { + "id": -10, + "bounding": [ + -80, + 425, + 120, + 180 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1490, + 415, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "fb178669-e742-4a53-8a69-7df59834dfd8", + "name": "text", + "type": "STRING", + "linkIds": [ + 34 + ], + "label": "prompt", + "pos": [ + 20, + 445 + ] + }, + { + "id": "dd780b3c-23e9-46ff-8469-156008f42e5a", + "name": "width", + "type": "INT", + "linkIds": [ + 35 + ], + "pos": [ + 20, + 465 + ] + }, + { + "id": "7b08d546-6bb0-4ef9-82e9-ffae5e1ee6bc", + "name": "height", + "type": "INT", + "linkIds": [ + 36 + ], + "pos": [ + 20, + 485 + ] + }, + { + "id": "8ed4eb73-a2bf-4766-8bf4-c5890b560596", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 38 + ], + "pos": [ + 20, + 505 + ] + }, + { + "id": "f362d639-d412-4b5d-8490-1e9995dc5f82", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 39 + ], + "pos": [ + 20, + 525 + ] + }, + { + "id": "ee25ac16-de63-4b74-bbbb-5b29fdc1efcf", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 40 + ], + "pos": [ + 20, + 545 + ] + }, + { + "id": "51cbcd61-9218-4bcb-89ac-ecdfb1ef8892", + "name": "steps", + "type": "INT", + "linkIds": [ + 70 + ], + "pos": [ + 20, + 565 + ] + } + ], + "outputs": [ + { + "id": "1fa72a21-ce00-4952-814e-1f2ffbe87d1d", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 16 + ], + "localized_name": "IMAGE", + "pos": [ + 1510, + 435 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 62, + "type": "CLIPLoader", + "pos": [ + 110, + 330 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + 
"name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 39 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 28 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_3_4b.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_3_4b.safetensors", + "lumina2", + "default" + ] + }, + { + "id": 63, + "type": "VAELoader", + "pos": [ + 110, + 480 + ], + "size": [ + 270, + 60 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 40 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 27 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 64, + "type": "ConditioningZeroOut", + "pos": [ + 640, + 620 + ], + "size": [ + 210, + 30 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 32 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ConditioningZeroOut", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 65, + "type": "VAEDecode", + "pos": [ + 1220, + 160 + ], + "size": [ + 210, + 50 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 14 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 27 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 16 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 66, + "type": "UNETLoader", + "pos": [ + 110, + 200 + ], + "size": [ + 270, + 90 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + 
"localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 38 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 26 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "z_image_turbo_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "z_image_turbo_bf16.safetensors", + "default" + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 430, + 200 + ], + "size": [ + 410, + 370 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 28 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 30, + 32 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + "" + ] + }, + { + "id": 68, + "type": "EmptySD3LatentImage", + "pos": [ + 110, + 630 + ], + "size": [ + 260, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 35 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 36 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 69, + "type": "ModelSamplingAuraFlow", + "pos": [ + 880, + 160 + ], + "size": [ + 310, + 60 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 26 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, 
+ "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 70, + "type": "KSampler", + "pos": [ + 880, + 270 + ], + "size": [ + 320, + 270 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 13 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 30 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 33 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 17 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 70 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 14 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + 0, + "randomize", + 8, + 1, + "res_multistep", + "simple", + 1 + ] + } + ], + "groups": [ + { + "id": 2, + "title": "Step2 - Image size", + "bounding": [ + 100, + 560, + 290, + 200 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 410, + 130, + 450, + 540 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Step1 - Load models", + "bounding": [ + 100, + 130, + 290, + 413.6 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 32, + "origin_id": 67, + "origin_slot": 0, + "target_id": 64, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 26, + "origin_id": 66, + "origin_slot": 0, + "target_id": 69, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 14, + "origin_id": 70, + "origin_slot": 0, + "target_id": 65, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 27, + "origin_id": 63, + "origin_slot": 0, + "target_id": 65, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 13, + "origin_id": 69, + "origin_slot": 0, + "target_id": 70, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 30, + "origin_id": 67, + "origin_slot": 0, + "target_id": 70, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 33, + "origin_id": 64, + "origin_slot": 0, + "target_id": 70, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 17, + "origin_id": 68, + "origin_slot": 0, + "target_id": 70, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 28, + "origin_id": 62, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": 65, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 34, + "origin_id": -10, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 35, + "origin_id": -10, + "origin_slot": 1, + "target_id": 68, + "target_slot": 
0, + "type": "INT" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 2, + "target_id": 68, + "target_slot": 1, + "type": "INT" + }, + { + "id": 38, + "origin_id": -10, + "origin_slot": 3, + "target_id": 66, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 39, + "origin_id": -10, + "origin_slot": 4, + "target_id": 62, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 40, + "origin_id": -10, + "origin_slot": 5, + "target_id": 63, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 70, + "origin_id": -10, + "origin_slot": 6, + "target_id": 70, + "target_slot": 5, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image", + "description": "Generates images from text prompts using Z-Image-Turbo defaults with Qwen3 text encoder and VAE." + } + ] + }, + "extra": {} +} diff --git a/blueprints/Text to Video (LTX-2.3).json b/blueprints/Text to Video (LTX-2.3).json new file mode 100644 index 000000000..f44a216dd --- /dev/null +++ b/blueprints/Text to Video (LTX-2.3).json @@ -0,0 +1,4297 @@ +{ + "revision": 0, + "last_node_id": 324, + "last_link_id": 0, + "nodes": [ + { + "id": 324, + "type": "871cf29d-2726-43a5-b61e-01fa939d699d", + "pos": [ + -300, + 4290 + ], + "size": [ + 400, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "width", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "height", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "label": "duration", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "label": "distilled_lora", + "name": "lora_name", + "type": 
"COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "latent_upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "label": "fps", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "320", + "value" + ], + [ + "314", + "value" + ], + [ + "301", + "value" + ], + [ + "303", + "value" + ], + [ + "318", + "ckpt_name" + ], + [ + "287", + "lora_name" + ], + [ + "319", + "text_encoder" + ], + [ + "313", + "model_name" + ], + [ + "302", + "value" + ], + [ + "279", + "noise_seed" + ], + [ + "279", + "control_after_generate" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value_1": true, + "value_2": true, + "value_3": true, + "value_4": true, + "lora_name": true, + "model_name": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Video (LTX-2.3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "871cf29d-2726-43a5-b61e-01fa939d699d", + "version": 1, + "state": { + "lastGroupId": 26, + "lastNodeId": 324, + "lastLinkId": 631, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 720, + 4240, + 162.162109375, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 6100, + 4160, + 120, + 60 + ] + }, + "inputs": [ + { + "id": 
"9494c550-4172-49c6-930e-5b508f775e77", + "name": "value", + "type": "STRING", + "linkIds": [ + 595 + ], + "pos": [ + 862.162109375, + 4260 + ] + }, + { + "id": "58dbb3f6-f924-4548-96ef-e0e34610bd4e", + "name": "value_2", + "type": "INT", + "linkIds": [ + 597 + ], + "label": "width", + "pos": [ + 862.162109375, + 4280 + ] + }, + { + "id": "6086d5b8-2586-448c-a641-dd14d76dd102", + "name": "value_3", + "type": "INT", + "linkIds": [ + 598 + ], + "label": "height", + "pos": [ + 862.162109375, + 4300 + ] + }, + { + "id": "feb8c2eb-ae48-4fa8-bc24-929552d656c3", + "name": "value_4", + "type": "INT", + "linkIds": [ + 599 + ], + "label": "duration", + "pos": [ + 862.162109375, + 4320 + ] + }, + { + "id": "d7255058-319a-4880-8f9a-7e542c8f3c3c", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 601, + 604, + 605 + ], + "pos": [ + 862.162109375, + 4340 + ] + }, + { + "id": "4afce68d-8f65-4342-9d6d-ae0a7688c3e3", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 602 + ], + "label": "distilled_lora", + "pos": [ + 862.162109375, + 4360 + ] + }, + { + "id": "ab842b4b-c977-4679-b421-424722785b57", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 606 + ], + "pos": [ + 862.162109375, + 4380 + ] + }, + { + "id": "9e47372d-28d9-4311-91e9-e90d03f4eb43", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 607 + ], + "label": "latent_upscale_model", + "pos": [ + 862.162109375, + 4400 + ] + }, + { + "id": "7951b137-465e-4844-b05f-88b89f0e1ba8", + "name": "value_1", + "type": "INT", + "linkIds": [ + 627 + ], + "label": "fps", + "pos": [ + 862.162109375, + 4420 + ] + } + ], + "outputs": [ + { + "id": "954ef307-c897-4eea-8b5c-5c6ce15a5357", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 536 + ], + "localized_name": "VIDEO", + "pos": [ + 6120, + 4180 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 278, + "type": "RandomNoise", + "pos": [ + 4720, + 3750 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": 
[ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 490 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 42, + "fixed" + ] + }, + { + "id": 279, + "type": "RandomNoise", + "pos": [ + 3200, + 3900 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 483 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 343011291748534, + "randomize" + ] + }, + { + "id": 280, + "type": "LTXVConcatAVLatent", + "pos": [ + 4730, + 4520 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 512 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": 
"latent", + "type": "LATENT", + "links": [ + 494 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 281, + "type": "LTXVAudioVAELoader", + "pos": [ + 1660, + 4140 + ], + "size": [ + 430, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 604 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 481, + 496 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 282, + "type": "KSamplerSelect", + "pos": [ + 4720, + 4160 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 492 + ] + } + ], + 
"properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_cfg_pp" + ] + }, + { + "id": 283, + "type": "ManualSigmas", + "pos": [ + 4720, + 4340 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 493 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.85, 0.7250, 0.4219, 0.0" + ] + }, + { + "id": 284, + "type": "CFGGuider", + "pos": [ + 4720, + 3930 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 478 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 479 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 480 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 491 + ] + } + ], + 
"properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 285, + "type": "SamplerCustomAdvanced", + "pos": [ + 3620, + 3990 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 483 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 484 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 485 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 544 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 487 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 488 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 286, + "type": "LTXVCropGuides", + "pos": [ + 3900, + 3700 + ], + "size": [ + 250, + 120 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 475 + }, + { + "localized_name": "negative", + "name": 
"negative", + "type": "CONDITIONING", + "link": 476 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 477 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 479 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 480 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 287, + "type": "LoraLoaderModelOnly", + "pos": [ + 1660, + 3910 + ], + "size": [ + 430, + 140 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 520 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 602 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 478, + 541 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": 
"ltx-2.3-22b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-22b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-lora-384.safetensors", + 0.5 + ] + }, + { + "id": 288, + "type": "ResizeImagesByLongerEdge", + "pos": [ + 2120, + 5040 + ], + "size": [ + 310, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 523 + }, + { + "localized_name": "longer_edge", + "name": "longer_edge", + "type": "INT", + "widget": { + "name": "longer_edge" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 505 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ResizeImagesByLongerEdge", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1536 + ] + }, + { + "id": 289, + "type": "LTXVLatentUpsampler", + "pos": [ + 4270, + 3910 + ], + "size": [ + 330, + 120 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 547 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 545 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 554 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 548 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": 
"comfy-core", + "ver": "0.14.1", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 290, + "type": "LTXVImgToVideoInplace", + "pos": [ + 4280, + 4150 + ], + "size": [ + 300, + 180 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 552 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 515 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 548 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 543 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 512 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 291, + "type": "LTXVPreprocess", + "pos": [ + 2130, + 5190 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 505 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": 
"output_image", + "type": "IMAGE", + "links": [ + 510, + 515 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 18 + ] + }, + { + "id": 292, + "type": "ResizeImageMaskNode", + "pos": [ + 1670, + 5040 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 626 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 558 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 559 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 523 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 1920, + 1088, + "center", + "lanczos" + ] + }, + { + "id": 293, + "type": "KSamplerSelect", + "pos": [ + 3200, + 4350 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 485 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_ancestral_cfg_pp" + ] + }, + { + "id": 294, + "type": "ComfyMathExpression", + "pos": [ + 2530, + 5070 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 19, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 560 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 561 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": 
"ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 295, + "type": "Reroute", + "pos": [ + 3930, + 4090 + ], + "size": [ + 80, + 30 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 557 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 552, + 553, + 554 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 296, + "type": "ComfyMathExpression", + "pos": [ + 2530, + 5130 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 21, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 562 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 563 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 297, + "type": "EmptyLTXVLatentVideo", + "pos": 
[ + 2980, + 5200 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 561 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 563 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 631 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 511 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 298, + "type": "LTXVImgToVideoInplace", + "pos": [ + 3420, + 4990 + ], + "size": [ + 280, + 180 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 556 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 510 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 511 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 542 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", 
+ "links": [ + 497 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.7, + false + ] + }, + { + "id": 299, + "type": "LTXVAudioVAEDecode", + "pos": [ + 5770, + 3940 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 495 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 496 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 534 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 300, + "type": "ComfyMathExpression", + "pos": [ + 2530, + 5270 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 564 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + 
"name": "FLOAT", + "type": "FLOAT", + "links": [ + 566, + 591 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 565 + ] + } + ], + "title": "Math Expression (fps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 301, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4530 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 598 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 559, + 562 + ] + } + ], + "title": "Height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 302, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4680 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 627 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 564, + 629 + ] + } + ], + "title": "Frame Rate", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + 
"version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 303, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4230 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 599 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 628 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 304, + "type": "PrimitiveBoolean", + "pos": [ + 1170, + 4080 + ], + "size": [ + 370, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 542, + 543 + ] + } + ], + "title": "Switch to Text to Video?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.0", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send 
Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true + ] + }, + { + "id": 305, + "type": "CLIPTextEncode", + "pos": [ + 2170, + 3640 + ], + "size": [ + 550, + 740 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 615 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 623 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 526 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 306, + "type": "LTXVConditioning", + "pos": [ + 2790, + 3670 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 526 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 527 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 566 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 475, + 518 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 476, + 519 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": 
"comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 307, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2970, + 4970 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 481 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 630 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 565 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 498 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 308, + "type": "ManualSigmas", + "pos": [ + 3200, + 4550 + ], + "size": [ + 500, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 544 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 309, + "type": "LTXVSeparateAVLatent", + "pos": [ + 3890, + 3910 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 488 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 477, + 547 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 513 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 310, + "type": "SamplerCustomAdvanced", + "pos": [ + 5070, + 3750 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 490 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 491 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 492 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 493 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 
494 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 578 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 311, + "type": "LTXVSeparateAVLatent", + "pos": [ + 5410, + 3750 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 578 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 539 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 495 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 312, + "type": "CreateVideo", + "pos": [ + 5740, + 4610 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 538 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 534 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": 
"fps" + }, + "link": 591 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 536 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 313, + "type": "LatentUpscaleModelLoader", + "pos": [ + 1670, + 4600 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 607 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 545 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "directory": "latent_upscale_models" + } + ] + }, + "widgets_values": [ + "ltx-2.3-spatial-upscaler-x2-1.1.safetensors" + ] + }, + { + "id": 314, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4380 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + 
"name": "value" + }, + "link": 597 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 558, + 560 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 315, + "type": "CLIPTextEncode", + "pos": [ + 2180, + 4480 + ], + "size": [ + 530, + 240 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 625 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 527 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "pc game, console game, video game, cartoon, childish, ugly" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 316, + "type": "CFGGuider", + "pos": [ + 3200, + 4100 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 541 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 518 + }, 
+ { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 519 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 484 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 317, + "type": "VAEDecodeTiled", + "pos": [ + 5760, + 3650 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 539 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 553 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 538 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": 
"VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 4 + ] + }, + { + "id": 318, + "type": "CheckpointLoaderSimple", + "pos": [ + 1660, + 3660 + ], + "size": [ + 430, + 160 + ], + "flags": {}, + "order": 41, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 601 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 520 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 556, + 557 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 319, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 1660, + 4340 + ], + "size": [ + 430, + 170 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 606 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 605 + }, + { + 
"localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 615, + 625 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-dev-fp8.safetensors", + "default" + ] + }, + { + "id": 320, + "type": "PrimitiveStringMultiline", + "pos": [ + 1160, + 3680 + ], + "size": [ + 370, + 350 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 595 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 623 + ] + } + ], + "title": "Prompt", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveStringMultiline", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 321, + "type": "LTXVConcatAVLatent", + "pos": [ + 3820, + 4990 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 44, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 497 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 498 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 487 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 322, + "type": "LoadImage", + "pos": [ + 1150, + 4940 + ], + "size": [ + 400, + 480 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "COMBO", + "widget": { + "name": "image" + }, + "link": null + }, + { + "localized_name": "choose file to upload", + "name": "upload", + "type": "IMAGEUPLOAD", + "widget": { + "name": "upload" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 626 + ] + }, + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "LoadImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": 
[ + "example.png", + "image" + ] + }, + { + "id": 323, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 5370 + ], + "size": [ + 260, + 190 + ], + "flags": { + "collapsed": true + }, + "order": 45, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 628 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 629 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 630, + 631 + ] + } + ], + "title": "Math Expression (length)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + 1630, + 3550, + 480, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Generate Low Resolution", + "bounding": [ + 3150, + 3550, + 1020, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 2140, + 3550, + 980, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Generate High Resolution", + "bounding": [ + 4690, + 3550, + 960, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Lantent Upscale", + "bounding": [ + 4200, + 3550, + 460, + 1270 + ], + "color": 
"#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 19, + "title": "Video Settings", + "bounding": [ + 1110, + 3550, + 490, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 20, + "title": "Image Preprocess", + "bounding": [ + 1630, + 4850, + 830, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 21, + "title": "Empty Latent", + "bounding": [ + 2830, + 4850, + 1340, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 22, + "title": "Number conversion", + "bounding": [ + 2490, + 4850, + 320, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 26, + "title": "Image will not affect the video", + "bounding": [ + 1110, + 4850, + 490, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 512, + "origin_id": 290, + "origin_slot": 0, + "target_id": 280, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 513, + "origin_id": 309, + "origin_slot": 1, + "target_id": 280, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 478, + "origin_id": 287, + "origin_slot": 0, + "target_id": 284, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 479, + "origin_id": 286, + "origin_slot": 0, + "target_id": 284, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 480, + "origin_id": 286, + "origin_slot": 1, + "target_id": 284, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 541, + "origin_id": 287, + "origin_slot": 0, + "target_id": 316, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 518, + "origin_id": 306, + "origin_slot": 0, + "target_id": 316, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 519, + "origin_id": 306, + "origin_slot": 1, + "target_id": 316, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 483, + "origin_id": 279, + "origin_slot": 0, + "target_id": 285, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 484, + 
"origin_id": 316, + "origin_slot": 0, + "target_id": 285, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 485, + "origin_id": 293, + "origin_slot": 0, + "target_id": 285, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 544, + "origin_id": 308, + "origin_slot": 0, + "target_id": 285, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 487, + "origin_id": 321, + "origin_slot": 0, + "target_id": 285, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 475, + "origin_id": 306, + "origin_slot": 0, + "target_id": 286, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 476, + "origin_id": 306, + "origin_slot": 1, + "target_id": 286, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 477, + "origin_id": 309, + "origin_slot": 0, + "target_id": 286, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 520, + "origin_id": 318, + "origin_slot": 0, + "target_id": 287, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 523, + "origin_id": 292, + "origin_slot": 0, + "target_id": 288, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 547, + "origin_id": 309, + "origin_slot": 0, + "target_id": 289, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 545, + "origin_id": 313, + "origin_slot": 0, + "target_id": 289, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 554, + "origin_id": 295, + "origin_slot": 0, + "target_id": 289, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 552, + "origin_id": 295, + "origin_slot": 0, + "target_id": 290, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 515, + "origin_id": 291, + "origin_slot": 0, + "target_id": 290, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 548, + "origin_id": 289, + "origin_slot": 0, + "target_id": 290, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 543, + "origin_id": 304, + "origin_slot": 0, + "target_id": 290, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": 288, + "origin_slot": 0, + 
"target_id": 291, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 558, + "origin_id": 314, + "origin_slot": 0, + "target_id": 292, + "target_slot": 2, + "type": "INT" + }, + { + "id": 559, + "origin_id": 301, + "origin_slot": 0, + "target_id": 292, + "target_slot": 3, + "type": "INT" + }, + { + "id": 560, + "origin_id": 314, + "origin_slot": 0, + "target_id": 294, + "target_slot": 0, + "type": "INT" + }, + { + "id": 557, + "origin_id": 318, + "origin_slot": 2, + "target_id": 295, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 562, + "origin_id": 301, + "origin_slot": 0, + "target_id": 296, + "target_slot": 0, + "type": "INT" + }, + { + "id": 561, + "origin_id": 294, + "origin_slot": 1, + "target_id": 297, + "target_slot": 0, + "type": "INT" + }, + { + "id": 563, + "origin_id": 296, + "origin_slot": 1, + "target_id": 297, + "target_slot": 1, + "type": "INT" + }, + { + "id": 556, + "origin_id": 318, + "origin_slot": 2, + "target_id": 298, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 510, + "origin_id": 291, + "origin_slot": 0, + "target_id": 298, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 511, + "origin_id": 297, + "origin_slot": 0, + "target_id": 298, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 542, + "origin_id": 304, + "origin_slot": 0, + "target_id": 298, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 495, + "origin_id": 311, + "origin_slot": 1, + "target_id": 299, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 496, + "origin_id": 281, + "origin_slot": 0, + "target_id": 299, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 564, + "origin_id": 302, + "origin_slot": 0, + "target_id": 300, + "target_slot": 0, + "type": "INT" + }, + { + "id": 526, + "origin_id": 305, + "origin_slot": 0, + "target_id": 306, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 527, + "origin_id": 315, + "origin_slot": 0, + "target_id": 306, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 566, + 
"origin_id": 300, + "origin_slot": 0, + "target_id": 306, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 497, + "origin_id": 298, + "origin_slot": 0, + "target_id": 321, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 498, + "origin_id": 307, + "origin_slot": 0, + "target_id": 321, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 481, + "origin_id": 281, + "origin_slot": 0, + "target_id": 307, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 565, + "origin_id": 300, + "origin_slot": 1, + "target_id": 307, + "target_slot": 2, + "type": "INT" + }, + { + "id": 488, + "origin_id": 285, + "origin_slot": 0, + "target_id": 309, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 490, + "origin_id": 278, + "origin_slot": 0, + "target_id": 310, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 491, + "origin_id": 284, + "origin_slot": 0, + "target_id": 310, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 492, + "origin_id": 282, + "origin_slot": 0, + "target_id": 310, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 493, + "origin_id": 283, + "origin_slot": 0, + "target_id": 310, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 494, + "origin_id": 280, + "origin_slot": 0, + "target_id": 310, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 578, + "origin_id": 310, + "origin_slot": 0, + "target_id": 311, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 539, + "origin_id": 311, + "origin_slot": 0, + "target_id": 317, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 553, + "origin_id": 295, + "origin_slot": 0, + "target_id": 317, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 538, + "origin_id": 317, + "origin_slot": 0, + "target_id": 312, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 534, + "origin_id": 299, + "origin_slot": 0, + "target_id": 312, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 591, + "origin_id": 300, + "origin_slot": 0, + "target_id": 312, + "target_slot": 
2, + "type": "FLOAT" + }, + { + "id": 536, + "origin_id": 312, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 595, + "origin_id": -10, + "origin_slot": 0, + "target_id": 320, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 597, + "origin_id": -10, + "origin_slot": 1, + "target_id": 314, + "target_slot": 0, + "type": "INT" + }, + { + "id": 598, + "origin_id": -10, + "origin_slot": 2, + "target_id": 301, + "target_slot": 0, + "type": "INT" + }, + { + "id": 599, + "origin_id": -10, + "origin_slot": 3, + "target_id": 303, + "target_slot": 0, + "type": "INT" + }, + { + "id": 601, + "origin_id": -10, + "origin_slot": 4, + "target_id": 318, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 602, + "origin_id": -10, + "origin_slot": 5, + "target_id": 287, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 604, + "origin_id": -10, + "origin_slot": 4, + "target_id": 281, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 605, + "origin_id": -10, + "origin_slot": 4, + "target_id": 319, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 606, + "origin_id": -10, + "origin_slot": 6, + "target_id": 319, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 607, + "origin_id": -10, + "origin_slot": 7, + "target_id": 313, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 615, + "origin_id": 319, + "origin_slot": 0, + "target_id": 305, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 623, + "origin_id": 320, + "origin_slot": 0, + "target_id": 305, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 625, + "origin_id": 319, + "origin_slot": 0, + "target_id": 315, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 626, + "origin_id": 322, + "origin_slot": 0, + "target_id": 292, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 627, + "origin_id": -10, + "origin_slot": 8, + "target_id": 302, + "target_slot": 0, + "type": "INT" + }, + { + "id": 628, + "origin_id": 303, + "origin_slot": 0, + 
"target_id": 323, + "target_slot": 0, + "type": "INT" + }, + { + "id": 629, + "origin_id": 302, + "origin_slot": 0, + "target_id": 323, + "target_slot": 1, + "type": "INT" + }, + { + "id": 630, + "origin_id": 323, + "origin_slot": 1, + "target_id": 307, + "target_slot": 1, + "type": "INT" + }, + { + "id": 631, + "origin_id": 323, + "origin_slot": 1, + "target_id": 297, + "target_slot": 2, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Video generation and editing/Text to video", + "description": "Generates video from text prompts using LTX-2.3, Lightricks' video diffusion model." + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Video (Wan 2.2).json b/blueprints/Text to Video (Wan 2.2).json index 9f1b69669..a264a490d 100644 --- a/blueprints/Text to Video (Wan 2.2).json +++ b/blueprints/Text to Video (Wan 2.2).json @@ -1 +1,1590 @@ -{"id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", "revision": 0, "last_node_id": 116, "last_link_id": 188, "nodes": [{"id": 114, "type": "59b2f9c7-af11-45c8-a22b-871166f816c0", "pos": [900.0000142553818, 629.999938027585], "size": [400, 394.97395833333337], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}], "outputs": [{"name": "VIDEO", "type": "VIDEO", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "length"], ["-1", "width"], ["-1", "height"], ["81", "noise_seed"], ["81", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 81, 640, 640]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": 
"59b2f9c7-af11-45c8-a22b-871166f816c0", "version": 1, "state": {"lastGroupId": 15, "lastNodeId": 114, "lastLinkId": 196, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Text to Video (Wan 2.2)", "inputNode": {"id": -10, "bounding": [-99.66668418897854, 621.3333300391974, 120, 120]}, "outputNode": {"id": -20, "bounding": [1661.9927561248032, 500.2133490758798, 120, 60]}, "inputs": [{"id": "3a15ef44-456f-4a3a-ade7-7a0840166830", "name": "text", "type": "STRING", "linkIds": [189], "label": "prompt", "pos": [0.333315811021464, 641.3333300391974]}, {"id": "ec76f1bf-b130-4dc9-a50c-0b10002725d6", "name": "length", "type": "INT", "linkIds": [190], "pos": [0.333315811021464, 661.3333300391974]}, {"id": "1abb6b00-a8b4-4e72-9d87-53f1fc5d281e", "name": "width", "type": "INT", "linkIds": [191], "pos": [0.333315811021464, 681.3333300391974]}, {"id": "0af36ab5-ee95-4ce5-9ad9-26436319a0d2", "name": "height", "type": "INT", "linkIds": [192], "pos": [0.333315811021464, 701.3333300391974]}], "outputs": [{"id": "6bdfda51-5568-48bf-8985-dbad1e11b3d8", "name": "VIDEO", "type": "VIDEO", "linkIds": [196], "pos": [1681.9927561248032, 520.2133490758798]}], "widgets": [], "nodes": [{"id": 71, "type": "CLIPLoader", "pos": [50.33329119280961, 51.33334121884377], "size": [346.38020833333337, 98], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [141, 160]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": 
"https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 73, "type": "VAELoader", "pos": [50.33329119280961, 211.33336855035554], "size": [344.7135416666667, 50], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [158]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 76, "type": "UNETLoader", "pos": [50.33329119280961, -78.66666636275716], "size": [346.7447916666667, 74], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [155]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 75, "type": "UNETLoader", "pos": [50.33329119280961, 
-208.66667394435814], "size": [346.7447916666667, 74], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [153]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 83, "type": "LoraLoaderModelOnly", "pos": [450.3332425195698, -198.66662836038148], "size": [279.9869791666667, 74], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 153}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [152]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", "directory": "loras"}]}, "widgets_values": ["wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", 1.0000000000000002]}, {"id": 85, "type": 
"LoraLoaderModelOnly", "pos": [450.3332425195698, -58.66669219682302], "size": [279.9869791666667, 74], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 155}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [156]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", "directory": "loras"}]}, "widgets_values": ["wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", 1.0000000000000002]}, {"id": 86, "type": "ModelSamplingSD3", "pos": [740.3332774326827, -58.66669219682302], "size": [210, 50], "flags": {"collapsed": false}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 156}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [183]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3"}, "widgets_values": [5.000000000000001]}, {"id": 82, "type": "ModelSamplingSD3", "pos": [740.3332774326827, -198.66662836038148], "size": [210, 50], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 152}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": 
[{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [181]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3"}, "widgets_values": [5.000000000000001]}, {"id": 81, "type": "KSamplerAdvanced", "pos": [990.3333640139272, -248.66668077723608], "size": [300, 440.98958333333337], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 181}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 149}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 150}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 151}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": 
"0.3.45", "Node name for S&R": "KSamplerAdvanced"}, "widgets_values": ["enable", 0, "randomize", 4, 1, "euler", "simple", 0, 2, "enable"]}, {"id": 74, "type": "EmptyHunyuanLatentVideo", "pos": [70.33326535874369, 381.33332446382485], "size": [314.9869791666667, 122], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 191}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 192}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 190}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [151]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "EmptyHunyuanLatentVideo"}, "widgets_values": [640, 640, 81, 1]}, {"id": 78, "type": "KSamplerAdvanced", "pos": [1310.3334186769505, -248.66668077723608], "size": [304.73958333333337, 440.98958333333337], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 183}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 143}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 144}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 145}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": 
"sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [157]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced"}, "widgets_values": ["disable", 0, "fixed", 4, 1, "euler", "simple", 2, 4, "disable"]}, {"id": 114, "type": "CreateVideo", "pos": [1320.333347258908, 441.33336396364655], "size": [269.9869791666667, 70], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 195}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [196]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [16]}, {"id": 112, "type": "Note", "pos": [30.33320002485607, -428.6666237736725], "size": [359.9869791666667, 52], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [], "title": "About 4 Steps LoRA", "properties": {}, "widgets_values": ["Using the Wan2.2 Lighting LoRA will result in the loss of video dynamics, but it will reduce the generation time. 
This template provides two workflows, and you can enable one as needed."], "color": "#222", "bgcolor": "#000"}, {"id": 62, "type": "MarkdownNote", "pos": [-489.666771800538, -278.666700527147], "size": [479.9869791666667, 542.1354166666667], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "Model Links", "properties": {}, "widgets_values": ["[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n) | [教程](https://docs.comfy.org/zh-CN/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model** \n- [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors)\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ 
├───wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors\n│ │ └───wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n"], "color": "#222", "bgcolor": "#000"}, {"id": 87, "type": "VAEDecode", "pos": [1020.3331497597994, 471.3333837135574], "size": [210, 46], "flags": {"collapsed": false}, "order": 14, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 157}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 158}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [195]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 72, "type": "CLIPTextEncode", "pos": [440.3333139376125, 331.3333305479798], "size": [500, 170], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 141}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [144, 150]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,裸露,NSFW"], "color": "#322", "bgcolor": "#533"}, {"id": 89, "type": "CLIPTextEncode", "pos": [440.3333139376125, 131.33323788258042], "size": [510, 170], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 160}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 189}], "outputs": 
[{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [143, 149]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}], "groups": [{"id": 13, "title": "Wan2.2 T2V fp8_scaled + 4 steps LoRA", "bounding": [31.999982477688036, -317.00000329413615, 1610, 880], "color": "#444", "font_size": 24, "flags": {}}, {"id": 6, "title": "Step3 Prompt", "bounding": [431.99998247768815, 57.99999670586385, 530, 460], "color": "#444", "font_size": 24, "flags": {}}, {"id": 7, "title": "Lightx2v 4steps LoRA", "bounding": [431.99998247768815, -275.33333662746946, 530, 320], "color": "#444", "font_size": 24, "flags": {}}, {"id": 11, "title": "Step 1 - Load models", "bounding": [40.33331581102152, -275.33333662746946, 366.7470703125, 563.5814208984375], "color": "#444", "font_size": 24, "flags": {}}, {"id": 12, "title": "Step 2 - Video size", "bounding": [40.33331581102152, 299.6666633725306, 370, 230], "color": "#444", "font_size": 24, "flags": {}}], "links": [{"id": 153, "origin_id": 75, "origin_slot": 0, "target_id": 83, "target_slot": 0, "type": "MODEL"}, {"id": 155, "origin_id": 76, "origin_slot": 0, "target_id": 85, "target_slot": 0, "type": "MODEL"}, {"id": 156, "origin_id": 85, "origin_slot": 0, "target_id": 86, "target_slot": 0, "type": "MODEL"}, {"id": 152, "origin_id": 83, "origin_slot": 0, "target_id": 82, "target_slot": 0, "type": "MODEL"}, {"id": 160, "origin_id": 71, "origin_slot": 0, "target_id": 89, "target_slot": 0, "type": "CLIP"}, {"id": 181, "origin_id": 82, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "MODEL"}, {"id": 149, "origin_id": 89, "origin_slot": 0, "target_id": 81, "target_slot": 1, "type": "CONDITIONING"}, {"id": 150, "origin_id": 72, "origin_slot": 0, "target_id": 81, "target_slot": 2, "type": "CONDITIONING"}, {"id": 151, "origin_id": 
74, "origin_slot": 0, "target_id": 81, "target_slot": 3, "type": "LATENT"}, {"id": 157, "origin_id": 78, "origin_slot": 0, "target_id": 87, "target_slot": 0, "type": "LATENT"}, {"id": 158, "origin_id": 73, "origin_slot": 0, "target_id": 87, "target_slot": 1, "type": "VAE"}, {"id": 141, "origin_id": 71, "origin_slot": 0, "target_id": 72, "target_slot": 0, "type": "CLIP"}, {"id": 183, "origin_id": 86, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "MODEL"}, {"id": 143, "origin_id": 89, "origin_slot": 0, "target_id": 78, "target_slot": 1, "type": "CONDITIONING"}, {"id": 144, "origin_id": 72, "origin_slot": 0, "target_id": 78, "target_slot": 2, "type": "CONDITIONING"}, {"id": 145, "origin_id": 81, "origin_slot": 0, "target_id": 78, "target_slot": 3, "type": "LATENT"}, {"id": 189, "origin_id": -10, "origin_slot": 0, "target_id": 89, "target_slot": 1, "type": "STRING"}, {"id": 190, "origin_id": -10, "origin_slot": 1, "target_id": 74, "target_slot": 2, "type": "INT"}, {"id": 191, "origin_id": -10, "origin_slot": 2, "target_id": 74, "target_slot": 0, "type": "INT"}, {"id": 192, "origin_id": -10, "origin_slot": 3, "target_id": 74, "target_slot": 1, "type": "INT"}, {"id": 195, "origin_id": 87, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "IMAGE"}, {"id": 196, "origin_id": 114, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Text to video"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 116, + "last_link_id": 188, + "nodes": [ + { + "id": 114, + "type": "59b2f9c7-af11-45c8-a22b-871166f816c0", + "pos": [ + 900.0000142553818, + 629.999938027585 + ], + "size": [ + 400, + 
394.97395833333337 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + } + ], + "outputs": [ + { + "name": "VIDEO", + "type": "VIDEO", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "-1", + "text" + ], + [ + "-1", + "length" + ], + [ + "-1", + "width" + ], + [ + "-1", + "height" + ], + [ + "81", + "noise_seed" + ], + [ + "81", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.11.0" + }, + "widgets_values": [ + "", + 81, + 640, + 640 + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "59b2f9c7-af11-45c8-a22b-871166f816c0", + "version": 1, + "state": { + "lastGroupId": 15, + "lastNodeId": 114, + "lastLinkId": 196, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Video (Wan 2.2)", + "inputNode": { + "id": -10, + "bounding": [ + -99.66668418897854, + 621.3333300391974, + 120, + 120 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1661.9927561248032, + 500.2133490758798, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "3a15ef44-456f-4a3a-ade7-7a0840166830", + "name": "text", + "type": "STRING", + "linkIds": [ + 189 + ], + "label": "prompt", + "pos": [ + 0.333315811021464, + 641.3333300391974 + ] + }, + { + "id": "ec76f1bf-b130-4dc9-a50c-0b10002725d6", + "name": "length", + "type": "INT", + "linkIds": [ + 190 + ], + "pos": [ + 0.333315811021464, + 661.3333300391974 + ] + }, + { + "id": "1abb6b00-a8b4-4e72-9d87-53f1fc5d281e", + "name": "width", + "type": "INT", + "linkIds": [ + 191 + ], + "pos": [ + 0.333315811021464, + 681.3333300391974 
+ ] + }, + { + "id": "0af36ab5-ee95-4ce5-9ad9-26436319a0d2", + "name": "height", + "type": "INT", + "linkIds": [ + 192 + ], + "pos": [ + 0.333315811021464, + 701.3333300391974 + ] + } + ], + "outputs": [ + { + "id": "6bdfda51-5568-48bf-8985-dbad1e11b3d8", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 196 + ], + "pos": [ + 1681.9927561248032, + 520.2133490758798 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 71, + "type": "CLIPLoader", + "pos": [ + 50.33329119280961, + 51.33334121884377 + ], + "size": [ + 346.38020833333337, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 141, + 160 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 73, + "type": "VAELoader", + "pos": [ + 50.33329119280961, + 211.33336855035554 + ], + "size": [ + 344.7135416666667, + 50 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 158 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "wan_2.1_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 76, + "type": "UNETLoader", + "pos": [ + 50.33329119280961, + -78.66666636275716 + ], + "size": [ + 346.7447916666667, + 74 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 155 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 75, + "type": "UNETLoader", + "pos": [ + 50.33329119280961, + -208.66667394435814 + ], + "size": [ + 346.7447916666667, + 74 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + 
"type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 153 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 83, + "type": "LoraLoaderModelOnly", + "pos": [ + 450.3332425195698, + -198.66662836038148 + ], + "size": [ + 279.9869791666667, + 74 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 153 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 152 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", + 1.0000000000000002 + ] + }, + { + "id": 85, + "type": "LoraLoaderModelOnly", + "pos": [ 
+ 450.3332425195698, + -58.66669219682302 + ], + "size": [ + 279.9869791666667, + 74 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 155 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 156 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", + 1.0000000000000002 + ] + }, + { + "id": 86, + "type": "ModelSamplingSD3", + "pos": [ + 740.3332774326827, + -58.66669219682302 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": false + }, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 156 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 183 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 5.000000000000001 + ] + }, + { + "id": 82, + "type": "ModelSamplingSD3", + "pos": [ + 740.3332774326827, + -198.66662836038148 
+ ], + "size": [ + 210, + 50 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 152 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 181 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 5.000000000000001 + ] + }, + { + "id": 81, + "type": "KSamplerAdvanced", + "pos": [ + 990.3333640139272, + -248.66668077723608 + ], + "size": [ + 300, + 440.98958333333337 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 149 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 150 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 151 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "COMBO", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": 
"scheduler" + }, + "link": null + }, + { + "localized_name": "start_at_step", + "name": "start_at_step", + "type": "INT", + "widget": { + "name": "start_at_step" + }, + "link": null + }, + { + "localized_name": "end_at_step", + "name": "end_at_step", + "type": "INT", + "widget": { + "name": "end_at_step" + }, + "link": null + }, + { + "localized_name": "return_with_leftover_noise", + "name": "return_with_leftover_noise", + "type": "COMBO", + "widget": { + "name": "return_with_leftover_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 145 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 0, + "randomize", + 4, + 1, + "euler", + "simple", + 0, + 2, + "enable" + ] + }, + { + "id": 74, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 70.33326535874369, + 381.33332446382485 + ], + "size": [ + 314.9869791666667, + 122 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 191 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 192 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 190 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 151 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 640, + 640, + 81, + 1 + ] + }, + { + "id": 78, + "type": "KSamplerAdvanced", + "pos": [ + 
1310.3334186769505, + -248.66668077723608 + ], + "size": [ + 304.73958333333337, + 440.98958333333337 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 183 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 143 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 144 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 145 + }, + { + "localized_name": "add_noise", + "name": "add_noise", + "type": "COMBO", + "widget": { + "name": "add_noise" + }, + "link": null + }, + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "start_at_step", + "name": "start_at_step", + "type": "INT", + "widget": { + "name": "start_at_step" + }, + "link": null + }, + { + "localized_name": "end_at_step", + "name": "end_at_step", + "type": "INT", + "widget": { + "name": "end_at_step" + }, + "link": null + }, + { + "localized_name": "return_with_leftover_noise", + "name": "return_with_leftover_noise", + "type": "COMBO", + "widget": { + "name": "return_with_leftover_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 157 + ] + } + ], + 
"properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 4, + 1, + "euler", + "simple", + 2, + 4, + "disable" + ] + }, + { + "id": 114, + "type": "CreateVideo", + "pos": [ + 1320.333347258908, + 441.33336396364655 + ], + "size": [ + 269.9869791666667, + 70 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 195 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 196 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.11.0", + "Node name for S&R": "CreateVideo" + }, + "widgets_values": [ + 16 + ] + }, + { + "id": 112, + "type": "Note", + "pos": [ + 30.33320002485607, + -428.6666237736725 + ], + "size": [ + 359.9869791666667, + 52 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "About 4 Steps LoRA", + "properties": {}, + "widgets_values": [ + "Using the Wan2.2 Lighting LoRA will result in the loss of video dynamics, but it will reduce the generation time. This template provides two workflows, and you can enable one as needed." 
+ ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 62, + "type": "MarkdownNote", + "pos": [ + -489.666771800538, + -278.666700527147 + ], + "size": [ + 479.9869791666667, + 542.1354166666667 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Model Links", + "properties": {}, + "widgets_values": [ + "[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n) | [教程](https://docs.comfy.org/zh-CN/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model** \n- [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors)\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ 
├───wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors\n│ │ └───wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 87, + "type": "VAEDecode", + "pos": [ + 1020.3331497597994, + 471.3333837135574 + ], + "size": [ + 210, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 157 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 158 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 195 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 72, + "type": "CLIPTextEncode", + "pos": [ + 440.3333139376125, + 331.3333305479798 + ], + "size": [ + 500, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 141 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 144, + 150 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,裸露,NSFW" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 89, + "type": "CLIPTextEncode", + "pos": [ + 440.3333139376125, + 
131.33323788258042 + ], + "size": [ + 510, + 170 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 160 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 189 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 143, + 149 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.45", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "groups": [ + { + "id": 13, + "title": "Wan2.2 T2V fp8_scaled + 4 steps LoRA", + "bounding": [ + 31.999982477688036, + -317.00000329413615, + 1610, + 880 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Step3 Prompt", + "bounding": [ + 431.99998247768815, + 57.99999670586385, + 530, + 460 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Lightx2v 4steps LoRA", + "bounding": [ + 431.99998247768815, + -275.33333662746946, + 530, + 320 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 11, + "title": "Step 1 - Load models", + "bounding": [ + 40.33331581102152, + -275.33333662746946, + 366.7470703125, + 563.5814208984375 + ], + "color": "#444", + "font_size": 24, + "flags": {} + }, + { + "id": 12, + "title": "Step 2 - Video size", + "bounding": [ + 40.33331581102152, + 299.6666633725306, + 370, + 230 + ], + "color": "#444", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 153, + "origin_id": 75, + "origin_slot": 0, + "target_id": 83, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 155, + "origin_id": 76, + "origin_slot": 0, + "target_id": 85, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 156, + "origin_id": 85, + 
"origin_slot": 0, + "target_id": 86, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 152, + "origin_id": 83, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 160, + "origin_id": 71, + "origin_slot": 0, + "target_id": 89, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 181, + "origin_id": 82, + "origin_slot": 0, + "target_id": 81, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 149, + "origin_id": 89, + "origin_slot": 0, + "target_id": 81, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 150, + "origin_id": 72, + "origin_slot": 0, + "target_id": 81, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 151, + "origin_id": 74, + "origin_slot": 0, + "target_id": 81, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 157, + "origin_id": 78, + "origin_slot": 0, + "target_id": 87, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 158, + "origin_id": 73, + "origin_slot": 0, + "target_id": 87, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 141, + "origin_id": 71, + "origin_slot": 0, + "target_id": 72, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 183, + "origin_id": 86, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 143, + "origin_id": 89, + "origin_slot": 0, + "target_id": 78, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 144, + "origin_id": 72, + "origin_slot": 0, + "target_id": 78, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 145, + "origin_id": 81, + "origin_slot": 0, + "target_id": 78, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 189, + "origin_id": -10, + "origin_slot": 0, + "target_id": 89, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 190, + "origin_id": -10, + "origin_slot": 1, + "target_id": 74, + "target_slot": 2, + "type": "INT" + }, + { + "id": 191, + "origin_id": -10, + "origin_slot": 2, + "target_id": 74, + "target_slot": 0, + "type": "INT" + }, + { 
+ "id": 192, + "origin_id": -10, + "origin_slot": 3, + "target_id": 74, + "target_slot": 1, + "type": "INT" + }, + { + "id": 195, + "origin_id": 87, + "origin_slot": 0, + "target_id": 114, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 196, + "origin_id": 114, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Text to video", + "description": "Generates video from text prompts using Wan2.2, Alibaba's diffusion video model." + } + ] + }, + "config": {}, + "extra": { + "frontendVersion": "1.37.10", + "workflowRendererVersion": "LG", + "VHS_latentpreview": false, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "version": 0.4 +} \ No newline at end of file diff --git a/blueprints/Unsharp Mask.json b/blueprints/Unsharp Mask.json index 9363037ef..79a4c954f 100644 --- a/blueprints/Unsharp Mask.json +++ b/blueprints/Unsharp Mask.json @@ -1 +1,442 @@ -{"revision": 0, "last_node_id": 30, "last_link_id": 0, "nodes": [{"id": 30, "type": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", "pos": [4420, -370], "size": [210, 106], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Unsharp Mask", "properties": {"proxyWidgets": [["27", "value"], ["28", "value"], ["29", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 29, "lastLinkId": 43, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Unsharp Mask", "inputNode": {"id": -10, "bounding": [3920, -405, 120, 60]}, "outputNode": {"id": -20, "bounding": [4930, -405, 120, 60]}, "inputs": [{"id": 
"75354555-d2f3-46b9-a3dd-b076dcfca561", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image0", "pos": [4020, -385]}], "outputs": [{"id": "04368b94-2a96-46ff-8c07-d0ce3235b40d", "name": "IMAGE0", "type": "IMAGE", "linkIds": [40], "localized_name": "IMAGE0", "pos": [4950, -385]}], "widgets": [], "nodes": [{"id": 27, "type": "PrimitiveFloat", "pos": [4100, -540], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [41]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [1]}, {"id": 28, "type": "PrimitiveFloat", "pos": [4100, -430], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [42]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 10, "precision": 1, "step": 0.5}, "widgets_values": [3]}, {"id": 29, "type": "PrimitiveFloat", "pos": [4100, -320], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [43]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}, {"id": 26, "type": "GLSLShader", "pos": [4470, -580], "size": [400, 232], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 39}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 41}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 42}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 43}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [40]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5\nuniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels\nuniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n return dot(color, vec3(0.2126, 0.7152, 
0.0722));\n}\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n float radius = max(u_float1, 0.5);\n float amount = u_float0;\n float threshold = u_float2;\n\n vec4 original = texture(u_image0, v_texCoord);\n\n // Gaussian blur for the \"unsharp\" mask\n int samples = int(ceil(radius));\n float sigma = radius / 2.0;\n\n vec4 blurred = vec4(0.0);\n float totalWeight = 0.0;\n\n for (int x = -samples; x <= samples; x++) {\n for (int y = -samples; y <= samples; y++) {\n vec2 offset = vec2(float(x), float(y)) * texel;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float dist = length(vec2(float(x), float(y)));\n float weight = gaussian(dist, sigma);\n blurred += sample_color * weight;\n totalWeight += weight;\n }\n }\n blurred /= totalWeight;\n\n // Unsharp mask = original - blurred\n vec3 mask = original.rgb - blurred.rgb;\n\n // Luminance-based threshold with smooth falloff\n float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n mask *= thresholdScale;\n\n // Sharpen: original + mask * amount\n vec3 sharpened = original.rgb + mask * amount;\n\n fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 41, "origin_id": 27, "origin_slot": 0, "target_id": 26, "target_slot": 2, "type": "FLOAT"}, {"id": 42, "origin_id": 28, "origin_slot": 0, "target_id": 26, "target_slot": 3, "type": "FLOAT"}, {"id": 43, "origin_id": 29, "origin_slot": 0, "target_id": 26, "target_slot": 4, "type": "FLOAT"}, {"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 26, "target_slot": 0, "type": "IMAGE"}, {"id": 40, "origin_id": 26, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}} +{ + "revision": 0, + "last_node_id": 30, + "last_link_id": 0, + "nodes": [ + { + "id": 30, + "type": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", 
+ "pos": [ + 4420, + -370 + ], + "size": [ + 210, + 106 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [] + } + ], + "title": "Unsharp Mask", + "properties": { + "proxyWidgets": [ + [ + "27", + "value" + ], + [ + "28", + "value" + ], + [ + "29", + "value" + ] + ] + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 29, + "lastLinkId": 43, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Unsharp Mask", + "inputNode": { + "id": -10, + "bounding": [ + 3920, + -405, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 4930, + -405, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "75354555-d2f3-46b9-a3dd-b076dcfca561", + "name": "images.image0", + "type": "IMAGE", + "linkIds": [ + 39 + ], + "localized_name": "images.image0", + "label": "image0", + "pos": [ + 4020, + -385 + ] + } + ], + "outputs": [ + { + "id": "04368b94-2a96-46ff-8c07-d0ce3235b40d", + "name": "IMAGE0", + "type": "IMAGE", + "linkIds": [ + 40 + ], + "localized_name": "IMAGE0", + "pos": [ + 4950, + -385 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 27, + "type": "PrimitiveFloat", + "pos": [ + 4100, + -540 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "amount", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 41 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 3, + "precision": 
2, + "step": 0.05 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 28, + "type": "PrimitiveFloat", + "pos": [ + 4100, + -430 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "radius", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 42 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 10, + "precision": 1, + "step": 0.5 + }, + "widgets_values": [ + 3 + ] + }, + { + "id": 29, + "type": "PrimitiveFloat", + "pos": [ + 4100, + -320 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "threshold", + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 43 + ] + } + ], + "properties": { + "Node name for S&R": "PrimitiveFloat", + "min": 0, + "max": 1, + "precision": 2, + "step": 0.05 + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 26, + "type": "GLSLShader", + "pos": [ + 4470, + -580 + ], + "size": [ + 400, + 232 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 39 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "label": "u_float0", + "localized_name": "floats.u_float0", + "name": "floats.u_float0", + "shape": 7, + "type": "FLOAT", + "link": 41 + }, + { + "label": "u_float1", + "localized_name": "floats.u_float1", + "name": "floats.u_float1", + "shape": 7, + "type": "FLOAT", + "link": 42 + }, + { + "label": "u_float2", + "localized_name": 
"floats.u_float2", + "name": "floats.u_float2", + "shape": 7, + "type": "FLOAT", + "link": 43 + }, + { + "label": "u_float3", + "localized_name": "floats.u_float3", + "name": "floats.u_float3", + "shape": 7, + "type": "FLOAT", + "link": null + }, + { + "label": "u_int0", + "localized_name": "ints.u_int0", + "name": "ints.u_int0", + "shape": 7, + "type": "INT", + "link": null + }, + { + "localized_name": "fragment_shader", + "name": "fragment_shader", + "type": "STRING", + "widget": { + "name": "fragment_shader" + }, + "link": null + }, + { + "localized_name": "size_mode", + "name": "size_mode", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "size_mode" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE0", + "name": "IMAGE0", + "type": "IMAGE", + "links": [ + 40 + ] + }, + { + "localized_name": "IMAGE1", + "name": "IMAGE1", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE2", + "name": "IMAGE2", + "type": "IMAGE", + "links": null + }, + { + "localized_name": "IMAGE3", + "name": "IMAGE3", + "type": "IMAGE", + "links": null + } + ], + "properties": { + "Node name for S&R": "GLSLShader" + }, + "widgets_values": [ + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5\nuniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels\nuniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n float radius = max(u_float1, 0.5);\n float amount = u_float0;\n float threshold = u_float2;\n\n vec4 original = texture(u_image0, v_texCoord);\n\n // Gaussian blur for the \"unsharp\" mask\n int samples = 
int(ceil(radius));\n float sigma = radius / 2.0;\n\n vec4 blurred = vec4(0.0);\n float totalWeight = 0.0;\n\n for (int x = -samples; x <= samples; x++) {\n for (int y = -samples; y <= samples; y++) {\n vec2 offset = vec2(float(x), float(y)) * texel;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float dist = length(vec2(float(x), float(y)));\n float weight = gaussian(dist, sigma);\n blurred += sample_color * weight;\n totalWeight += weight;\n }\n }\n blurred /= totalWeight;\n\n // Unsharp mask = original - blurred\n vec3 mask = original.rgb - blurred.rgb;\n\n // Luminance-based threshold with smooth falloff\n float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n mask *= thresholdScale;\n\n // Sharpen: original + mask * amount\n vec3 sharpened = original.rgb + mask * amount;\n\n fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n", + "from_input" + ] + } + ], + "groups": [], + "links": [ + { + "id": 41, + "origin_id": 27, + "origin_slot": 0, + "target_id": 26, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 42, + "origin_id": 28, + "origin_slot": 0, + "target_id": 26, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 43, + "origin_id": 29, + "origin_slot": 0, + "target_id": 26, + "target_slot": 4, + "type": "FLOAT" + }, + { + "id": 39, + "origin_id": -10, + "origin_slot": 0, + "target_id": 26, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 40, + "origin_id": 26, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image Tools/Sharpen", + "description": "Enhances edge contrast via unsharp masking for a sharper image appearance." 
+ } + ] + } +} \ No newline at end of file diff --git a/blueprints/Video Captioning (Gemini).json b/blueprints/Video Captioning (Gemini).json index 1d72718a1..7642b23c1 100644 --- a/blueprints/Video Captioning (Gemini).json +++ b/blueprints/Video Captioning (Gemini).json @@ -1 +1,315 @@ -{"revision": 0, "last_node_id": 233, "last_link_id": 0, "nodes": [{"id": 233, "type": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", "pos": [0, 1140], "size": [400, 260], "flags": {}, "order": 7, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"name": "video", "type": "VIDEO", "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": []}], "title": "Video Captioning(Gemini)", "properties": {"proxyWidgets": [["-1", "prompt"], ["-1", "model"], ["1", "seed"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["Describe this video", "gemini-2.5-pro"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 16, "lastLinkId": 17, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Captioning(Gemini)", "inputNode": {"id": -10, "bounding": [-6870, 2530, 120, 100]}, "outputNode": {"id": -20, "bounding": [-6240, 2530, 120, 60]}, "inputs": [{"id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", "name": "prompt", "type": "STRING", "linkIds": [15], "pos": [-6770, 2550]}, {"id": "b034e26a-d114-4604-aec2-32783e86aa6b", "name": "model", "type": "COMBO", "linkIds": [16], "pos": [-6770, 2570]}, {"id": "f7363f60-a106-4e06-90af-df5f53355b98", "name": "video", "type": "VIDEO", "linkIds": [17], "pos": [-6770, 2590]}], "outputs": [{"id": "e12c6e80-5210-4328-a581-bc8924c53070", "name": "STRING", "type": "STRING", "linkIds": [6], "localized_name": "STRING", "pos": [-6220, 2550]}], "widgets": [], "nodes": [{"id": 1, 
"type": "GeminiNode", "pos": [-6690, 2360], "size": [390, 430], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": null}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": 17}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 15}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": 16}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["Describe this video", "gemini-2.5-pro", 511865409297955, "randomize", "- Role: AI Video Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze videos (including frame sequences, dynamic movements, audio-visual elements) and generate detailed, structured descriptions. These descriptions must be directly usable as video generation prompts to create similar videos, serving core tasks such as video content creation, creative inspiration extraction, and artistic style exploration.\n- Profile: As an AI Video Analysis and Description Specialist, you possess expertise in computer vision, video temporal sequence processing, motion analysis, and multi-modal natural language generation. 
You excel at interpreting dynamic visual data (frame-by-frame features + continuous motion) and translating it into precise descriptive text that fully guides the creation of new videos with matching style, rhythm, and content.\n- Skills: Proficiency in video frame feature extraction, motion trajectory recognition, temporal rhythm analysis, scene/shot segmentation, color grading detection, camera movement identification (pan/tilt/zoom/dolly), audio-visual element correlation analysis, and descriptive language generation that captures both static visual features and dynamic temporal characteristics. Mastery of artistic elements in video: composition (per frame + dynamic framing), color palette (consistent + transitional), texture (surface details + motion blur), pacing (frame rate, shot duration), and sound style (background music, ambient sound cues).\n- Goals: To analyze the provided video comprehensively, generate a detailed, structured description that captures all key video elements (static visual features + dynamic motion/temporal characteristics + audio-visual style), and ensure this description can directly serve as a high-quality prompt for creating similar videos.\n- Constraints: \n 1. The description must be clear, structured, and specific enough to guide end-to-end video creation (including frame rate, shot duration, camera movement, motion speed, color transitions).\n 2. Avoid ambiguity; focus on the most salient static (per-frame) and dynamic (temporal) features of the video.\n 3. Prioritize video-specific elements: motion trajectory, shot types (close-up/wide shot/etc.), camera movement, frame rate, scene transitions, rhythm/pacing, and temporal color changes.\n 4. The output must only contain the video generation prompt (no extra explanations).\n- OutputFormat: A detailed, hierarchical text description of the video, structured as follows:\n 1. Core Content & Narrative: Brief overview of the video's subject and temporal progression\n 2. 
Visual Style (Static): Per-frame key elements (objects, colors, composition, lighting, texture)\n 3. Dynamic Elements (Temporal): Motion details (speed, trajectory, direction), camera movement (type, speed, direction), shot duration/frame rate, scene transitions\n 4. Audio-Visual Style: Color grading (consistent/transitional), rhythm/pacing, and implied audio style (if discernible)\n- Workflow:\n 1. Analyze the video to segment shots/scenes, identify frame-by-frame static visual elements (objects, colors, composition) and cross-frame dynamic elements (motion, camera movement, temporal changes).\n 2. Extract video-specific technical features: frame rate, shot duration, scene transition types, motion speed/rhythm.\n 3. Generate a structured, detailed description that captures the essence of the video (static + dynamic + temporal characteristics), ensuring specificity and actionability for video generation.\n 4. Refine the description for clarity, conciseness, and alignment with video generation prompt norms (e.g., including frame rate, camera movement terms, motion speed descriptors)."], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 6, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "*"}, {"id": 15, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "STRING"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 5, "type": "COMBO"}, {"id": 17, "origin_id": -10, "origin_slot": 2, "target_id": 1, "target_slot": 2, "type": "VIDEO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Video Captioning"}]}} +{ + "revision": 0, + "last_node_id": 233, + "last_link_id": 0, + "nodes": [ + { + "id": 233, + "type": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", + "pos": [ + 0, + 1140 + ], + "size": [ + 400, + 260 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + 
"link": null + }, + { + "name": "model", + "type": "COMBO", + "widget": { + "name": "model" + }, + "link": null + }, + { + "name": "video", + "type": "VIDEO", + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [] + } + ], + "title": "Video Captioning(Gemini)", + "properties": { + "proxyWidgets": [ + [ + "-1", + "prompt" + ], + [ + "-1", + "model" + ], + [ + "1", + "seed" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + "Describe this video", + "gemini-2.5-pro" + ] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", + "version": 1, + "state": { + "lastGroupId": 1, + "lastNodeId": 16, + "lastLinkId": 17, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Captioning(Gemini)", + "inputNode": { + "id": -10, + "bounding": [ + -6870, + 2530, + 120, + 100 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -6240, + 2530, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 15 + ], + "pos": [ + -6770, + 2550 + ] + }, + { + "id": "b034e26a-d114-4604-aec2-32783e86aa6b", + "name": "model", + "type": "COMBO", + "linkIds": [ + 16 + ], + "pos": [ + -6770, + 2570 + ] + }, + { + "id": "f7363f60-a106-4e06-90af-df5f53355b98", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 17 + ], + "pos": [ + -6770, + 2590 + ] + } + ], + "outputs": [ + { + "id": "e12c6e80-5210-4328-a581-bc8924c53070", + "name": "STRING", + "type": "STRING", + "linkIds": [ + 6 + ], + "localized_name": "STRING", + "pos": [ + -6220, + 2550 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 1, + "type": "GeminiNode", + "pos": [ + -6690, + 2360 + ], + "size": [ + 390, + 430 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "shape": 7, + "type": 
"IMAGE", + "link": null + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "localized_name": "video", + "name": "video", + "shape": 7, + "type": "VIDEO", + "link": 17 + }, + { + "localized_name": "files", + "name": "files", + "shape": 7, + "type": "GEMINI_INPUT_FILES", + "link": null + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 15 + }, + { + "localized_name": "model", + "name": "model", + "type": "COMBO", + "widget": { + "name": "model" + }, + "link": 16 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "system_prompt", + "name": "system_prompt", + "shape": 7, + "type": "STRING", + "widget": { + "name": "system_prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 6 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "GeminiNode" + }, + "widgets_values": [ + "Describe this video", + "gemini-2.5-pro", + 511865409297955, + "randomize", + "- Role: AI Video Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze videos (including frame sequences, dynamic movements, audio-visual elements) and generate detailed, structured descriptions. These descriptions must be directly usable as video generation prompts to create similar videos, serving core tasks such as video content creation, creative inspiration extraction, and artistic style exploration.\n- Profile: As an AI Video Analysis and Description Specialist, you possess expertise in computer vision, video temporal sequence processing, motion analysis, and multi-modal natural language generation. 
You excel at interpreting dynamic visual data (frame-by-frame features + continuous motion) and translating it into precise descriptive text that fully guides the creation of new videos with matching style, rhythm, and content.\n- Skills: Proficiency in video frame feature extraction, motion trajectory recognition, temporal rhythm analysis, scene/shot segmentation, color grading detection, camera movement identification (pan/tilt/zoom/dolly), audio-visual element correlation analysis, and descriptive language generation that captures both static visual features and dynamic temporal characteristics. Mastery of artistic elements in video: composition (per frame + dynamic framing), color palette (consistent + transitional), texture (surface details + motion blur), pacing (frame rate, shot duration), and sound style (background music, ambient sound cues).\n- Goals: To analyze the provided video comprehensively, generate a detailed, structured description that captures all key video elements (static visual features + dynamic motion/temporal characteristics + audio-visual style), and ensure this description can directly serve as a high-quality prompt for creating similar videos.\n- Constraints: \n 1. The description must be clear, structured, and specific enough to guide end-to-end video creation (including frame rate, shot duration, camera movement, motion speed, color transitions).\n 2. Avoid ambiguity; focus on the most salient static (per-frame) and dynamic (temporal) features of the video.\n 3. Prioritize video-specific elements: motion trajectory, shot types (close-up/wide shot/etc.), camera movement, frame rate, scene transitions, rhythm/pacing, and temporal color changes.\n 4. The output must only contain the video generation prompt (no extra explanations).\n- OutputFormat: A detailed, hierarchical text description of the video, structured as follows:\n 1. Core Content & Narrative: Brief overview of the video's subject and temporal progression\n 2. 
Visual Style (Static): Per-frame key elements (objects, colors, composition, lighting, texture)\n 3. Dynamic Elements (Temporal): Motion details (speed, trajectory, direction), camera movement (type, speed, direction), shot duration/frame rate, scene transitions\n 4. Audio-Visual Style: Color grading (consistent/transitional), rhythm/pacing, and implied audio style (if discernible)\n- Workflow:\n 1. Analyze the video to segment shots/scenes, identify frame-by-frame static visual elements (objects, colors, composition) and cross-frame dynamic elements (motion, camera movement, temporal changes).\n 2. Extract video-specific technical features: frame rate, shot duration, scene transition types, motion speed/rhythm.\n 3. Generate a structured, detailed description that captures the essence of the video (static + dynamic + temporal characteristics), ensuring specificity and actionability for video generation.\n 4. Refine the description for clarity, conciseness, and alignment with video generation prompt norms (e.g., including frame rate, camera movement terms, motion speed descriptors)." + ], + "color": "#432", + "bgcolor": "#653" + } + ], + "groups": [], + "links": [ + { + "id": 6, + "origin_id": 1, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "*" + }, + { + "id": 15, + "origin_id": -10, + "origin_slot": 0, + "target_id": 1, + "target_slot": 4, + "type": "STRING" + }, + { + "id": 16, + "origin_id": -10, + "origin_slot": 1, + "target_id": 1, + "target_slot": 5, + "type": "COMBO" + }, + { + "id": 17, + "origin_id": -10, + "origin_slot": 2, + "target_id": 1, + "target_slot": 2, + "type": "VIDEO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Text generation/Video Captioning", + "description": "Generates descriptive captions for video input using Google's Gemini multimodal LLM." 
+ } + ] + } +} diff --git a/blueprints/Video Inpaint(Wan2.1 VACE).json b/blueprints/Video Inpaint(Wan2.1 VACE).json index a7c6db003..a658be5f8 100644 --- a/blueprints/Video Inpaint(Wan2.1 VACE).json +++ b/blueprints/Video Inpaint(Wan2.1 VACE).json @@ -1 +1,2388 @@ -{"id": "2f429c60-2e03-4117-908b-31e1fab04bba", "revision": 0, "last_node_id": 229, "last_link_id": 366, "nodes": [{"id": 229, "type": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", "pos": [-230, 160], "size": [400, 480], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "video mask", "localized_name": "mask", "name": "mask", "type": "MASK", "link": null}, {"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"label": "reference image", "name": "reference_image_1", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["6", "text"], ["-1", "width"], ["-1", "height"], ["3", "seed"], ["3", "control_after_generate"], ["-1", "unet_name"], ["-1", "lora_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": [null, 720, 720, null, null, "wan2.1_vace_14B_fp16.safetensors", "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan_2.1_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", "version": 1, "state": {"lastGroupId": 25, 
"lastNodeId": 229, "lastLinkId": 366, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Video Inpaint(Wan2.1 VACE)", "inputNode": {"id": -10, "bounding": [-970, 800, 132.54296875, 220]}, "outputNode": {"id": -20, "bounding": [1480, 535, 120, 60]}, "inputs": [{"id": "9fdda38d-6aa7-48ad-b425-f493d8aa585c", "name": "mask", "type": "MASK", "linkIds": [351, 335, 345], "localized_name": "mask", "label": "video mask", "pos": [-857.45703125, 820]}, {"id": "8b1788cc-46d2-4f40-8b33-70fd56b4cb24", "name": "video", "type": "VIDEO", "linkIds": [336], "localized_name": "video", "pos": [-857.45703125, 840]}, {"id": "09393f21-257e-4476-bb02-54899a8252b8", "name": "width", "type": "INT", "linkIds": [355], "pos": [-857.45703125, 860]}, {"id": "07a030f7-7eac-4b3f-b8f3-f00ee87b191d", "name": "height", "type": "INT", "linkIds": [356], "pos": [-857.45703125, 880]}, {"id": "255908d3-6cc9-48fc-b76b-ab9fb72695bc", "name": "reference_image_1", "type": "IMAGE", "linkIds": [361], "label": "reference image", "pos": [-857.45703125, 900]}, {"id": "18a5d241-523c-433d-ae05-25b6e69d1e29", "name": "unet_name", "type": "COMBO", "linkIds": [363], "pos": [-857.45703125, 920]}, {"id": "d7576e1b-da5f-402f-81b2-d37f838b1f8f", "name": "lora_name", "type": "COMBO", "linkIds": [364], "pos": [-857.45703125, 940]}, {"id": "41676a3e-c710-4723-821e-f651ad3784b1", "name": "clip_name", "type": "COMBO", "linkIds": [365], "pos": [-857.45703125, 960]}, {"id": "41fc878c-9aa6-4c12-bef3-ceda6b094b7c", "name": "vae_name", "type": "COMBO", "linkIds": [366], "pos": [-857.45703125, 980]}], "outputs": [{"id": "d4861f39-1011-49dc-80fd-ee318b614a8d", "name": "VIDEO", "type": "VIDEO", "linkIds": [129], "localized_name": "VIDEO", "pos": [1500, 555]}], "widgets": [], "nodes": [{"id": 58, "type": "TrimVideoLatent", "pos": [760, 390], "size": [315, 60], "flags": {"collapsed": false}, "order": 13, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 116}, {"localized_name": 
"trim_amount", "name": "trim_amount", "type": "INT", "widget": {"name": "trim_amount"}, "link": 115}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [117]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "TrimVideoLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"trim_amount": true}}, "widgets_values": [0]}, {"id": 8, "type": "VAEDecode", "pos": [770, 500], "size": [315, 46], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 117}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [139]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 48, "type": "ModelSamplingSD3", "pos": [400, 50], "size": [315, 58], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 279}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ModelSamplingSD3", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [5]}, {"id": 219, "type": "InvertMask", "pos": [400, 990], "size": [140, 
26], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 351}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "InvertMask"}, "widgets_values": []}, {"id": 216, "type": "MaskToImage", "pos": [560, 990], "size": [193.2779296875, 26], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 352}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [334]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 213, "type": "RebatchImages", "pos": [410, 690], "size": [230, 60], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 360}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": 340}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "shape": 6, "type": "IMAGE", "links": [333]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "RebatchImages"}, "widgets_values": [1]}, {"id": 68, "type": "CreateVideo", "pos": [1150, 50], "size": [270, 78], "flags": {"collapsed": false}, "order": 14, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 139}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 362}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 353}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [129]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", 
"secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [16]}, {"id": 208, "type": "ImageCompositeMasked", "pos": [410, 790], "size": [230, 146], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "destination", "name": "destination", "type": "IMAGE", "link": 333}, {"localized_name": "source", "name": "source", "type": "IMAGE", "link": 334}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": 335}, {"localized_name": "x", "name": "x", "type": "INT", "widget": {"name": "x"}, "link": null}, {"localized_name": "y", "name": "y", "type": "INT", "widget": {"name": "y"}, "link": null}, {"localized_name": "resize_source", "name": "resize_source", "type": "BOOLEAN", "widget": {"name": "resize_source"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [341, 344]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "ImageCompositeMasked"}, "widgets_values": [0, 0, true]}, {"id": 214, "type": "PreviewImage", "pos": [760, 690], "size": [300, 300], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 341}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "PreviewImage"}, "widgets_values": []}, {"id": 111, "type": "MaskToImage", "pos": [20, 1270], "size": [240, 26], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 345}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "MaskToImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 129, "type": "RepeatImageBatch", "pos": 
[20, 1160], "size": [240, 60], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 201}, {"localized_name": "amount", "name": "amount", "type": "INT", "widget": {"name": "amount"}, "link": 346}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [202]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "RepeatImageBatch", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"amount": true}}, "widgets_values": [17]}, {"id": 130, "type": "ImageToMask", "pos": [20, 1050], "size": [240, 60], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 202}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [349]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageToMask", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["red"]}, {"id": 3, "type": "KSampler", "pos": [770, 50], "size": [315, 262], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 98}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 99}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 160}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", 
"widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [116]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [584027519362099, "randomize", 4, 1, "uni_pc", "simple", 1]}, {"id": 224, "type": "MarkdownNote", "pos": [420, -160], "size": [310, 110], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [], "title": "About Video Size", "properties": {}, "widgets_values": ["| Model | 480P | 720P |\n| ------------------------------------------------------------ | ---- | ---- |\n| [VACE-1.3B](https://huggingface.co/Wan-AI/Wan2.1-VACE-1.3B) | ✅ | ❌ |\n| [VACE-14B](https://huggingface.co/Wan-AI/Wan2.1-VACE-14B) | ✅ | ✅ |"], "color": "#432", "bgcolor": "#000"}, {"id": 223, "type": "MarkdownNote", "pos": [770, -210], "size": [303.90106201171875, 158.5415802001953], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "title": "KSampler Setting", "properties": {}, "widgets_values": ["## Default\n\n- steps:20\n- cfg:6.0\n\n## For CausVid LoRA\n\n- steps: 2-4\n- cfg: 1.0\n\n"], "color": "#432", "bgcolor": "#000"}, {"id": 6, "type": "CLIPTextEncode", "pos": [-80, 60], "size": [420, 280], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", 
"type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [96]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 140, "type": "UNETLoader", "pos": [-505.8336486816406, 88.22794342041016], "size": [360, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 363}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [248]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.1_vace_14B_fp16.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["wan2.1_vace_14B_fp16.safetensors", "fp8_e4m3fn_fast"]}, {"id": 154, "type": "LoraLoaderModelOnly", "pos": [-505.8336486816406, 228.2279510498047], "size": [360, 85.11004638671875], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 248}, 
{"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 364}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["Wan21_CausVid_14B_T2V_lora_rank32.safetensors", 0.30000000000000004]}, {"id": 38, "type": "CLIPLoader", "pos": [-499.14141845703125, 368.0911865234375], "size": [360, 106], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 365}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors?download=true", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 
80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 39, "type": "VAELoader", "pos": [-498.5298156738281, 517.2576293945312], "size": [360, 60], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 366}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 101]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 221, "type": "MarkdownNote", "pos": [380, 1090], "size": [480, 170], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "[EN] About video mask", "properties": {"widget_ue_connectable": {}}, "widgets_values": ["Currently, it's difficult to perfectly draw dynamic masks for different frames using only core nodes. However, to avoid requiring users to install additional custom nodes, our templates only use core nodes. 
You can refer to this implementation idea to achieve video inpainting.\n\nYou can use KJNode’s Points Editor and Sam2Segmentation to create some dynamic mask functions.\n\nCustom node links:\n- [ComfyUI-KJNodes](https://github.com/kijai/ComfyUI-KJNodes)\n- [ComfyUI-segment-anything-2](https://github.com/kijai/ComfyUI-segment-anything-2)"], "color": "#432", "bgcolor": "#000"}, {"id": 7, "type": "CLIPTextEncode", "pos": [-80, 390], "size": [425.27801513671875, 180.6060791015625], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [97]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"], "color": "#223", "bgcolor": "#335"}, {"id": 229, "type": "ImageFromBatch", "pos": [-510, 800], "size": [270, 82], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 358}, {"localized_name": "batch_index", "name": "batch_index", "type": "INT", "widget": {"name": "batch_index"}, "link": null}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [359, 360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageFromBatch"}, "widgets_values": 
[0, 81]}, {"id": 49, "type": "WanVaceToVideo", "pos": [400, 200], "size": [315, 254], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 96}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 97}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 101}, {"localized_name": "control_video", "name": "control_video", "shape": 7, "type": "IMAGE", "link": 344}, {"localized_name": "control_masks", "name": "control_masks", "shape": 7, "type": "MASK", "link": 349}, {"localized_name": "reference_image", "name": "reference_image", "shape": 7, "type": "IMAGE", "link": 361}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 355}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 356}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [98]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [99]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [160]}, {"localized_name": "trim_latent", "name": "trim_latent", "type": "INT", "links": [115]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "WanVaceToVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"width": true, "height": true, "length": true}}, "widgets_values": [720, 720, 81, 1, 1]}, {"id": 211, "type": 
"GetImageSize", "pos": [70, 800], "size": [190, 66], "flags": {"collapsed": false}, "order": 20, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 359}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": null}, {"localized_name": "height", "name": "height", "type": "INT", "links": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [340, 346]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "GetImageSize"}, "widgets_values": []}, {"id": 210, "type": "GetVideoComponents", "pos": [-510, 690], "size": [193.530859375, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 336}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [358]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [362]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [353]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "GetVideoComponents"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Step1 - Load models here", "bounding": [-540, -30, 430, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [-90, -30, 450, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Sampling & Decoding", "bounding": [380, -30, 720, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 10, "title": "Repeat Mask Batch", "bounding": [-90, 910, 450, 460], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 21, "title": "Get video info", "bounding": [-540, 610, 900, 290], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 22, "title": "Composite video & masks", "bounding": [380, 610, 720, 420], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 23, "title": "Step4 - Set video size & length", "bounding": [390, 
130, 360, 340], "color": "#A88", "font_size": 24, "flags": {}}, {"id": 25, "title": "14B", "bounding": [-520, 10, 380, 308.7100524902344], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 116, "origin_id": 3, "origin_slot": 0, "target_id": 58, "target_slot": 0, "type": "LATENT"}, {"id": 115, "origin_id": 49, "origin_slot": 3, "target_id": 58, "target_slot": 1, "type": "INT"}, {"id": 117, "origin_id": 58, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 279, "origin_id": 154, "origin_slot": 0, "target_id": 48, "target_slot": 0, "type": "MODEL"}, {"id": 352, "origin_id": 219, "origin_slot": 0, "target_id": 216, "target_slot": 0, "type": "MASK"}, {"id": 340, "origin_id": 211, "origin_slot": 2, "target_id": 213, "target_slot": 1, "type": "INT"}, {"id": 96, "origin_id": 6, "origin_slot": 0, "target_id": 49, "target_slot": 0, "type": "CONDITIONING"}, {"id": 97, "origin_id": 7, "origin_slot": 0, "target_id": 49, "target_slot": 1, "type": "CONDITIONING"}, {"id": 101, "origin_id": 39, "origin_slot": 0, "target_id": 49, "target_slot": 2, "type": "VAE"}, {"id": 344, "origin_id": 208, "origin_slot": 0, "target_id": 49, "target_slot": 3, "type": "IMAGE"}, {"id": 349, "origin_id": 130, "origin_slot": 0, "target_id": 49, "target_slot": 4, "type": "MASK"}, {"id": 139, "origin_id": 8, "origin_slot": 0, "target_id": 68, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 210, "origin_slot": 2, "target_id": 68, "target_slot": 2, "type": "FLOAT"}, {"id": 333, "origin_id": 213, "origin_slot": 0, "target_id": 208, "target_slot": 0, "type": "IMAGE"}, {"id": 334, "origin_id": 216, "origin_slot": 0, "target_id": 208, "target_slot": 1, "type": "IMAGE"}, {"id": 341, "origin_id": 208, "origin_slot": 0, "target_id": 214, "target_slot": 0, "type": "IMAGE"}, {"id": 201, "origin_id": 111, "origin_slot": 0, "target_id": 129, "target_slot": 0, 
"type": "IMAGE"}, {"id": 346, "origin_id": 211, "origin_slot": 2, "target_id": 129, "target_slot": 1, "type": "INT"}, {"id": 202, "origin_id": 129, "origin_slot": 0, "target_id": 130, "target_slot": 0, "type": "IMAGE"}, {"id": 280, "origin_id": 48, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 98, "origin_id": 49, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 99, "origin_id": 49, "origin_slot": 1, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 160, "origin_id": 49, "origin_slot": 2, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 248, "origin_id": 140, "origin_slot": 0, "target_id": 154, "target_slot": 0, "type": "MODEL"}, {"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 351, "origin_id": -10, "origin_slot": 0, "target_id": 219, "target_slot": 0, "type": "MASK"}, {"id": 335, "origin_id": -10, "origin_slot": 0, "target_id": 208, "target_slot": 2, "type": "MASK"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "MASK"}, {"id": 336, "origin_id": -10, "origin_slot": 1, "target_id": 210, "target_slot": 0, "type": "VIDEO"}, {"id": 129, "origin_id": 68, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 355, "origin_id": -10, "origin_slot": 2, "target_id": 49, "target_slot": 6, "type": "INT"}, {"id": 356, "origin_id": -10, "origin_slot": 3, "target_id": 49, "target_slot": 7, "type": "INT"}, {"id": 358, "origin_id": 210, "origin_slot": 0, "target_id": 229, "target_slot": 0, "type": "IMAGE"}, {"id": 359, "origin_id": 229, "origin_slot": 0, "target_id": 211, "target_slot": 0, "type": "IMAGE"}, {"id": 360, "origin_id": 229, "origin_slot": 0, "target_id": 213, "target_slot": 0, "type": "IMAGE"}, {"id": 361, "origin_id": -10, "origin_slot": 4, "target_id": 49, 
"target_slot": 5, "type": "IMAGE"}, {"id": 362, "origin_id": 210, "origin_slot": 1, "target_id": 68, "target_slot": 1, "type": "AUDIO"}, {"id": 363, "origin_id": -10, "origin_slot": 5, "target_id": 140, "target_slot": 0, "type": "COMBO"}, {"id": 364, "origin_id": -10, "origin_slot": 6, "target_id": 154, "target_slot": 1, "type": "COMBO"}, {"id": 365, "origin_id": -10, "origin_slot": 7, "target_id": 38, "target_slot": 0, "type": "COMBO"}, {"id": 366, "origin_id": -10, "origin_slot": 8, "target_id": 39, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Inpaint video"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 0.8183828377358485, "offset": [1215.8643989712405, 178.87024992690183]}}, "version": 0.4} +{ + "id": "2f429c60-2e03-4117-908b-31e1fab04bba", + "revision": 0, + "last_node_id": 229, + "last_link_id": 366, + "nodes": [ + { + "id": 229, + "type": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", + "pos": [ + -230, + 160 + ], + "size": [ + 400, + 480 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "video mask", + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": null + }, + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "label": "reference image", + "name": "reference_image_1", + "type": "IMAGE", + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + 
"widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "6", + "text" + ], + [ + "-1", + "width" + ], + [ + "-1", + "height" + ], + [ + "3", + "seed" + ], + [ + "3", + "control_after_generate" + ], + [ + "-1", + "unet_name" + ], + [ + "-1", + "lora_name" + ], + [ + "-1", + "clip_name" + ], + [ + "-1", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + null, + 720, + 720, + null, + null, + "wan2.1_vace_14B_fp16.safetensors", + "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan_2.1_vae.safetensors" + ] + } + ], + "links": [], + "groups": [], + "definitions": { + "subgraphs": [ + { + "id": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", + "version": 1, + "state": { + "lastGroupId": 25, + "lastNodeId": 229, + "lastLinkId": 366, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Inpaint (Wan 2.1 VACE)", + "inputNode": { + "id": -10, + "bounding": [ + -970, + 800, + 132.54296875, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1480, + 535, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "9fdda38d-6aa7-48ad-b425-f493d8aa585c", + "name": "mask", + "type": "MASK", + "linkIds": [ + 351, + 335, + 345 + ], + "localized_name": "mask", + "label": "video mask", + "pos": [ + -857.45703125, + 820 + ] + }, + { + "id": "8b1788cc-46d2-4f40-8b33-70fd56b4cb24", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 336 + ], + "localized_name": "video", + "pos": [ + -857.45703125, + 840 + ] + }, + { + "id": "09393f21-257e-4476-bb02-54899a8252b8", + "name": "width", + "type": "INT", + "linkIds": [ + 355 + ], + "pos": [ + -857.45703125, + 860 + ] + }, + { + "id": "07a030f7-7eac-4b3f-b8f3-f00ee87b191d", + "name": "height", + "type": "INT", + "linkIds": [ + 356 + ], + "pos": [ + -857.45703125, + 880 + ] + }, + { 
+ "id": "255908d3-6cc9-48fc-b76b-ab9fb72695bc", + "name": "reference_image_1", + "type": "IMAGE", + "linkIds": [ + 361 + ], + "label": "reference image", + "pos": [ + -857.45703125, + 900 + ] + }, + { + "id": "18a5d241-523c-433d-ae05-25b6e69d1e29", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 363 + ], + "pos": [ + -857.45703125, + 920 + ] + }, + { + "id": "d7576e1b-da5f-402f-81b2-d37f838b1f8f", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 364 + ], + "pos": [ + -857.45703125, + 940 + ] + }, + { + "id": "41676a3e-c710-4723-821e-f651ad3784b1", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 365 + ], + "pos": [ + -857.45703125, + 960 + ] + }, + { + "id": "41fc878c-9aa6-4c12-bef3-ceda6b094b7c", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 366 + ], + "pos": [ + -857.45703125, + 980 + ] + } + ], + "outputs": [ + { + "id": "d4861f39-1011-49dc-80fd-ee318b614a8d", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 129 + ], + "localized_name": "VIDEO", + "pos": [ + 1500, + 555 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 58, + "type": "TrimVideoLatent", + "pos": [ + 760, + 390 + ], + "size": [ + 315, + 60 + ], + "flags": { + "collapsed": false + }, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 116 + }, + { + "localized_name": "trim_amount", + "name": "trim_amount", + "type": "INT", + "widget": { + "name": "trim_amount" + }, + "link": 115 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 117 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "TrimVideoLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": { + "trim_amount": true + } + }, + "widgets_values": [ + 0 + ] + }, + { + 
"id": 8, + "type": "VAEDecode", + "pos": [ + 770, + 500 + ], + "size": [ + 315, + 46 + ], + "flags": { + "collapsed": false + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 117 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 139 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 400, + 50 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 279 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 280 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "ModelSamplingSD3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 5 + ] + }, + { + "id": 219, + "type": "InvertMask", + "pos": [ + 400, + 990 + ], + "size": [ + 140, + 26 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 351 + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + 
"type": "MASK", + "links": [ + 352 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "InvertMask" + }, + "widgets_values": [] + }, + { + "id": 216, + "type": "MaskToImage", + "pos": [ + 560, + 990 + ], + "size": [ + 193.2779296875, + 26 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 352 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 334 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "MaskToImage" + }, + "widgets_values": [] + }, + { + "id": 213, + "type": "RebatchImages", + "pos": [ + 410, + 690 + ], + "size": [ + 230, + 60 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 360 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": 340 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "shape": 6, + "type": "IMAGE", + "links": [ + 333 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "RebatchImages" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 68, + "type": "CreateVideo", + "pos": [ + 1150, + 50 + ], + "size": [ + 270, + 78 + ], + "flags": { + "collapsed": false + }, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 139 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 362 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 353 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 129 + ] + } + ], + "properties": { + 
"cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 16 + ] + }, + { + "id": 208, + "type": "ImageCompositeMasked", + "pos": [ + 410, + 790 + ], + "size": [ + 230, + 146 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "destination", + "name": "destination", + "type": "IMAGE", + "link": 333 + }, + { + "localized_name": "source", + "name": "source", + "type": "IMAGE", + "link": 334 + }, + { + "localized_name": "mask", + "name": "mask", + "shape": 7, + "type": "MASK", + "link": 335 + }, + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "resize_source", + "name": "resize_source", + "type": "BOOLEAN", + "widget": { + "name": "resize_source" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 341, + 344 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "ImageCompositeMasked" + }, + "widgets_values": [ + 0, + 0, + true + ] + }, + { + "id": 214, + "type": "PreviewImage", + "pos": [ + 760, + 690 + ], + "size": [ + 300, + 300 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 341 + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "PreviewImage" + }, + "widgets_values": [] + }, + { + "id": 111, + "type": "MaskToImage", + "pos": [ + 20, + 1270 + ], + "size": [ + 240, + 26 + ], + "flags": {}, + "order": 15, + "mode": 
0, + "inputs": [ + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 345 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 201 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "MaskToImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [] + }, + { + "id": 129, + "type": "RepeatImageBatch", + "pos": [ + 20, + 1160 + ], + "size": [ + 240, + 60 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 201 + }, + { + "localized_name": "amount", + "name": "amount", + "type": "INT", + "widget": { + "name": "amount" + }, + "link": 346 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 202 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "RepeatImageBatch", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": { + "amount": true + } + }, + "widgets_values": [ + 17 + ] + }, + { + "id": 130, + "type": "ImageToMask", + "pos": [ + 20, + 1050 + ], + "size": [ + 240, + 60 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 202 + }, + { + "localized_name": "channel", + "name": "channel", + "type": "COMBO", + "widget": { + "name": "channel" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": [ + 349 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name 
for S&R": "ImageToMask", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "red" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 770, + 50 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 280 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 98 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 99 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 160 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 116 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + 584027519362099, + "randomize", + 4, + 1, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 224, + "type": "MarkdownNote", + "pos": [ + 420, + -160 + ], + "size": [ + 310, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "About Video Size", + "properties": {}, + "widgets_values": [ + "| Model | 480P | 720P |\n| ------------------------------------------------------------ | ---- | ---- |\n| [VACE-1.3B](https://huggingface.co/Wan-AI/Wan2.1-VACE-1.3B) | ✅ | ❌ |\n| [VACE-14B](https://huggingface.co/Wan-AI/Wan2.1-VACE-14B) | ✅ | ✅ |" + ], + "color": "#432", + "bgcolor": "#000" + }, + { + "id": 223, + "type": "MarkdownNote", + "pos": [ + 770, + -210 + ], + "size": [ + 303.90106201171875, + 158.5415802001953 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "KSampler Setting", + "properties": {}, + "widgets_values": [ + "## Default\n\n- steps:20\n- cfg:6.0\n\n## For CausVid LoRA\n\n- steps: 2-4\n- cfg: 1.0\n\n" + ], + "color": "#432", + "bgcolor": "#000" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -80, + 60 + ], + "size": [ + 420, + 280 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 74 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 96 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + 
"widget_ue_connectable": {} + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 140, + "type": "UNETLoader", + "pos": [ + -505.8336486816406, + 88.22794342041016 + ], + "size": [ + 360, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 363 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 248 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "wan2.1_vace_14B_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "wan2.1_vace_14B_fp16.safetensors", + "fp8_e4m3fn_fast" + ] + }, + { + "id": 154, + "type": "LoraLoaderModelOnly", + "pos": [ + -505.8336486816406, + 228.2279510498047 + ], + "size": [ + 360, + 85.11004638671875 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 248 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 364 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 279 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", + "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors", + "directory": "loras" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", + 0.30000000000000004 + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -499.14141845703125, + 368.0911865234375 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 365 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors?download=true", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + -498.5298156738281, + 517.2576293945312 + ], + "size": [ + 360, + 60 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 366 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 101 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "wan_2.1_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 221, + "type": "MarkdownNote", + "pos": [ + 380, + 1090 + ], + "size": [ + 480, + 170 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "[EN] About video mask", + "properties": { + "widget_ue_connectable": {} + }, + "widgets_values": [ + "Currently, it's difficult to perfectly draw dynamic masks for different frames using only core nodes. However, to avoid requiring users to install additional custom nodes, our templates only use core nodes. 
You can refer to this implementation idea to achieve video inpainting.\n\nYou can use KJNode’s Points Editor and Sam2Segmentation to create some dynamic mask functions.\n\nCustom node links:\n- [ComfyUI-KJNodes](https://github.com/kijai/ComfyUI-KJNodes)\n- [ComfyUI-segment-anything-2](https://github.com/kijai/ComfyUI-segment-anything-2)" + ], + "color": "#432", + "bgcolor": "#000" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + -80, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 75 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 97 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": {} + }, + "widgets_values": [ + "过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝," + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 229, + "type": "ImageFromBatch", + "pos": [ + -510, + 800 + ], + "size": [ + 270, + 82 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 358 + }, + { + "localized_name": "batch_index", + "name": "batch_index", + "type": "INT", + "widget": { + "name": "batch_index" + }, + "link": null + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + 
"name": "length" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 359, + 360 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.13.0", + "Node name for S&R": "ImageFromBatch" + }, + "widgets_values": [ + 0, + 81 + ] + }, + { + "id": 49, + "type": "WanVaceToVideo", + "pos": [ + 400, + 200 + ], + "size": [ + 315, + 254 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 96 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 97 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 101 + }, + { + "localized_name": "control_video", + "name": "control_video", + "shape": 7, + "type": "IMAGE", + "link": 344 + }, + { + "localized_name": "control_masks", + "name": "control_masks", + "shape": 7, + "type": "MASK", + "link": 349 + }, + { + "localized_name": "reference_image", + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 361 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 355 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 356 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": null + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 98 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": 
"CONDITIONING", + "links": [ + 99 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 160 + ] + }, + { + "localized_name": "trim_latent", + "name": "trim_latent", + "type": "INT", + "links": [ + 115 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.34", + "Node name for S&R": "WanVaceToVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "widget_ue_connectable": { + "width": true, + "height": true, + "length": true + } + }, + "widgets_values": [ + 720, + 720, + 81, + 1, + 1 + ] + }, + { + "id": 211, + "type": "GetImageSize", + "pos": [ + 70, + 800 + ], + "size": [ + 190, + 66 + ], + "flags": { + "collapsed": false + }, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": null + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": null + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 340, + 346 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "GetImageSize" + }, + "widgets_values": [] + }, + { + "id": 210, + "type": "GetVideoComponents", + "pos": [ + -510, + 690 + ], + "size": [ + 193.530859375, + 66 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 358 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 362 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 353 + ] + } + ], + 
"properties": { + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "GetVideoComponents" + }, + "widgets_values": [] + } + ], + "groups": [ + { + "id": 1, + "title": "Step1 - Load models here", + "bounding": [ + -540, + -30, + 430, + 620 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Prompt", + "bounding": [ + -90, + -30, + 450, + 620 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Sampling & Decoding", + "bounding": [ + 380, + -30, + 720, + 620 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 10, + "title": "Repeat Mask Batch", + "bounding": [ + -90, + 910, + 450, + 460 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 21, + "title": "Get video info", + "bounding": [ + -540, + 610, + 900, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 22, + "title": "Composite video & masks", + "bounding": [ + 380, + 610, + 720, + 420 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 23, + "title": "Step4 - Set video size & length", + "bounding": [ + 390, + 130, + 360, + 340 + ], + "color": "#A88", + "font_size": 24, + "flags": {} + }, + { + "id": 25, + "title": "14B", + "bounding": [ + -520, + 10, + 380, + 308.7100524902344 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 116, + "origin_id": 3, + "origin_slot": 0, + "target_id": 58, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 115, + "origin_id": 49, + "origin_slot": 3, + "target_id": 58, + "target_slot": 1, + "type": "INT" + }, + { + "id": 117, + "origin_id": 58, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 279, + "origin_id": 154, + "origin_slot": 0, + "target_id": 48, + "target_slot": 0, + 
"type": "MODEL" + }, + { + "id": 352, + "origin_id": 219, + "origin_slot": 0, + "target_id": 216, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 340, + "origin_id": 211, + "origin_slot": 2, + "target_id": 213, + "target_slot": 1, + "type": "INT" + }, + { + "id": 96, + "origin_id": 6, + "origin_slot": 0, + "target_id": 49, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 97, + "origin_id": 7, + "origin_slot": 0, + "target_id": 49, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 101, + "origin_id": 39, + "origin_slot": 0, + "target_id": 49, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 344, + "origin_id": 208, + "origin_slot": 0, + "target_id": 49, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 349, + "origin_id": 130, + "origin_slot": 0, + "target_id": 49, + "target_slot": 4, + "type": "MASK" + }, + { + "id": 139, + "origin_id": 8, + "origin_slot": 0, + "target_id": 68, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 353, + "origin_id": 210, + "origin_slot": 2, + "target_id": 68, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 333, + "origin_id": 213, + "origin_slot": 0, + "target_id": 208, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 334, + "origin_id": 216, + "origin_slot": 0, + "target_id": 208, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 341, + "origin_id": 208, + "origin_slot": 0, + "target_id": 214, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 201, + "origin_id": 111, + "origin_slot": 0, + "target_id": 129, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 346, + "origin_id": 211, + "origin_slot": 2, + "target_id": 129, + "target_slot": 1, + "type": "INT" + }, + { + "id": 202, + "origin_id": 129, + "origin_slot": 0, + "target_id": 130, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 280, + "origin_id": 48, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 98, + "origin_id": 49, + "origin_slot": 0, + "target_id": 
3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 99, + "origin_id": 49, + "origin_slot": 1, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 160, + "origin_id": 49, + "origin_slot": 2, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 74, + "origin_id": 38, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 248, + "origin_id": 140, + "origin_slot": 0, + "target_id": 154, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 75, + "origin_id": 38, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 351, + "origin_id": -10, + "origin_slot": 0, + "target_id": 219, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 335, + "origin_id": -10, + "origin_slot": 0, + "target_id": 208, + "target_slot": 2, + "type": "MASK" + }, + { + "id": 345, + "origin_id": -10, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 336, + "origin_id": -10, + "origin_slot": 1, + "target_id": 210, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 129, + "origin_id": 68, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 355, + "origin_id": -10, + "origin_slot": 2, + "target_id": 49, + "target_slot": 6, + "type": "INT" + }, + { + "id": 356, + "origin_id": -10, + "origin_slot": 3, + "target_id": 49, + "target_slot": 7, + "type": "INT" + }, + { + "id": 358, + "origin_id": 210, + "origin_slot": 0, + "target_id": 229, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 359, + "origin_id": 229, + "origin_slot": 0, + "target_id": 211, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 360, + "origin_id": 229, + "origin_slot": 0, + "target_id": 213, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 361, + "origin_id": -10, + "origin_slot": 4, + "target_id": 49, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 362, + "origin_id": 210, + 
"origin_slot": 1, + "target_id": 68, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 363, + "origin_id": -10, + "origin_slot": 5, + "target_id": 140, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 364, + "origin_id": -10, + "origin_slot": 6, + "target_id": 154, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 365, + "origin_id": -10, + "origin_slot": 7, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 366, + "origin_id": -10, + "origin_slot": 8, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Inpaint video", + "description": "Inpaints masked regions in video frames using Wan 2.1 VACE." + } + ] + }, + "config": {}, + "extra": { + "workflowRendererVersion": "LG", + "ds": { + "scale": 0.8183828377358485, + "offset": [ + 1215.8643989712405, + 178.87024992690183 + ] + } + }, + "version": 0.4 +} diff --git a/blueprints/Video Segmentation (SAM3).json b/blueprints/Video Segmentation (SAM3).json new file mode 100644 index 000000000..4d9a13412 --- /dev/null +++ b/blueprints/Video Segmentation (SAM3).json @@ -0,0 +1,827 @@ +{ + "revision": 0, + "last_node_id": 130, + "last_link_id": 0, + "nodes": [ + { + "id": 130, + "type": "7937cf78-b52b-40a3-93b2-b4e2e5f98df1", + "pos": [ + -1210, + -2780 + ], + "size": [ + 300, + 370 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "bboxes", + "type": "BOUNDING_BOX", + "link": null + }, + { + "name": "positive_coords", + "type": "STRING", + "link": null + }, + { + "name": "negative_coords", + "type": "STRING", + "link": null + }, + { + "name": "threshold", + "type": "FLOAT", + "widget": { + "name": "threshold" + }, + "link": null + }, + { + "name": "refine_iterations", + "type": "INT", + "widget": { 
+ "name": "refine_iterations" + }, + "link": null + }, + { + "name": "individual_masks", + "type": "BOOLEAN", + "widget": { + "name": "individual_masks" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "masks", + "name": "masks", + "type": "MASK", + "links": [] + }, + { + "localized_name": "bboxes", + "name": "bboxes", + "type": "BOUNDING_BOX", + "links": [] + }, + { + "name": "audio", + "type": "AUDIO", + "links": null + }, + { + "name": "fps", + "type": "FLOAT", + "links": null + } + ], + "properties": { + "proxyWidgets": [ + [ + "125", + "text" + ], + [ + "126", + "threshold" + ], + [ + "126", + "refine_iterations" + ], + [ + "126", + "individual_masks" + ], + [ + "127", + "ckpt_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Video Segmentation (SAM3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "7937cf78-b52b-40a3-93b2-b4e2e5f98df1", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 130, + "lastLinkId": 299, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Segmentation (SAM3)", + "inputNode": { + "id": -10, + "bounding": [ + -2260, + -3450, + 136.369140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -1050, + -3510, + 120, + 120 + ] + }, + "inputs": [ + { + "id": "680ffd88-32fe-48be-88d6-91ea44d5eaee", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 252 + ], + "pos": [ + -2143.630859375, + -3430 + ] + }, + { + "id": "ceaf249c-32d7-4624-8bf6-e590e347ed90", + "name": "text", + "type": "STRING", + "linkIds": [ + 254 + ], + "pos": [ + -2143.630859375, + -3410 + ] + }, + { + "id": 
"1ffbff36-da0c-4854-8cb4-88ad31e64f99", + "name": "bboxes", + "type": "BOUNDING_BOX", + "linkIds": [ + 255 + ], + "pos": [ + -2143.630859375, + -3390 + ] + }, + { + "id": "67b7f4c7-cec0-4e00-b154-23cc1abf880e", + "name": "positive_coords", + "type": "STRING", + "linkIds": [ + 256 + ], + "pos": [ + -2143.630859375, + -3370 + ] + }, + { + "id": "b090a498-2bde-46b9-9554-18501401d687", + "name": "negative_coords", + "type": "STRING", + "linkIds": [ + 257 + ], + "pos": [ + -2143.630859375, + -3350 + ] + }, + { + "id": "1a76dfcf-ce95-46af-bba5-c42160c683dd", + "name": "threshold", + "type": "FLOAT", + "linkIds": [ + 261 + ], + "pos": [ + -2143.630859375, + -3330 + ] + }, + { + "id": "999523fa-c476-4c53-80c3-0a2f554d18ab", + "name": "refine_iterations", + "type": "INT", + "linkIds": [ + 262 + ], + "pos": [ + -2143.630859375, + -3310 + ] + }, + { + "id": "d2371011-7fe5-4a39-b0c1-df2e0bbd6ece", + "name": "individual_masks", + "type": "BOOLEAN", + "linkIds": [ + 263 + ], + "pos": [ + -2143.630859375, + -3290 + ] + }, + { + "id": "675a8b37-17db-48d1-853c-2fe5d6a74582", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 273 + ], + "pos": [ + -2143.630859375, + -3270 + ] + } + ], + "outputs": [ + { + "id": "ff50da09-1e59-4a58-9b7f-be1a00aa5913", + "name": "masks", + "type": "MASK", + "linkIds": [ + 231 + ], + "localized_name": "masks", + "pos": [ + -1030, + -3490 + ] + }, + { + "id": "8f622e40-8528-4078-b7d3-147e9f872194", + "name": "bboxes", + "type": "BOUNDING_BOX", + "linkIds": [ + 232 + ], + "localized_name": "bboxes", + "pos": [ + -1030, + -3470 + ] + }, + { + "id": "6c9924ec-f0fa-4509-83ea-8f97f5889bcc", + "name": "audio", + "type": "AUDIO", + "linkIds": [ + 259 + ], + "pos": [ + -1030, + -3450 + ] + }, + { + "id": "82c1cddc-ab11-44eb-9e2f-1a5c7ea5645b", + "name": "fps", + "type": "FLOAT", + "linkIds": [ + 260 + ], + "pos": [ + -1030, + -3430 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 125, + "type": "CLIPTextEncode", + "pos": [ + -2010, + -3040 + ], + 
"size": [ + 400, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 240 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 254 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 200 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 126, + "type": "SAM3_Detect", + "pos": [ + -1520, + -3520 + ], + "size": [ + 270, + 290 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "model", + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 237 + }, + { + "label": "image", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 253 + }, + { + "label": "conditioning", + "localized_name": "conditioning", + "name": "conditioning", + "shape": 7, + "type": "CONDITIONING", + "link": 200 + }, + { + "label": "bboxes", + "localized_name": "bboxes", + "name": "bboxes", + "shape": 7, + "type": "BOUNDING_BOX", + "link": 255 + }, + { + "label": "positive_coords", + "localized_name": "positive_coords", + "name": "positive_coords", + "shape": 7, + "type": "STRING", + "link": 256 + }, + { + "label": "negative_coords", + "localized_name": "negative_coords", + "name": "negative_coords", + "shape": 7, + "type": "STRING", + "link": 257 + }, + { + "localized_name": "threshold", + "name": "threshold", + "type": "FLOAT", + "widget": { + "name": "threshold" + }, + "link": 261 + }, + { + "localized_name": "refine_iterations", + "name": "refine_iterations", + "type": "INT", + "widget": { + "name": 
"refine_iterations" + }, + "link": 262 + }, + { + "localized_name": "individual_masks", + "name": "individual_masks", + "type": "BOOLEAN", + "widget": { + "name": "individual_masks" + }, + "link": 263 + } + ], + "outputs": [ + { + "localized_name": "masks", + "name": "masks", + "type": "MASK", + "links": [ + 231 + ] + }, + { + "localized_name": "bboxes", + "name": "bboxes", + "type": "BOUNDING_BOX", + "links": [ + 232 + ] + } + ], + "properties": { + "Node name for S&R": "SAM3_Detect", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.5, + 2, + false + ] + }, + { + "id": 127, + "type": "CheckpointLoaderSimple", + "pos": [ + -1970, + -3310 + ], + "size": [ + 330, + 160 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 273 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 237 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 240 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": null + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "sam3.1_multiplex_fp16.safetensors", + "url": "https://huggingface.co/Comfy-Org/sam3.1/resolve/main/checkpoints/sam3.1_multiplex_fp16.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "sam3.1_multiplex_fp16.safetensors" + ] + }, + { + "id": 128, + "type": "GetVideoComponents", + "pos": [ 
+ -1910, + -3540 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 252 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 253 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 259 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 260 + ] + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.19.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 129, + "type": "Note", + "pos": [ + -1980, + -2790 + ], + "size": [ + 370, + 250 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Note: Prompt format", + "properties": {}, + "widgets_values": [ + "Max tokens for this model is only 32, to separately prompt multiple subjects you can separate prompts with comma, and set the max amount of objects detected for each prompt with :N\n\nFor example above test prompt finds 2 cakes, one apron, 4 window panels" + ], + "color": "#432", + "bgcolor": "#653" + } + ], + "groups": [], + "links": [ + { + "id": 237, + "origin_id": 127, + "origin_slot": 0, + "target_id": 126, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 200, + "origin_id": 125, + "origin_slot": 0, + "target_id": 126, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 240, + "origin_id": 127, + "origin_slot": 1, + "target_id": 125, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 231, + "origin_id": 126, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "MASK" + }, + { + "id": 232, + "origin_id": 126, + "origin_slot": 1, + "target_id": -20, + "target_slot": 1, + "type": "BOUNDING_BOX" + 
}, + { + "id": 252, + "origin_id": -10, + "origin_slot": 0, + "target_id": 128, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 253, + "origin_id": 128, + "origin_slot": 0, + "target_id": 126, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 254, + "origin_id": -10, + "origin_slot": 1, + "target_id": 125, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 255, + "origin_id": -10, + "origin_slot": 2, + "target_id": 126, + "target_slot": 3, + "type": "BOUNDING_BOX" + }, + { + "id": 256, + "origin_id": -10, + "origin_slot": 3, + "target_id": 126, + "target_slot": 4, + "type": "STRING" + }, + { + "id": 257, + "origin_id": -10, + "origin_slot": 4, + "target_id": 126, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 259, + "origin_id": 128, + "origin_slot": 1, + "target_id": -20, + "target_slot": 2, + "type": "AUDIO" + }, + { + "id": 260, + "origin_id": 128, + "origin_slot": 2, + "target_id": -20, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 261, + "origin_id": -10, + "origin_slot": 5, + "target_id": 126, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 262, + "origin_id": -10, + "origin_slot": 6, + "target_id": 126, + "target_slot": 7, + "type": "INT" + }, + { + "id": 263, + "origin_id": -10, + "origin_slot": 7, + "target_id": 126, + "target_slot": 8, + "type": "BOOLEAN" + }, + { + "id": 273, + "origin_id": -10, + "origin_slot": 8, + "target_id": 127, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video Tools", + "description": "Segments video into temporally consistent masks using Meta SAM3 from text or interactive prompts." 
+ } + ] + }, + "extra": {} +} diff --git a/blueprints/Video Stitch.json b/blueprints/Video Stitch.json index 11bcf6b7d..2ac78b328 100644 --- a/blueprints/Video Stitch.json +++ b/blueprints/Video Stitch.json @@ -1 +1,901 @@ -{"revision": 0, "last_node_id": 84, "last_link_id": 0, "nodes": [{"id": 84, "type": "8e8aa94a-647e-436d-8440-8ee4691864de", "pos": [-6100, 2620], "size": [290, 160], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "Before Video", "localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"label": "After Video", "localized_name": "video_1", "name": "video_1", "type": "VIDEO", "link": null}, {"name": "direction", "type": "COMBO", "widget": {"name": "direction"}, "link": null}, {"name": "match_image_size", "type": "BOOLEAN", "widget": {"name": "match_image_size"}, "link": null}, {"name": "spacing_width", "type": "INT", "widget": {"name": "spacing_width"}, "link": null}, {"name": "spacing_color", "type": "COMBO", "widget": {"name": "spacing_color"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "direction"], ["-1", "match_image_size"], ["-1", "spacing_width"], ["-1", "spacing_color"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["right", true, 0, "white"], "title": "Video Stitch"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "8e8aa94a-647e-436d-8440-8ee4691864de", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 84, "lastLinkId": 262, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Stitch", "inputNode": {"id": -10, "bounding": [-6580, 2649, 143.55859375, 160]}, "outputNode": {"id": -20, "bounding": [-5720, 2659, 120, 60]}, "inputs": [{"id": "85555afe-c7a1-4f6e-b073-7c37f7bace7f", "name": "video", "type": "VIDEO", "linkIds": [253], "localized_name": "video", "label": "Before Video", "pos": [-6456.44140625, 2669]}, {"id": "022773ee-6b4f-4e3d-bead-68b3e75e2d20", 
"name": "video_1", "type": "VIDEO", "linkIds": [254], "localized_name": "video_1", "label": "After Video", "pos": [-6456.44140625, 2689]}, {"id": "7bcd7cbc-e918-472a-a0cf-2e0900545372", "name": "direction", "type": "COMBO", "linkIds": [259], "pos": [-6456.44140625, 2709]}, {"id": "9a00389d-c1c8-40d5-87fe-f41019b61fbc", "name": "match_image_size", "type": "BOOLEAN", "linkIds": [260], "pos": [-6456.44140625, 2729]}, {"id": "b95e0440-3ea8-4ae0-887e-12e75701042a", "name": "spacing_width", "type": "INT", "linkIds": [261], "pos": [-6456.44140625, 2749]}, {"id": "83ab9382-0a70-4169-b26a-66ab026b43c4", "name": "spacing_color", "type": "COMBO", "linkIds": [262], "pos": [-6456.44140625, 2769]}], "outputs": [{"id": "09707f43-7552-4a6e-bd23-d962d31801c2", "name": "VIDEO", "type": "VIDEO", "linkIds": [255], "localized_name": "VIDEO", "pos": [-5700, 2679]}], "widgets": [], "nodes": [{"id": 78, "type": "GetVideoComponents", "pos": [-6390, 2560], "size": [193.530859375, 66], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 254}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [249]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 77, "type": "GetVideoComponents", "pos": [-6390, 2420], "size": [193.530859375, 66], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 253}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [248]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [251]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [252]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": 
"GetVideoComponents"}}, {"id": 79, "type": "ImageStitch", "pos": [-6390, 2700], "size": [270, 150], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image1", "name": "image1", "type": "IMAGE", "link": 248}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 249}, {"localized_name": "direction", "name": "direction", "type": "COMBO", "widget": {"name": "direction"}, "link": 259}, {"localized_name": "match_image_size", "name": "match_image_size", "type": "BOOLEAN", "widget": {"name": "match_image_size"}, "link": 260}, {"localized_name": "spacing_width", "name": "spacing_width", "type": "INT", "widget": {"name": "spacing_width"}, "link": 261}, {"localized_name": "spacing_color", "name": "spacing_color", "type": "COMBO", "widget": {"name": "spacing_color"}, "link": 262}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [250]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageStitch"}, "widgets_values": ["right", true, 0, "white"]}, {"id": 80, "type": "CreateVideo", "pos": [-6040, 2610], "size": [270, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 250}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 251}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 252}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [255]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}], "groups": [], "links": [{"id": 248, "origin_id": 77, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "IMAGE"}, {"id": 249, "origin_id": 78, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "IMAGE"}, {"id": 250, "origin_id": 79, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 251, 
"origin_id": 77, "origin_slot": 1, "target_id": 80, "target_slot": 1, "type": "AUDIO"}, {"id": 252, "origin_id": 77, "origin_slot": 2, "target_id": 80, "target_slot": 2, "type": "FLOAT"}, {"id": 253, "origin_id": -10, "origin_slot": 0, "target_id": 77, "target_slot": 0, "type": "VIDEO"}, {"id": 254, "origin_id": -10, "origin_slot": 1, "target_id": 78, "target_slot": 0, "type": "VIDEO"}, {"id": 255, "origin_id": 80, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 259, "origin_id": -10, "origin_slot": 2, "target_id": 79, "target_slot": 2, "type": "COMBO"}, {"id": 260, "origin_id": -10, "origin_slot": 3, "target_id": 79, "target_slot": 3, "type": "BOOLEAN"}, {"id": 261, "origin_id": -10, "origin_slot": 4, "target_id": 79, "target_slot": 4, "type": "INT"}, {"id": 262, "origin_id": -10, "origin_slot": 5, "target_id": 79, "target_slot": 5, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video Tools/Stitch videos"}]}} +{ + "revision": 0, + "last_node_id": 85, + "last_link_id": 0, + "nodes": [ + { + "id": 85, + "type": "637913e7-0206-46ba-8ded-70ae3a7c2e19", + "pos": [ + -880, + -2260 + ], + "size": [ + 290, + 160 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "Before Video", + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "label": "After Video", + "localized_name": "video_1", + "name": "video_1", + "type": "VIDEO", + "link": null + }, + { + "name": "direction", + "type": "COMBO", + "widget": { + "name": "direction" + }, + "link": null + }, + { + "name": "match_image_size", + "type": "BOOLEAN", + "widget": { + "name": "match_image_size" + }, + "link": null + }, + { + "name": "spacing_width", + "type": "INT", + "widget": { + "name": "spacing_width" + }, + "link": null + }, + { + "name": "spacing_color", + "type": "COMBO", + "widget": { + "name": "spacing_color" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + 
"name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "79", + "direction" + ], + [ + "79", + "match_image_size" + ], + [ + "79", + "spacing_width" + ], + [ + "79", + "spacing_color" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [], + "title": "Video Stitch" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "637913e7-0206-46ba-8ded-70ae3a7c2e19", + "version": 1, + "state": { + "lastGroupId": 1, + "lastNodeId": 97, + "lastLinkId": 282, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Stitch", + "inputNode": { + "id": -10, + "bounding": [ + -6810, + 2580, + 143.55859375, + 160 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + -4770, + 2600, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "85555afe-c7a1-4f6e-b073-7c37f7bace7f", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 253 + ], + "localized_name": "video", + "label": "Before Video", + "pos": [ + -6686.44140625, + 2600 + ] + }, + { + "id": "022773ee-6b4f-4e3d-bead-68b3e75e2d20", + "name": "video_1", + "type": "VIDEO", + "linkIds": [ + 254 + ], + "localized_name": "video_1", + "label": "After Video", + "pos": [ + -6686.44140625, + 2620 + ] + }, + { + "id": "7bcd7cbc-e918-472a-a0cf-2e0900545372", + "name": "direction", + "type": "COMBO", + "linkIds": [ + 259 + ], + "pos": [ + -6686.44140625, + 2640 + ] + }, + { + "id": "9a00389d-c1c8-40d5-87fe-f41019b61fbc", + "name": "match_image_size", + "type": "BOOLEAN", + "linkIds": [ + 260 + ], + "pos": [ + -6686.44140625, + 2660 + ] + }, + { + "id": "b95e0440-3ea8-4ae0-887e-12e75701042a", + "name": "spacing_width", + "type": "INT", + "linkIds": [ + 261 + ], + "pos": [ + -6686.44140625, + 2680 + ] + }, + { + "id": "83ab9382-0a70-4169-b26a-66ab026b43c4", + "name": "spacing_color", + "type": "COMBO", + "linkIds": [ + 262 + ], + "pos": [ + -6686.44140625, + 2700 + ] + } + ], + "outputs": [ + { + "id": 
"09707f43-7552-4a6e-bd23-d962d31801c2", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 255 + ], + "localized_name": "VIDEO", + "pos": [ + -4750, + 2620 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 78, + "type": "GetVideoComponents", + "pos": [ + -6390, + 2600 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 254 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 249 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": null + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.13.0" + } + }, + { + "id": 77, + "type": "GetVideoComponents", + "pos": [ + -6390, + 2420 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 253 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 248 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 251 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 252 + ] + } + ], + "properties": { + "Node name for S&R": "GetVideoComponents", + "cnr_id": "comfy-core", + "ver": "0.13.0" + } + }, + { + "id": 90, + "type": "GetImageSize", + "pos": [ + -6390, + 3030 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 274 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": 
"INT", + "links": [ + 276 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 80, + "type": "CreateVideo", + "pos": [ + -5190, + 2420 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 282 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 251 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 252 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 255 + ] + } + ], + "properties": { + "Node name for S&R": "CreateVideo", + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 95, + "type": "ComfyMathExpression", + "pos": [ + -6040, + 3020 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 274 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 279 + ] + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a & ~1" + ] + }, + { + "id": 96, + "type": "ComfyMathExpression", + "pos": [ + -6040, + 3290 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + 
"label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 276 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 280 + ] + } + ], + "properties": { + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a & ~1" + ] + }, + { + "id": 79, + "type": "ImageStitch", + "pos": [ + -6390, + 2780 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image1", + "name": "image1", + "type": "IMAGE", + "link": 248 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 249 + }, + { + "localized_name": "direction", + "name": "direction", + "type": "COMBO", + "widget": { + "name": "direction" + }, + "link": 259 + }, + { + "localized_name": "match_image_size", + "name": "match_image_size", + "type": "BOOLEAN", + "widget": { + "name": "match_image_size" + }, + "link": 260 + }, + { + "localized_name": "spacing_width", + "name": "spacing_width", + "type": "INT", + "widget": { + "name": "spacing_width" + }, + "link": 261 + }, + { + "localized_name": "spacing_color", + "name": "spacing_color", + "type": "COMBO", + "widget": { + "name": "spacing_color" + }, + "link": 262 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 266, + 281 + ] + } + ], + "properties": { + "Node name for S&R": "ImageStitch", + "cnr_id": "comfy-core", + "ver": "0.13.0" + }, + "widgets_values": [ + "right", + true, + 0, + "white" + ] + }, + { + "id": 97, + "type": 
"ResizeImageMaskNode", + "pos": [ + -5560, + 2790 + ], + "size": [ + 270, + 160 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 281 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 279 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 280 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 282 + ] + } + ], + "properties": { + "Node name for S&R": "ResizeImageMaskNode" + }, + "widgets_values": [ + "scale dimensions", + 512, + 512, + "center", + "area" + ] + } + ], + "groups": [], + "links": [ + { + "id": 248, + "origin_id": 77, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 249, + "origin_id": 78, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 251, + "origin_id": 77, + "origin_slot": 1, + "target_id": 80, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 252, + "origin_id": 77, + "origin_slot": 2, + "target_id": 80, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 253, + "origin_id": -10, + "origin_slot": 0, + "target_id": 77, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 254, + "origin_id": -10, + "origin_slot": 1, + "target_id": 78, + "target_slot": 0, + 
"type": "VIDEO" + }, + { + "id": 255, + "origin_id": 80, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 259, + "origin_id": -10, + "origin_slot": 2, + "target_id": 79, + "target_slot": 2, + "type": "COMBO" + }, + { + "id": 260, + "origin_id": -10, + "origin_slot": 3, + "target_id": 79, + "target_slot": 3, + "type": "BOOLEAN" + }, + { + "id": 261, + "origin_id": -10, + "origin_slot": 4, + "target_id": 79, + "target_slot": 4, + "type": "INT" + }, + { + "id": 262, + "origin_id": -10, + "origin_slot": 5, + "target_id": 79, + "target_slot": 5, + "type": "COMBO" + }, + { + "id": 266, + "origin_id": 79, + "origin_slot": 0, + "target_id": 90, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 274, + "origin_id": 90, + "origin_slot": 0, + "target_id": 95, + "target_slot": 0, + "type": "INT" + }, + { + "id": 276, + "origin_id": 90, + "origin_slot": 1, + "target_id": 96, + "target_slot": 0, + "type": "INT" + }, + { + "id": 279, + "origin_id": 95, + "origin_slot": 1, + "target_id": 97, + "target_slot": 2, + "type": "INT" + }, + { + "id": 280, + "origin_id": 96, + "origin_slot": 1, + "target_id": 97, + "target_slot": 3, + "type": "INT" + }, + { + "id": 281, + "origin_id": 79, + "origin_slot": 0, + "target_id": 97, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 282, + "origin_id": 97, + "origin_slot": 0, + "target_id": 80, + "target_slot": 0, + "type": "IMAGE" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video Tools/Stitch videos", + "description": "Stitches multiple video clips into a single sequential video file." 
+ } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Video Upscale(GAN x4).json b/blueprints/Video Upscale(GAN x4).json index e80b2e229..73476e36b 100644 --- a/blueprints/Video Upscale(GAN x4).json +++ b/blueprints/Video Upscale(GAN x4).json @@ -1 +1,421 @@ -{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": 
"UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": 
"https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": "UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}} +{ + "revision": 0, + "last_node_id": 13, + "last_link_id": 0, + "nodes": [ + { + "id": 13, + "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", + "pos": [ + 1120, + 330 + ], + "size": [ + 240, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": null + }, + { + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "title": "Video Upscale(GAN x4)", + "properties": { + "proxyWidgets": [ + [ + "-1", + "model_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.14.1" + }, + "widgets_values": [ + "RealESRGAN_x4plus.safetensors" + ] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + 
{ + "id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", + "version": 1, + "state": { + "lastGroupId": 0, + "lastNodeId": 13, + "lastLinkId": 19, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Video Upscale(GAN x4)", + "inputNode": { + "id": -10, + "bounding": [ + 550, + 460, + 120, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1490, + 460, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", + "name": "video", + "type": "VIDEO", + "linkIds": [ + 10 + ], + "localized_name": "video", + "pos": [ + 650, + 480 + ] + }, + { + "id": "2e23a087-caa8-4d65-99e6-662761aa905a", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 19 + ], + "pos": [ + 650, + 500 + ] + } + ], + "outputs": [ + { + "id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 15 + ], + "localized_name": "VIDEO", + "pos": [ + 1510, + 480 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 2, + "type": "ImageUpscaleWithModel", + "pos": [ + 1110, + 450 + ], + "size": [ + 320, + 46 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "UPSCALE_MODEL", + "link": 1 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 14 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "ImageUpscaleWithModel" + } + }, + { + "id": 11, + "type": "CreateVideo", + "pos": [ + 1110, + 550 + ], + "size": [ + 320, + 78 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 13 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 16 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + 
"widget": { + "name": "fps" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 15 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "CreateVideo" + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 10, + "type": "GetVideoComponents", + "pos": [ + 1110, + 330 + ], + "size": [ + 320, + 70 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 10 + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 14 + ] + }, + { + "localized_name": "audio", + "name": "audio", + "type": "AUDIO", + "links": [ + 16 + ] + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "links": [ + 12 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "GetVideoComponents" + } + }, + { + "id": 1, + "type": "UpscaleModelLoader", + "pos": [ + 750, + 450 + ], + "size": [ + 280, + 60 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 19 + } + ], + "outputs": [ + { + "localized_name": "UPSCALE_MODEL", + "name": "UPSCALE_MODEL", + "type": "UPSCALE_MODEL", + "links": [ + 1 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "UpscaleModelLoader", + "models": [ + { + "name": "RealESRGAN_x4plus.safetensors", + "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", + "directory": "upscale_models" + } + ] + }, + "widgets_values": [ + "RealESRGAN_x4plus.safetensors" + ] + } + ], + "groups": [], + "links": [ + { + "id": 1, + "origin_id": 1, + "origin_slot": 0, + "target_id": 2, + "target_slot": 0, + "type": "UPSCALE_MODEL" + }, + { + "id": 
14, + "origin_id": 10, + "origin_slot": 0, + "target_id": 2, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 13, + "origin_id": 2, + "origin_slot": 0, + "target_id": 11, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 16, + "origin_id": 10, + "origin_slot": 1, + "target_id": 11, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 12, + "origin_id": 10, + "origin_slot": 2, + "target_id": 11, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 10, + "origin_id": -10, + "origin_slot": 0, + "target_id": 10, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 15, + "origin_id": 11, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 19, + "origin_id": -10, + "origin_slot": 1, + "target_id": 1, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Video generation and editing/Enhance video", + "description": "Upscales video to 4× resolution using a GAN-based upscaling model." 
class Attention(nn.Module):
    """Multi-head self-attention with a separate query and a fused key/value projection.

    Args:
        dim: Channel width of the input tokens.
        num_heads: Number of attention heads; ``dim`` is split evenly across them.
        qkv_bias: Whether the ``q`` and ``kv`` projections carry a bias.
        qk_scale: Optional override for the stored ``scale`` attribute.
        device, dtype: Forwarded to the layer constructors.
        operations: Namespace providing the ``Linear`` class to instantiate.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, device=None, dtype=None, operations=None):
        super().__init__()
        factory = dict(device=device, dtype=dtype)

        self.dim = dim
        self.num_heads = num_heads
        # Kept as an attribute; forward() below never reads it.
        self.scale = qk_scale or (dim // num_heads) ** -0.5

        self.q = operations.Linear(dim, dim, bias=qkv_bias, **factory)
        self.kv = operations.Linear(dim, 2 * dim, bias=qkv_bias, **factory)
        self.proj = operations.Linear(dim, dim, **factory)

    def forward(self, x):
        """Attend over the token axis of ``x`` (shape ``(B, N, C)``) and return a tensor of the same shape."""
        batch, tokens, channels = x.shape
        heads = self.num_heads
        per_head = channels // heads
        attend = optimized_attention_for_device(x.device, mask=False, small_input=True)

        # Both projections are laid out as (B, heads, N, head_dim).
        queries = self.q(x).reshape(batch, tokens, heads, per_head).permute(0, 2, 1, 3)
        fused = self.kv(x).reshape(batch, -1, 2, heads, per_head).permute(2, 0, 3, 1, 4)
        keys, values = fused[0], fused[1]

        # NOTE(review): the skip_* flags presumably tell the backend that inputs
        # and output stay in (B, heads, N, head_dim) layout -- confirm against
        # comfy.ldm.modules.attention.
        out = attend(queries, keys, values, heads=heads, skip_output_reshape=True, skip_reshape=True)
        out = out.transpose(1, 2).reshape(batch, tokens, channels)
        return self.proj(out)
class WindowAttention(nn.Module):
    """Window-local multi-head self-attention with a learned relative position bias.

    Args:
        dim: Channel width of the input tokens.
        window_size: ``(Wh, Ww)`` extent of one attention window.
        num_heads: Number of attention heads.
        qkv_bias: Whether the fused ``qkv`` projection carries a bias.
        qk_scale: Optional override for the default ``head_dim ** -0.5`` query scale.
        device, dtype: Forwarded to the parameter and layer constructors.
        operations: Namespace providing the ``Linear`` class to instantiate.
    """

    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, device=None, dtype=None, operations=None):
        super().__init__()
        win_h, win_w = window_size[0], window_size[1]

        self.dim = dim
        self.window_size = window_size
        self.num_heads = num_heads
        self.scale = qk_scale or (dim // num_heads) ** -0.5

        # One bias row per possible (row offset, column offset) pair, per head.
        self.relative_position_bias_table = nn.Parameter(
            torch.zeros((2 * win_h - 1) * (2 * win_w - 1), num_heads, device=device, dtype=dtype))

        # For every (query, key) pair inside a window, precompute the row of the
        # bias table that holds their relative offset.
        rows, cols = torch.meshgrid([torch.arange(win_h), torch.arange(win_w)], indexing='ij')
        rows, cols = rows.flatten(), cols.flatten()
        row_delta = rows[:, None] - rows[None, :] + (win_h - 1)  # shifted to start at 0
        col_delta = cols[:, None] - cols[None, :] + (win_w - 1)
        self.register_buffer("relative_position_index", row_delta * (2 * win_w - 1) + col_delta)

        self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, device=device, dtype=dtype)
        self.proj = operations.Linear(dim, dim, device=device, dtype=dtype)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, mask=None):
        """Run attention inside each window.

        Args:
            x: Tensor of shape ``(num_windows * B, N, C)``.
            mask: Optional additive mask whose first dimension is the number of
                windows per image; it is broadcast over batch and heads.

        Returns:
            Tensor with the same shape as ``x``.
        """
        total, tokens, channels = x.shape
        heads = self.num_heads

        qkv = self.qkv(x).reshape(total, tokens, 3, heads, channels // heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]
        scores = (q * self.scale) @ k.transpose(-2, -1)

        # Gather the per-pair bias and move heads to the front: (nH, N, N).
        area = self.window_size[0] * self.window_size[1]
        bias = self.relative_position_bias_table[self.relative_position_index.long().view(-1)]
        bias = bias.view(area, area, -1).permute(2, 0, 1).contiguous()
        scores = scores + bias.unsqueeze(0)

        if mask is not None:
            per_image = mask.shape[0]
            scores = scores.view(total // per_image, per_image, heads, tokens, tokens) + mask.unsqueeze(1).unsqueeze(0)
            scores = scores.view(-1, heads, tokens, tokens)
        weights = self.softmax(scores)

        out = (weights @ v).transpose(1, 2).reshape(total, tokens, channels)
        return self.proj(out)
class PatchMerging(nn.Module):
    """Halve the spatial resolution by folding each 2x2 neighbourhood into the channel axis.

    The four neighbours are concatenated (C -> 4C), layer-normalised and then
    linearly reduced to 2C channels.

    Args:
        dim: Number of input channels C.
        device, dtype: Forwarded to the layer constructors.
        operations: Namespace providing the ``Linear`` and ``LayerNorm`` classes.
    """

    def __init__(self, dim, device=None, dtype=None, operations=None):
        super().__init__()
        self.dim = dim
        self.reduction = operations.Linear(4 * dim, 2 * dim, bias=False, device=device, dtype=dtype)
        self.norm = operations.LayerNorm(4 * dim, device=device, dtype=dtype)

    def forward(self, x, H, W):
        """Merge patches of ``x`` (shape ``(B, H*W, C)``) into ``(B, ceil(H/2)*ceil(W/2), 2C)``."""
        batch, _, channels = x.shape
        grid = x.view(batch, H, W, channels)

        # Odd extents get one zero row/column so every 2x2 cell is complete.
        if H % 2 == 1 or W % 2 == 1:
            grid = F.pad(grid, (0, 0, 0, W % 2, 0, H % 2))

        # Channel order: top-left, bottom-left, top-right, bottom-right.
        cells = [grid[:, row::2, col::2, :] for col in (0, 1) for row in (0, 1)]
        merged = torch.cat(cells, -1).view(batch, -1, 4 * channels)

        return self.reduction(self.norm(merged))
class PatchEmbed(nn.Module):
    """Project an image into non-overlapping patch embeddings with a strided convolution.

    Args:
        patch_size: Side length of the square patches (kernel size and stride).
        in_channels: Number of image channels.
        embed_dim: Number of output channels per patch.
        norm_layer: Optional factory called as ``norm_layer(embed_dim, device=..., dtype=...)``.
        device, dtype: Forwarded to the layer constructors.
        operations: Namespace providing the ``Conv2d`` class to instantiate.
    """

    def __init__(self, patch_size=4, in_channels=3, embed_dim=96, norm_layer=None, device=None, dtype=None, operations=None):
        super().__init__()
        self.patch_size = (patch_size, patch_size)
        self.in_channels = in_channels
        self.embed_dim = embed_dim

        self.proj = operations.Conv2d(
            in_channels, embed_dim,
            kernel_size=self.patch_size, stride=self.patch_size,
            device=device, dtype=dtype)
        self.norm = None if norm_layer is None else norm_layer(embed_dim, device=device, dtype=dtype)

    def forward(self, x):
        """Embed ``x`` of shape ``(B, C, H, W)`` into ``(B, embed_dim, Wh, Ww)``."""
        patch_h, patch_w = self.patch_size
        _, _, height, width = x.size()

        # Zero-pad on the right/bottom up to the next multiple of the patch size.
        pad_right = -width % patch_w
        pad_bottom = -height % patch_h
        if pad_right or pad_bottom:
            x = F.pad(x, (0, pad_right, 0, pad_bottom))

        x = self.proj(x)
        if self.norm is None:
            return x

        # The norm acts on the channel axis, so go through (B, Wh*Ww, C) and back.
        grid_h, grid_w = x.size(2), x.size(3)
        tokens = self.norm(x.flatten(2).transpose(1, 2))
        return tokens.transpose(1, 2).view(-1, self.embed_dim, grid_h, grid_w)
class DeformableConv2d(nn.Module):
    """Modulated deformable 2D convolution.

    Two ordinary convolutions predict, from the input itself, the sampling
    offsets and a per-tap modulation mask. ``regular_conv`` is never called as
    a layer; it only owns the kernel weight (and optional bias) that
    ``deform_conv2d`` consumes.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Int or ``(kh, kw)`` tuple.
        stride: Int or ``(sh, sw)`` tuple.
        padding: Padding forwarded unchanged to every convolution.
        bias: Whether ``regular_conv`` owns a bias term.
        device, dtype: Forwarded to the layer constructors.
        operations: Namespace providing the ``Conv2d`` class to instantiate.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 bias=False, device=None, dtype=None, operations=None):
        super().__init__()

        if not isinstance(kernel_size, tuple):
            kernel_size = (kernel_size, kernel_size)
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding

        taps = kernel_size[0] * kernel_size[1]

        # Two offset values (one per spatial axis) for every kernel tap.
        self.offset_conv = operations.Conv2d(in_channels,
                                             2 * taps,
                                             kernel_size=kernel_size,
                                             stride=stride,
                                             padding=self.padding,
                                             bias=True, device=device, dtype=dtype)

        # One modulation scalar for every kernel tap.
        self.modulator_conv = operations.Conv2d(in_channels,
                                                1 * taps,
                                                kernel_size=kernel_size,
                                                stride=stride,
                                                padding=self.padding,
                                                bias=True, device=device, dtype=dtype)

        self.regular_conv = operations.Conv2d(in_channels,
                                              out_channels=out_channels,
                                              kernel_size=kernel_size,
                                              stride=stride,
                                              padding=self.padding,
                                              bias=bias, device=device, dtype=dtype)

    def forward(self, x):
        """Apply the deformable convolution to ``x`` and return the result."""
        offset = self.offset_conv(x)
        # Sigmoid scaled by 2 keeps the modulation mask in (0, 2).
        modulator = 2. * torch.sigmoid(self.modulator_conv(x))
        weight, bias, offload_info = comfy.ops.cast_bias_weight(self.regular_conv, x, offloadable=True)

        # Forward the cast bias instead of a hard-coded None so that a module
        # built with bias=True actually applies it.
        # NOTE(review): assumes cast_bias_weight returns None for a bias-free
        # conv, which keeps the default bias=False path unchanged -- confirm.
        x = deform_conv2d(
            input=x,
            offset=offset,
            weight=weight,
            bias=bias,
            padding=self.padding,
            mask=modulator,
            stride=self.stride,
        )
        comfy.ops.uncast_bias_weight(self.regular_conv, weight, bias, offload_info)
        return x
class _ASPPModuleDeformable(nn.Module):
    """One ASPP branch: deformable convolution, then batch norm, then ReLU.

    Args:
        in_channels: Channels of the input feature map.
        planes: Channels produced by the branch.
        kernel_size: Kernel size of the deformable convolution.
        padding: Padding of the deformable convolution.
        device, dtype: Forwarded to every layer that is constructed.
        operations: Namespace providing the layer implementations.
    """

    def __init__(self, in_channels, planes, kernel_size, padding, device, dtype, operations):
        super().__init__()
        factory = {"device": device, "dtype": dtype}
        self.atrous_conv = DeformableConv2d(
            in_channels,
            planes,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            bias=False,
            operations=operations,
            **factory,
        )
        self.bn = operations.BatchNorm2d(planes, **factory)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run the branch on ``x`` and return the activated features."""
        normalized = self.bn(self.atrous_conv(x))
        return self.relu(normalized)
class BiRefNet(nn.Module):
    """Background-removal network: Swin backbone, squeeze block and decoder.

    The backbone runs on the input at full and at half resolution, and the two
    feature pyramids are concatenated channel-wise. That is why every entry of
    ``channels`` is doubled.

    Args:
        config: Accepted for interface compatibility; not read here.
        dtype, device: Forwarded to every sub-module that is constructed.
        operations: Namespace providing the layer implementations.
    """

    def __init__(self, config=None, dtype=None, device=None, operations=None):
        super(BiRefNet, self).__init__()
        self.bb = SwinTransformer(embed_dim=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=12, device=device, dtype=dtype, operations=operations)

        channels = [1536, 768, 384, 192]
        channels = [c * 2 for c in channels]
        # Channel counts of the shallower stages that are merged into the
        # deepest feature map as extra context (shallowest first).
        self.cxt = channels[1:][::-1][-3:]
        self.squeeze_module = nn.Sequential(*[
            BasicDecBlk(channels[0]+sum(self.cxt), channels[0], device=device, dtype=dtype, operations=operations)
            for _ in range(1)
        ])

        self.decoder = Decoder(channels, device=device, dtype=dtype, operations=operations)

    def forward_enc(self, x):
        """Encode ``x`` and return the four fused feature maps ``(x1, x2, x3, x4)``."""
        x1, x2, x3, x4 = self.bb(x)
        _, _, H, W = x.shape
        # Second backbone pass on a half-resolution copy of the input.
        x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True))
        x1 = torch.cat([x1, F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True)], dim=1)
        x2 = torch.cat([x2, F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True)], dim=1)
        x3 = torch.cat([x3, F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True)], dim=1)
        x4 = torch.cat([x4, F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True)], dim=1)
        # The number of context maps must match self.cxt, because that list
        # sized the squeeze module's input channels in __init__.
        x4 = torch.cat(
            (
                *[
                    F.interpolate(x1, size=x4.shape[2:], mode='bilinear', align_corners=True),
                    F.interpolate(x2, size=x4.shape[2:], mode='bilinear', align_corners=True),
                    F.interpolate(x3, size=x4.shape[2:], mode='bilinear', align_corners=True),
                ][-len(self.cxt):],
                x4
            ),
            dim=1
        )
        return (x1, x2, x3, x4)

    def forward_ori(self, x):
        """Run encoder, squeeze block and decoder; return the decoder output."""
        (x1, x2, x3, x4) = self.forward_enc(x)
        x4 = self.squeeze_module(x4)
        features = [x, x1, x2, x3, x4]
        scaled_preds = self.decoder(features)
        return scaled_preds

    def forward(self, pixel_values, intermediate_output=None):
        """Return the prediction for ``pixel_values``; ``intermediate_output`` is ignored."""
        scaled_preds = self.forward_ori(pixel_values)
        return scaled_preds
def get_patches_batch(self, x, p):
    """Cut every sample of ``x`` into tiles and stack them on the channel axis.

    The tile size is the spatial size of ``p`` (its last two dimensions).
    Tiles are visited column by column, and top to bottom within a column.
    Each sample therefore becomes one tensor holding the channels of all its
    tiles, and the samples are re-assembled along the batch axis.

    Args:
        x: Batched tensor to cut up.
        p: 4-D tensor whose last two dimensions give the tile height and width.

    Returns:
        Tensor with the batch size of ``x`` and the tiles concatenated along
        dimension 1.
    """
    tile_h, tile_w = p.shape[2:]
    per_sample = []
    for sample in x:
        tiles = [
            tile.unsqueeze(0)
            for strip in torch.split(sample, tile_w, dim=-1)
            for tile in torch.split(strip, tile_h, dim=-2)
        ]
        per_sample.append(torch.cat(tiles, dim=1))
    return torch.cat(per_sample, dim=0)
class SimpleConvs(nn.Module):
    """Two stacked 3x3 convolutions with no activation in between.

    Both convolutions use stride 1 and padding 1, so the spatial size of the
    input is preserved.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the returned tensor.
        inter_channels: Channels between the two convolutions.
        device, dtype: Forwarded to both convolutions.
        operations: Namespace providing the ``Conv2d`` implementation.
    """

    def __init__(
        self, in_channels: int, out_channels: int, inter_channels=64, device=None, dtype=None, operations=None
    ) -> None:
        super().__init__()
        conv_kwargs = {"kernel_size": 3, "stride": 1, "padding": 1, "device": device, "dtype": dtype}
        self.conv1 = operations.Conv2d(in_channels, inter_channels, **conv_kwargs)
        self.conv_out = operations.Conv2d(inter_channels, out_channels, **conv_kwargs)

    def forward(self, x):
        """Return ``conv_out(conv1(x))``."""
        hidden = self.conv1(x)
        return self.conv_out(hidden)
def load_background_removal_model(sd):
    """Build a background-removal model from a state dict, if it is recognised.

    The architecture is detected by the presence of a backbone-specific key.
    When the key is absent nothing is loaded and ``None`` is returned.

    Side effect: every key that the model consumed is removed from ``sd``, so
    only the keys reported as unexpected remain in it afterwards.

    Args:
        sd: Mapping of parameter names to tensors.

    Returns:
        The loaded ``BackgroundRemovalModel``, or ``None`` for an unrecognised
        state dict.
    """
    if "bb.layers.1.blocks.0.attn.relative_position_index" not in sd:
        return None

    package_dir = os.path.dirname(os.path.realpath(__file__))
    json_config = os.path.join(package_dir, "background_removal", "birefnet.json")

    bg_model = BackgroundRemovalModel(json_config)
    missing, unexpected = bg_model.load_sd(sd)
    if len(missing) > 0:
        logging.warning("missing background removal: {}".format(missing))

    # Drop what was loaded; iterate over a snapshot because sd is mutated.
    leftover = set(unexpected)
    for key in list(sd.keys()):
        if key in leftover:
            continue
        sd.pop(key)
    return bg_model
Is disabled at launch by default.") class LatentPreviewMethod(enum.Enum): NoPreviews = "none" @@ -141,8 +141,7 @@ manager_group.add_argument("--enable-manager-legacy-ui", action="store_true", he vram_group = parser.add_mutually_exclusive_group() vram_group.add_argument("--gpu-only", action="store_true", help="Store and run everything (text encoders/CLIP models, etc... on the GPU).") vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.") -vram_group.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") -vram_group.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") +vram_group.add_argument("--lowvram", action="store_true", help="Doesn't do anything if dynamic vram is enabled. If dynamic vram isn't being used this option makes the text encoders run on the CPU.") vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") @@ -238,6 +237,8 @@ database_default_path = os.path.abspath( ) parser.add_argument("--database-url", type=str, default=f"sqlite:///{database_default_path}", help="Specify the database URL, e.g. for an in-memory database you can use 'sqlite:///:memory:'.") parser.add_argument("--enable-assets", action="store_true", help="Enable the assets system (API routes, database synchronization, and background scanning).") +parser.add_argument("--feature-flag", type=str, action='append', default=[], metavar="KEY[=VALUE]", help="Set a server feature flag. Use KEY=VALUE to set an explicit value, or bare KEY to set it to true. Can be specified multiple times. Boolean values (true/false) and numbers are auto-converted. 
Examples: --feature-flag show_signin_button=true or --feature-flag show_signin_button") +parser.add_argument("--list-feature-flags", action="store_true", help="Print the registry of known CLI-settable feature flags as JSON and exit.") parser.add_argument("--dev-mode", action="store_true", help="Enable developer mode. Activates trainer VRAM profiling (forces batch_size=1, steps=1) and verbose debug logging for weight adapter systems.") diff --git a/comfy/context_windows.py b/comfy/context_windows.py index cb44ee6e8..db57537a2 100644 --- a/comfy/context_windows.py +++ b/comfy/context_windows.py @@ -63,7 +63,11 @@ class IndexListContextWindow(ContextWindowABC): dim = self.dim if dim == 0 and full.shape[dim] == 1: return full - idx = tuple([slice(None)] * dim + [self.index_list]) + indices = self.index_list + anchor_idx = getattr(self, 'causal_anchor_index', None) + if anchor_idx is not None and anchor_idx >= 0: + indices = [anchor_idx] + list(indices) + idx = tuple([slice(None)] * dim + [indices]) window = full[idx] if retain_index_list: idx = tuple([slice(None)] * dim + [retain_index_list]) @@ -113,7 +117,14 @@ def slice_cond(cond_value, window: IndexListContextWindow, x_in: torch.Tensor, d # skip leading latent positions that have no corresponding conditioning (e.g. 
reference frames) if temporal_offset > 0: - indices = [i - temporal_offset for i in window.index_list[temporal_offset:]] + anchor_idx = getattr(window, 'causal_anchor_index', None) + if anchor_idx is not None and anchor_idx >= 0: + # anchor occupies one of the no-cond positions, so skip one fewer from window.index_list + skip_count = temporal_offset - 1 + else: + skip_count = temporal_offset + + indices = [i - temporal_offset for i in window.index_list[skip_count:]] indices = [i for i in indices if 0 <= i] else: indices = list(window.index_list) @@ -150,7 +161,8 @@ class ContextFuseMethod: ContextResults = collections.namedtuple("ContextResults", ['window_idx', 'sub_conds_out', 'sub_conds', 'window']) class IndexListContextHandler(ContextHandlerABC): def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1, - closed_loop: bool=False, dim:int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False): + closed_loop: bool=False, dim:int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False, + causal_window_fix: bool=True): self.context_schedule = context_schedule self.fuse_method = fuse_method self.context_length = context_length @@ -162,6 +174,7 @@ class IndexListContextHandler(ContextHandlerABC): self.freenoise = freenoise self.cond_retain_index_list = [int(x.strip()) for x in cond_retain_index_list.split(",")] if cond_retain_index_list else [] self.split_conds_to_windows = split_conds_to_windows + self.causal_window_fix = causal_window_fix self.callbacks = {} @@ -318,6 +331,14 @@ class IndexListContextHandler(ContextHandlerABC): # allow processing to end between context window executions for faster Cancel comfy.model_management.throw_exception_if_processing_interrupted() + # causal_window_fix: prepend a pre-window frame that will be stripped post-forward + anchor_applied = False + if 
@functools.cache
def get_deploy_environment() -> str:
    """Return the deployment environment named by the install's marker file.

    The marker is the file ``_ENV_FILENAME`` inside ``_COMFY_INSTALL_DIR``.
    Only its first line is considered: it is stripped and reduced to printable
    ASCII. ``_DEFAULT_DEPLOY_ENV`` is returned when the file is missing, cannot
    be read, or yields an empty value. The result is cached for the lifetime
    of the process.
    """
    marker_path = os.path.join(_COMFY_INSTALL_DIR, _ENV_FILENAME)
    try:
        with open(marker_path, encoding="utf-8") as marker:
            # Bounded read: a huge single-line file cannot exhaust memory.
            raw = marker.readline(128).strip()
    except FileNotFoundError:
        # No marker is the normal case for a plain checkout.
        return _DEFAULT_DEPLOY_ENV
    except Exception as e:
        logger.error("Failed to read %s: %s", marker_path, e)
        return _DEFAULT_DEPLOY_ENV

    # Keep printable ASCII only (code points 32..126).
    printable = "".join(filter(lambda ch: 32 <= ord(ch) < 127, raw))
    return printable or _DEFAULT_DEPLOY_ENV
torch.nn.Parameter(torch.empty(1, 1, dim, dtype=dtype, device=device)) # mask_token is a pre-training param, kept only so strict loading accepts the key. self.mask_token = torch.nn.Parameter(torch.empty(1, dim, dtype=dtype, device=device)) + def interpolate_pos_encoding(self, x, h_pixels, w_pixels): + pos_embed = comfy.model_management.cast_to_device(self.position_embeddings, x.device, torch.float32) + + class_pos = pos_embed[:, 0:1] + patch_pos = pos_embed[:, 1:] + N = patch_pos.shape[1] + M = int(N ** 0.5) + h0 = h_pixels // self.patch_size + w0 = w_pixels // self.patch_size + scale_factor = ((h0 + 0.1) / M, (w0 + 0.1) / M) # +0.1 matches upstream DINOv2's FP-rounding workaround so the interpolate output size lands on (h0, w0). + + patch_pos = patch_pos.reshape(1, M, M, -1).permute(0, 3, 1, 2) + patch_pos = torch.nn.functional.interpolate(patch_pos, scale_factor=scale_factor, mode="bicubic", antialias=False) + patch_pos = patch_pos.permute(0, 2, 3, 1).flatten(1, 2) + return torch.cat((class_pos, patch_pos), dim=1).to(x.dtype) + def forward(self, pixel_values): x = self.patch_embeddings(pixel_values) - # TODO: mask_token? 
def get_intermediate_layers(self, pixel_values, indices, apply_norm=True):
    """Return the outputs of selected encoder layers.

    Args:
        pixel_values: Input passed to ``self.embeddings``.
        indices: Layer indices to collect. Negative values count from the last
            layer. Duplicates are allowed and keep their position.
        apply_norm: When true, ``self.layernorm`` is applied to each collected
            layer output.

    Returns:
        A list aligned with ``indices``. Each entry is a tuple of
        ``(tokens_without_first, first_token)``; the source marks the first
        token as the cls token. An empty ``indices`` gives an empty list
        without running the model.

    Raises:
        KeyError: If an index does not resolve to an existing layer.
    """
    indices = list(indices)
    if not indices:
        # Nothing requested: skip the forward pass rather than failing in max().
        return []

    x = self.embeddings(pixel_values)
    optimized_attention = optimized_attention_for_device(x.device, False, small_input=True)
    n_layers = len(self.encoder.layer)
    resolved = [(i if i >= 0 else n_layers + i) for i in indices]
    target = set(resolved)
    max_idx = max(resolved)
    n_skip = 1  # skip cls token
    cache = {}
    for i, layer in enumerate(self.encoder.layer):
        x = layer(x, optimized_attention)
        if i in target:
            normed = self.layernorm(x) if apply_norm else x
            cache[i] = (normed[:, n_skip:], normed[:, 0])
        if i >= max_idx:
            # Deeper layers are not needed for any requested index.
            break
    return [cache[i] for i in resolved]
sample_dpm_2_ancestral_RF(model, x, sigmas, extra_args=None, callback=None, extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_noise = s_noise * getattr(model.inner_model.model_patcher.get_model_object('model_sampling'), "noise_scale", 1.0) s_in = x.new_ones([x.shape[0]]) for i in trange(len(sigmas) - 1, disable=disable): denoised = model(x, sigmas[i] * s_in, **extra_args) @@ -686,6 +688,7 @@ def sample_dpmpp_2s_ancestral_RF(model, x, sigmas, extra_args=None, callback=Non extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_noise = s_noise * getattr(model.inner_model.model_patcher.get_model_object('model_sampling'), "noise_scale", 1.0) s_in = x.new_ones([x.shape[0]]) sigma_fn = lambda lbda: (lbda.exp() + 1) ** -1 lambda_fn = lambda sigma: ((1-sigma)/sigma).log() @@ -747,6 +750,7 @@ def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=N sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling) lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling) sigmas = offset_first_sigma_for_snr(sigmas, model_sampling) + s_noise = s_noise * getattr(model_sampling, "noise_scale", 1.0) for i in trange(len(sigmas) - 1, disable=disable): denoised = model(x, sigmas[i] * s_in, **extra_args) @@ -832,6 +836,7 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling') lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling) sigmas = offset_first_sigma_for_snr(sigmas, model_sampling) + s_noise = s_noise * getattr(model_sampling, "noise_scale", 1.0) old_denoised = None h, h_last = None, None @@ -889,6 +894,7 @@ def 
@torch.no_grad()
def sample_lcm(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None, s_noise=1.0, s_noise_end=None, noise_clip_std=0.0):
    """Sample by repeatedly taking the model's denoised output and re-noising it.

    Each step replaces ``x`` with the denoised prediction at ``sigmas[i]``.
    Unless the next sigma is zero, fresh noise is then mixed in through
    ``model_sampling.noise_scaling`` to reach ``sigmas[i + 1]``.

    Args:
        model: Denoiser called as ``model(x, sigma * s_in, **extra_args)``.
        x: Starting tensor.
        sigmas: Noise levels. One step is taken per consecutive pair, so fewer
            than two entries means no step and ``x`` is returned unchanged.
        extra_args: Extra keyword arguments for ``model``; ``"seed"`` seeds the
            default noise sampler.
        callback: Optional per-step callable receiving a progress dict.
        disable: Forwarded to the progress bar.
        noise_sampler: Callable ``(sigma, sigma_next) -> noise``; a default is
            built when ``None``.
        s_noise: Noise multiplier at the first step.
        s_noise_end: Noise multiplier at the last step; the multiplier is
            interpolated linearly across steps. ``None`` keeps ``s_noise``
            throughout.
        noise_clip_std: Clamp injected noise to +/- this many standard
            deviations of the sampled noise (0 disables).

    Returns:
        The final sample.
    """
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

    # No lower bound of 1: a single-entry sigma list must run zero steps, not
    # index past the end. The division below has its own guard.
    n_steps = max(0, len(sigmas) - 1)
    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')

    s_start = float(s_noise)
    s_end = s_start if s_noise_end is None else float(s_noise_end)

    for i in trange(n_steps, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})

        x = denoised
        if sigmas[i + 1] > 0:
            noise = noise_sampler(sigmas[i], sigmas[i + 1])
            if noise_clip_std > 0:
                clip_val = noise_clip_std * noise.std()
                noise = noise.clamp(min=-clip_val, max=clip_val)
            # Fraction of the schedule completed, 0 at the first step and 1 at
            # the last.
            t = (i / (n_steps - 1)) if n_steps > 1 else 0.0
            s_noise_i = s_start + (s_end - s_start) * t
            if s_noise_i != 1.0:
                noise = noise * s_noise_i
            x = model_sampling.noise_scaling(sigmas[i + 1], noise, x)
    return x
extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_noise = s_noise * getattr(model.inner_model.model_patcher.get_model_object('model_sampling'), "noise_scale", 1.0) temp = [0] def post_cfg_function(args): @@ -1371,6 +1395,7 @@ def res_multistep(model, x, sigmas, extra_args=None, callback=None, disable=None extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_noise = s_noise * getattr(model.inner_model.model_patcher.get_model_object('model_sampling'), "noise_scale", 1.0) s_in = x.new_ones([x.shape[0]]) sigma_fn = lambda t: t.neg().exp() t_fn = lambda sigma: sigma.log().neg() @@ -1504,6 +1529,7 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_noise = s_noise * getattr(model.inner_model.model_patcher.get_model_object('model_sampling'), "noise_scale", 1.0) s_in = x.new_ones([x.shape[0]]) def default_er_sde_noise_scaler(x): @@ -1574,9 +1600,10 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler s_in = x.new_ones([x.shape[0]]) - inject_noise = eta > 0 and s_noise > 0 model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling') + s_noise = s_noise * getattr(model_sampling, "noise_scale", 1.0) + inject_noise = eta > 0 and s_noise > 0 sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling) lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling) sigmas = offset_first_sigma_for_snr(sigmas, model_sampling) @@ -1645,9 +1672,10 
@@ def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=Non seed = extra_args.get("seed", None) noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler s_in = x.new_ones([x.shape[0]]) - inject_noise = eta > 0 and s_noise > 0 model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling') + s_noise = s_noise * getattr(model_sampling, "noise_scale", 1.0) + inject_noise = eta > 0 and s_noise > 0 sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling) lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling) sigmas = offset_first_sigma_for_snr(sigmas, model_sampling) @@ -1713,6 +1741,7 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F s_in = x.new_ones([x.shape[0]]) model_sampling = model.inner_model.model_patcher.get_model_object("model_sampling") + s_noise = s_noise * getattr(model_sampling, "noise_scale", 1.0) sigmas = offset_first_sigma_for_snr(sigmas, model_sampling) lambdas = sigma_to_half_log_snr(sigmas, model_sampling=model_sampling) @@ -1810,3 +1839,119 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F def sample_sa_solver_pece(model, x, sigmas, extra_args=None, callback=None, disable=False, tau_func=None, s_noise=1.0, noise_sampler=None, predictor_order=3, corrector_order=4, simple_order_2=False): """Stochastic Adams Solver with PECE (Predict–Evaluate–Correct–Evaluate) mode (NeurIPS 2023).""" return sample_sa_solver(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, tau_func=tau_func, s_noise=s_noise, noise_sampler=noise_sampler, predictor_order=predictor_order, corrector_order=corrector_order, use_pece=True, simple_order_2=simple_order_2) + + +@torch.no_grad() +def sample_ar_video(model, x, sigmas, extra_args=None, callback=None, disable=None, + num_frame_per_block=1): + """ + Autoregressive video sampler: block-by-block denoising with KV cache + and 
flow-match re-noising for Causal Forcing / Self-Forcing models. + + Requires a Causal-WAN compatible model (diffusion_model must expose + init_kv_caches / init_crossattn_caches) and 5-D latents [B,C,T,H,W]. + + All AR-loop parameters are passed via the SamplerARVideo node, not read + from the checkpoint or transformer_options. + """ + extra_args = {} if extra_args is None else extra_args + model_options = extra_args.get("model_options", {}) + transformer_options = model_options.get("transformer_options", {}) + + if x.ndim != 5: + raise ValueError( + f"ar_video sampler requires 5-D video latents [B,C,T,H,W], got {x.ndim}-D tensor with shape {x.shape}. " + "This sampler is only compatible with autoregressive video models (e.g. Causal-WAN)." + ) + + inner_model = model.inner_model.inner_model + causal_model = inner_model.diffusion_model + + if not (hasattr(causal_model, "init_kv_caches") and hasattr(causal_model, "init_crossattn_caches")): + raise TypeError( + "ar_video sampler requires a Causal-WAN compatible model whose diffusion_model " + "exposes init_kv_caches() and init_crossattn_caches(). The loaded checkpoint " + "does not support this interface — choose a different sampler." 
+ ) + + seed = extra_args.get("seed", 0) + + bs, c, lat_t, lat_h, lat_w = x.shape + frame_seq_len = -(-lat_h // 2) * -(-lat_w // 2) # ceiling division + num_blocks = -(-lat_t // num_frame_per_block) # ceiling division + device = x.device + model_dtype = inner_model.get_dtype() + + kv_caches = causal_model.init_kv_caches(bs, lat_t * frame_seq_len, device, model_dtype) + crossattn_caches = causal_model.init_crossattn_caches(bs, device, model_dtype) + + output = torch.zeros_like(x) + s_in = x.new_ones([x.shape[0]]) + current_start_frame = 0 + + # I2V: seed KV cache with the initial image latent before the denoising loop + initial_latent = transformer_options.get("ar_config", {}).get("initial_latent", None) + if initial_latent is not None: + initial_latent = inner_model.process_latent_in(initial_latent).to(device=device, dtype=model_dtype) + n_init = initial_latent.shape[2] + output[:, :, :n_init] = initial_latent + + ar_state = {"start_frame": 0, "kv_caches": kv_caches, "crossattn_caches": crossattn_caches} + transformer_options["ar_state"] = ar_state + zero_sigma = sigmas.new_zeros([1]) + _ = model(initial_latent, zero_sigma * s_in, **extra_args) + + current_start_frame = n_init + remaining = lat_t - n_init + num_blocks = -(-remaining // num_frame_per_block) + + num_sigma_steps = len(sigmas) - 1 + total_real_steps = num_blocks * num_sigma_steps + step_count = 0 + + try: + for block_idx in trange(num_blocks, disable=disable): + bf = min(num_frame_per_block, lat_t - current_start_frame) + fs, fe = current_start_frame, current_start_frame + bf + noisy_input = x[:, :, fs:fe] + + ar_state = { + "start_frame": current_start_frame, + "kv_caches": kv_caches, + "crossattn_caches": crossattn_caches, + } + transformer_options["ar_state"] = ar_state + + for i in range(num_sigma_steps): + denoised = model(noisy_input, sigmas[i] * s_in, **extra_args) + + if callback is not None: + scaled_i = step_count * num_sigma_steps // total_real_steps + callback({"x": noisy_input, "i": 
scaled_i, "sigma": sigmas[i], + "sigma_hat": sigmas[i], "denoised": denoised}) + + if sigmas[i + 1] == 0: + noisy_input = denoised + else: + sigma_next = sigmas[i + 1] + torch.manual_seed(seed + block_idx * 1000 + i) + fresh_noise = torch.randn_like(denoised) + noisy_input = (1.0 - sigma_next) * denoised + sigma_next * fresh_noise + + for cache in kv_caches: + cache["end"] -= bf * frame_seq_len + + step_count += 1 + + output[:, :, fs:fe] = noisy_input + + for cache in kv_caches: + cache["end"] -= bf * frame_seq_len + zero_sigma = sigmas.new_zeros([1]) + _ = model(noisy_input, zero_sigma * s_in, **extra_args) + + current_start_frame += bf + finally: + transformer_options.pop("ar_state", None) + + return output diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 6a57bca1c..d527eec4a 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -9,6 +9,7 @@ class LatentFormat: latent_rgb_factors_reshape = None taesd_decoder_name = None spacial_downscale_ratio = 8 + temporal_downscale_ratio = 1 def process_in(self, latent): return latent * self.scale_factor @@ -224,6 +225,7 @@ class Flux2(LatentFormat): self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851] self.latent_rgb_factors_reshape = lambda t: t.reshape(t.shape[0], 32, 2, 2, t.shape[-2], t.shape[-1]).permute(0, 1, 4, 2, 5, 3).reshape(t.shape[0], 32, t.shape[-2] * 2, t.shape[-1] * 2) + self.taesd_decoder_name = "taef2_decoder" def process_in(self, latent): return latent @@ -234,6 +236,7 @@ class Flux2(LatentFormat): class Mochi(LatentFormat): latent_channels = 12 latent_dimensions = 3 + temporal_downscale_ratio = 6 def __init__(self): self.scale_factor = 1.0 @@ -277,6 +280,7 @@ class LTXV(LatentFormat): latent_channels = 128 latent_dimensions = 3 spacial_downscale_ratio = 32 + temporal_downscale_ratio = 8 def __init__(self): self.latent_rgb_factors = [ @@ -420,6 +424,7 @@ class LTXAV(LTXV): class HunyuanVideo(LatentFormat): latent_channels = 16 latent_dimensions = 3 + 
temporal_downscale_ratio = 4 scale_factor = 0.476986 latent_rgb_factors = [ [-0.0395, -0.0331, 0.0445], @@ -446,6 +451,7 @@ class HunyuanVideo(LatentFormat): class Cosmos1CV8x8x8(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 8 latent_rgb_factors = [ [ 0.1817, 0.2284, 0.2423], @@ -471,6 +477,7 @@ class Cosmos1CV8x8x8(LatentFormat): class Wan21(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 4 latent_rgb_factors = [ [-0.1299, -0.1692, 0.2932], @@ -733,6 +740,7 @@ class HunyuanVideo15(LatentFormat): latent_channels = 32 latent_dimensions = 3 spacial_downscale_ratio = 16 + temporal_downscale_ratio = 4 scale_factor = 1.03682 taesd_decoder_name = "lighttaehy1_5" @@ -783,3 +791,36 @@ class ZImagePixelSpace(ChromaRadiance): No VAE encoding/decoding — the model operates directly on RGB pixels. """ pass + + +class HiDreamO1Pixel(ChromaRadiance): + """Pixel-space latent format for HiDream-O1. + No VAE — model patches/unpatches raw RGB internally with patch_size=32. + """ + pass + +class CogVideoX(LatentFormat): + """Latent format for CogVideoX-2b (THUDM/CogVideoX-2b). + + scale_factor matches the vae/config.json scaling_factor for the 2b variant. + The 5b-class checkpoints (CogVideoX-5b, CogVideoX-1.5-5B, CogVideoX-Fun-V1.5-*) + use a different value; see CogVideoX1_5 below. + """ + latent_channels = 16 + latent_dimensions = 3 + temporal_downscale_ratio = 4 + + def __init__(self): + self.scale_factor = 1.15258426 + + +class CogVideoX1_5(CogVideoX): + """Latent format for 5b-class CogVideoX checkpoints. + + Covers THUDM/CogVideoX-5b, THUDM/CogVideoX-1.5-5B, and the CogVideoX-Fun + V1.5-5b family (including VOID inpainting). All of these have + scaling_factor=0.7 in their vae/config.json. Auto-selected in + supported_models.CogVideoX_T2V based on transformer hidden dim. 
+ """ + def __init__(self): + self.scale_factor = 0.7 diff --git a/comfy/ldm/ace/ace_step15.py b/comfy/ldm/ace/ace_step15.py index 1d7dc59a8..2ca2d26c4 100644 --- a/comfy/ldm/ace/ace_step15.py +++ b/comfy/ldm/ace/ace_step15.py @@ -611,6 +611,7 @@ class AceStepDiTModel(nn.Module): intermediate_size, patch_size, audio_acoustic_hidden_dim, + condition_dim=None, layer_types=None, sliding_window=128, rms_norm_eps=1e-6, @@ -640,7 +641,7 @@ class AceStepDiTModel(nn.Module): self.time_embed = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations) self.time_embed_r = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations) - self.condition_embedder = Linear(hidden_size, hidden_size, dtype=dtype, device=device) + self.condition_embedder = Linear(condition_dim, hidden_size, dtype=dtype, device=device) if layer_types is None: layer_types = ["full_attention"] * num_layers @@ -1035,6 +1036,9 @@ class AceStepConditionGenerationModel(nn.Module): fsq_dim=2048, fsq_levels=[8, 8, 8, 5, 5, 5], fsq_input_num_quantizers=1, + encoder_hidden_size=2048, + encoder_intermediate_size=6144, + encoder_num_heads=16, audio_model=None, dtype=None, device=None, @@ -1054,24 +1058,24 @@ class AceStepConditionGenerationModel(nn.Module): self.decoder = AceStepDiTModel( in_channels, hidden_size, num_dit_layers, num_heads, num_kv_heads, head_dim, - intermediate_size, patch_size, audio_acoustic_hidden_dim, + intermediate_size, patch_size, audio_acoustic_hidden_dim, condition_dim=encoder_hidden_size, layer_types=layer_types, sliding_window=sliding_window, rms_norm_eps=rms_norm_eps, dtype=dtype, device=device, operations=operations ) self.encoder = AceStepConditionEncoder( - text_hidden_dim, timbre_hidden_dim, hidden_size, num_lyric_layers, num_timbre_layers, - num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps, + text_hidden_dim, timbre_hidden_dim, encoder_hidden_size, num_lyric_layers, num_timbre_layers, + encoder_num_heads, 
num_kv_heads, head_dim, encoder_intermediate_size, rms_norm_eps, dtype=dtype, device=device, operations=operations ) self.tokenizer = AceStepAudioTokenizer( - audio_acoustic_hidden_dim, hidden_size, pool_window_size, fsq_dim=fsq_dim, fsq_levels=fsq_levels, fsq_input_num_quantizers=fsq_input_num_quantizers, num_layers=num_tokenizer_layers, head_dim=head_dim, rms_norm_eps=rms_norm_eps, + audio_acoustic_hidden_dim, encoder_hidden_size, pool_window_size, fsq_dim=fsq_dim, fsq_levels=fsq_levels, fsq_input_num_quantizers=fsq_input_num_quantizers, num_layers=num_tokenizer_layers, head_dim=head_dim, rms_norm_eps=rms_norm_eps, dtype=dtype, device=device, operations=operations ) self.detokenizer = AudioTokenDetokenizer( - hidden_size, pool_window_size, audio_acoustic_hidden_dim, num_layers=2, head_dim=head_dim, + encoder_hidden_size, pool_window_size, audio_acoustic_hidden_dim, num_layers=2, head_dim=head_dim, dtype=dtype, device=device, operations=operations ) - self.null_condition_emb = nn.Parameter(torch.empty(1, 1, hidden_size, dtype=dtype, device=device)) + self.null_condition_emb = nn.Parameter(torch.empty(1, 1, encoder_hidden_size, dtype=dtype, device=device)) def prepare_condition( self, diff --git a/comfy/ldm/cogvideo/__init__.py b/comfy/ldm/cogvideo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/comfy/ldm/cogvideo/model.py b/comfy/ldm/cogvideo/model.py new file mode 100644 index 000000000..fb475ed53 --- /dev/null +++ b/comfy/ldm/cogvideo/model.py @@ -0,0 +1,573 @@ +# CogVideoX 3D Transformer - ported to ComfyUI native ops +# Architecture reference: diffusers CogVideoXTransformer3DModel +# Style reference: comfy/ldm/wan/model.py + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + +from comfy.ldm.modules.attention import optimized_attention +import comfy.patcher_extension +import comfy.ldm.common_dit + + +def _get_1d_rotary_pos_embed(dim, pos, theta=10000.0): + """Returns (cos, sin) each with shape [seq_len, 
dim]. + + Frequencies are computed at dim//2 resolution then repeat_interleaved + to full dim, matching CogVideoX's interleaved (real, imag) pair format. + """ + freqs = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float32, device=pos.device) / dim)) + angles = torch.outer(pos.float(), freqs.float()) + cos = angles.cos().repeat_interleave(2, dim=-1).float() + sin = angles.sin().repeat_interleave(2, dim=-1).float() + return (cos, sin) + + +def apply_rotary_emb(x, freqs_cos_sin): + """Apply CogVideoX rotary embedding to query or key tensor. + + x: [B, heads, seq_len, head_dim] + freqs_cos_sin: (cos, sin) each [seq_len, head_dim//2] + + Uses interleaved pair rotation (same as diffusers CogVideoX/Flux). + head_dim is reshaped to (-1, 2) pairs, rotated, then flattened back. + """ + cos, sin = freqs_cos_sin + cos = cos[None, None, :, :].to(x.device) + sin = sin[None, None, :, :].to(x.device) + + # Interleaved pairs: [B, H, S, D] -> [B, H, S, D//2, 2] -> (real, imag) + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + + return (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + +def get_timestep_embedding(timesteps, dim, flip_sin_to_cos=True, downscale_freq_shift=0, scale=1, max_period=10000): + half = dim // 2 + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half) + args = timesteps[:, None].float() * freqs[None] * scale + embedding = torch.cat([torch.sin(args), torch.cos(args)], dim=-1) + if flip_sin_to_cos: + embedding = torch.cat([embedding[:, half:], embedding[:, :half]], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + +def get_3d_sincos_pos_embed(embed_dim, spatial_size, temporal_size, spatial_interpolation_scale=1.0, temporal_interpolation_scale=1.0, device=None): + if isinstance(spatial_size, int): + spatial_size = 
(spatial_size, spatial_size) + + grid_w = torch.arange(spatial_size[0], dtype=torch.float32, device=device) / spatial_interpolation_scale + grid_h = torch.arange(spatial_size[1], dtype=torch.float32, device=device) / spatial_interpolation_scale + grid_t = torch.arange(temporal_size, dtype=torch.float32, device=device) / temporal_interpolation_scale + + grid_t, grid_h, grid_w = torch.meshgrid(grid_t, grid_h, grid_w, indexing="ij") + + embed_dim_spatial = 2 * (embed_dim // 3) + embed_dim_temporal = embed_dim // 3 + + pos_embed_spatial = _get_2d_sincos_pos_embed(embed_dim_spatial, grid_h, grid_w, device=device) + pos_embed_temporal = _get_1d_sincos_pos_embed(embed_dim_temporal, grid_t[:, 0, 0], device=device) + + T, H, W = grid_t.shape + pos_embed_temporal = pos_embed_temporal.unsqueeze(1).unsqueeze(1).expand(-1, H, W, -1) + pos_embed = torch.cat([pos_embed_temporal, pos_embed_spatial], dim=-1) + + return pos_embed + + +def _get_2d_sincos_pos_embed(embed_dim, grid_h, grid_w, device=None): + T, H, W = grid_h.shape + half_dim = embed_dim // 2 + pos_h = _get_1d_sincos_pos_embed(half_dim, grid_h.reshape(-1), device=device).reshape(T, H, W, half_dim) + pos_w = _get_1d_sincos_pos_embed(half_dim, grid_w.reshape(-1), device=device).reshape(T, H, W, half_dim) + return torch.cat([pos_h, pos_w], dim=-1) + + +def _get_1d_sincos_pos_embed(embed_dim, pos, device=None): + half = embed_dim // 2 + freqs = torch.exp(-math.log(10000.0) * torch.arange(start=0, end=half, dtype=torch.float32, device=device) / half) + args = pos.float().reshape(-1)[:, None] * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if embed_dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + + +class CogVideoXPatchEmbed(nn.Module): + def __init__(self, patch_size=2, patch_size_t=None, in_channels=16, dim=1920, + text_dim=4096, bias=True, sample_width=90, sample_height=60, + sample_frames=49, temporal_compression_ratio=4, + 
max_text_seq_length=226, spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, use_positional_embeddings=True, + use_learned_positional_embeddings=True, + device=None, dtype=None, operations=None): + super().__init__() + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.dim = dim + self.sample_height = sample_height + self.sample_width = sample_width + self.sample_frames = sample_frames + self.temporal_compression_ratio = temporal_compression_ratio + self.max_text_seq_length = max_text_seq_length + self.spatial_interpolation_scale = spatial_interpolation_scale + self.temporal_interpolation_scale = temporal_interpolation_scale + self.use_positional_embeddings = use_positional_embeddings + self.use_learned_positional_embeddings = use_learned_positional_embeddings + + if patch_size_t is None: + self.proj = operations.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size, bias=bias, device=device, dtype=dtype) + else: + self.proj = operations.Linear(in_channels * patch_size * patch_size * patch_size_t, dim, device=device, dtype=dtype) + + self.text_proj = operations.Linear(text_dim, dim, device=device, dtype=dtype) + + if use_positional_embeddings or use_learned_positional_embeddings: + persistent = use_learned_positional_embeddings + pos_embedding = self._get_positional_embeddings(sample_height, sample_width, sample_frames) + self.register_buffer("pos_embedding", pos_embedding, persistent=persistent) + + def _get_positional_embeddings(self, sample_height, sample_width, sample_frames, device=None): + post_patch_height = sample_height // self.patch_size + post_patch_width = sample_width // self.patch_size + post_time_compression_frames = (sample_frames - 1) // self.temporal_compression_ratio + 1 + if self.patch_size_t is not None: + post_time_compression_frames = post_time_compression_frames // self.patch_size_t + num_patches = post_patch_height * post_patch_width * post_time_compression_frames + + pos_embedding = 
get_3d_sincos_pos_embed( + self.dim, + (post_patch_width, post_patch_height), + post_time_compression_frames, + self.spatial_interpolation_scale, + self.temporal_interpolation_scale, + device=device, + ) + pos_embedding = pos_embedding.reshape(-1, self.dim) + joint_pos_embedding = pos_embedding.new_zeros( + 1, self.max_text_seq_length + num_patches, self.dim, requires_grad=False + ) + joint_pos_embedding.data[:, self.max_text_seq_length:].copy_(pos_embedding) + return joint_pos_embedding + + def forward(self, text_embeds, image_embeds): + input_dtype = text_embeds.dtype + text_embeds = self.text_proj(text_embeds.to(self.text_proj.weight.dtype)).to(input_dtype) + batch_size, num_frames, channels, height, width = image_embeds.shape + + proj_dtype = self.proj.weight.dtype + if self.patch_size_t is None: + image_embeds = image_embeds.reshape(-1, channels, height, width) + image_embeds = self.proj(image_embeds.to(proj_dtype)).to(input_dtype) + image_embeds = image_embeds.view(batch_size, num_frames, *image_embeds.shape[1:]) + image_embeds = image_embeds.flatten(3).transpose(2, 3) + image_embeds = image_embeds.flatten(1, 2) + else: + p = self.patch_size + p_t = self.patch_size_t + image_embeds = image_embeds.permute(0, 1, 3, 4, 2) + image_embeds = image_embeds.reshape( + batch_size, num_frames // p_t, p_t, height // p, p, width // p, p, channels + ) + image_embeds = image_embeds.permute(0, 1, 3, 5, 7, 2, 4, 6).flatten(4, 7).flatten(1, 3) + image_embeds = self.proj(image_embeds.to(proj_dtype)).to(input_dtype) + + embeds = torch.cat([text_embeds, image_embeds], dim=1).contiguous() + + if self.use_positional_embeddings or self.use_learned_positional_embeddings: + text_seq_length = text_embeds.shape[1] + num_image_patches = image_embeds.shape[1] + + if self.use_learned_positional_embeddings: + image_pos = self.pos_embedding[ + :, self.max_text_seq_length:self.max_text_seq_length + num_image_patches + ].to(device=embeds.device, dtype=embeds.dtype) + else: + image_pos = 
get_3d_sincos_pos_embed( + self.dim, + (width // self.patch_size, height // self.patch_size), + num_image_patches // ((height // self.patch_size) * (width // self.patch_size)), + self.spatial_interpolation_scale, + self.temporal_interpolation_scale, + device=embeds.device, + ).reshape(1, num_image_patches, self.dim).to(dtype=embeds.dtype) + + # Build joint: zeros for text + sincos for image + joint_pos = torch.zeros(1, text_seq_length + num_image_patches, self.dim, device=embeds.device, dtype=embeds.dtype) + joint_pos[:, text_seq_length:] = image_pos + embeds = embeds + joint_pos + + return embeds + + +class CogVideoXLayerNormZero(nn.Module): + def __init__(self, time_dim, dim, elementwise_affine=True, eps=1e-5, bias=True, + device=None, dtype=None, operations=None): + super().__init__() + self.silu = nn.SiLU() + self.linear = operations.Linear(time_dim, 6 * dim, bias=bias, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine, device=device, dtype=dtype) + + def forward(self, hidden_states, encoder_hidden_states, temb): + shift, scale, gate, enc_shift, enc_scale, enc_gate = self.linear(self.silu(temb)).chunk(6, dim=1) + hidden_states = self.norm(hidden_states) * (1 + scale)[:, None, :] + shift[:, None, :] + encoder_hidden_states = self.norm(encoder_hidden_states) * (1 + enc_scale)[:, None, :] + enc_shift[:, None, :] + return hidden_states, encoder_hidden_states, gate[:, None, :], enc_gate[:, None, :] + + +class CogVideoXAdaLayerNorm(nn.Module): + def __init__(self, time_dim, dim, elementwise_affine=True, eps=1e-5, + device=None, dtype=None, operations=None): + super().__init__() + self.silu = nn.SiLU() + self.linear = operations.Linear(time_dim, 2 * dim, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine, device=device, dtype=dtype) + + def forward(self, x, temb): + temb = self.linear(self.silu(temb)) + shift, scale = temb.chunk(2, dim=1) + 
x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class CogVideoXBlock(nn.Module): + def __init__(self, dim, num_heads, head_dim, time_dim, + eps=1e-5, ff_inner_dim=None, ff_bias=True, + device=None, dtype=None, operations=None): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.head_dim = head_dim + + self.norm1 = CogVideoXLayerNormZero(time_dim, dim, eps=eps, device=device, dtype=dtype, operations=operations) + + # Self-attention (joint text + latent) + self.q = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.k = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.v = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.norm_q = operations.LayerNorm(head_dim, eps=1e-6, elementwise_affine=True, device=device, dtype=dtype) + self.norm_k = operations.LayerNorm(head_dim, eps=1e-6, elementwise_affine=True, device=device, dtype=dtype) + self.attn_out = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + + self.norm2 = CogVideoXLayerNormZero(time_dim, dim, eps=eps, device=device, dtype=dtype, operations=operations) + + # Feed-forward (GELU approximate) + inner_dim = ff_inner_dim or dim * 4 + self.ff_proj = operations.Linear(dim, inner_dim, bias=ff_bias, device=device, dtype=dtype) + self.ff_out = operations.Linear(inner_dim, dim, bias=ff_bias, device=device, dtype=dtype) + + def forward(self, hidden_states, encoder_hidden_states, temb, image_rotary_emb=None, transformer_options=None): + if transformer_options is None: + transformer_options = {} + text_seq_length = encoder_hidden_states.size(1) + + # Norm & modulate + norm_hidden, norm_encoder, gate_msa, enc_gate_msa = self.norm1(hidden_states, encoder_hidden_states, temb) + + # Joint self-attention + qkv_input = torch.cat([norm_encoder, norm_hidden], dim=1) + b, s, _ = qkv_input.shape + n, d = self.num_heads, self.head_dim + + q = self.q(qkv_input).view(b, s, n, d) + k = 
self.k(qkv_input).view(b, s, n, d) + v = self.v(qkv_input) + + q = self.norm_q(q).view(b, s, n, d) + k = self.norm_k(k).view(b, s, n, d) + + # Apply rotary embeddings to image tokens only (diffusers format: [B, heads, seq, head_dim]) + if image_rotary_emb is not None: + q_img = q[:, text_seq_length:].transpose(1, 2) # [B, heads, img_seq, head_dim] + k_img = k[:, text_seq_length:].transpose(1, 2) + q_img = apply_rotary_emb(q_img, image_rotary_emb) + k_img = apply_rotary_emb(k_img, image_rotary_emb) + q = torch.cat([q[:, :text_seq_length], q_img.transpose(1, 2)], dim=1) + k = torch.cat([k[:, :text_seq_length], k_img.transpose(1, 2)], dim=1) + + attn_out = optimized_attention( + q.reshape(b, s, n * d), + k.reshape(b, s, n * d), + v, + heads=self.num_heads, + transformer_options=transformer_options, + ) + + attn_out = self.attn_out(attn_out) + + attn_encoder, attn_hidden = attn_out.split([text_seq_length, s - text_seq_length], dim=1) + + hidden_states = hidden_states + gate_msa * attn_hidden + encoder_hidden_states = encoder_hidden_states + enc_gate_msa * attn_encoder + + # Norm & modulate for FF + norm_hidden, norm_encoder, gate_ff, enc_gate_ff = self.norm2(hidden_states, encoder_hidden_states, temb) + + # Feed-forward (GELU on concatenated text + latent) + ff_input = torch.cat([norm_encoder, norm_hidden], dim=1) + ff_output = self.ff_out(F.gelu(self.ff_proj(ff_input), approximate="tanh")) + + hidden_states = hidden_states + gate_ff * ff_output[:, text_seq_length:] + encoder_hidden_states = encoder_hidden_states + enc_gate_ff * ff_output[:, :text_seq_length] + + return hidden_states, encoder_hidden_states + + +class CogVideoXTransformer3DModel(nn.Module): + def __init__(self, + num_attention_heads=30, + attention_head_dim=64, + in_channels=16, + out_channels=16, + flip_sin_to_cos=True, + freq_shift=0, + time_embed_dim=512, + ofs_embed_dim=None, + text_embed_dim=4096, + num_layers=30, + dropout=0.0, + attention_bias=True, + sample_width=90, + sample_height=60, + 
sample_frames=49, + patch_size=2, + patch_size_t=None, + temporal_compression_ratio=4, + max_text_seq_length=226, + spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, + use_rotary_positional_embeddings=False, + use_learned_positional_embeddings=False, + patch_bias=True, + image_model=None, + device=None, + dtype=None, + operations=None, + ): + super().__init__() + self.dtype = dtype + dim = num_attention_heads * attention_head_dim + self.dim = dim + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + self.in_channels = in_channels + self.out_channels = out_channels + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.max_text_seq_length = max_text_seq_length + self.use_rotary_positional_embeddings = use_rotary_positional_embeddings + + # 1. Patch embedding + self.patch_embed = CogVideoXPatchEmbed( + patch_size=patch_size, + patch_size_t=patch_size_t, + in_channels=in_channels, + dim=dim, + text_dim=text_embed_dim, + bias=patch_bias, + sample_width=sample_width, + sample_height=sample_height, + sample_frames=sample_frames, + temporal_compression_ratio=temporal_compression_ratio, + max_text_seq_length=max_text_seq_length, + spatial_interpolation_scale=spatial_interpolation_scale, + temporal_interpolation_scale=temporal_interpolation_scale, + use_positional_embeddings=not use_rotary_positional_embeddings, + use_learned_positional_embeddings=use_learned_positional_embeddings, + device=device, dtype=torch.float32, operations=operations, + ) + + # 2. 
Time embedding + self.time_proj_dim = dim + self.time_proj_flip = flip_sin_to_cos + self.time_proj_shift = freq_shift + self.time_embedding_linear_1 = operations.Linear(dim, time_embed_dim, device=device, dtype=dtype) + self.time_embedding_act = nn.SiLU() + self.time_embedding_linear_2 = operations.Linear(time_embed_dim, time_embed_dim, device=device, dtype=dtype) + + # Optional OFS embedding (CogVideoX 1.5 I2V) + self.ofs_proj_dim = ofs_embed_dim + if ofs_embed_dim: + self.ofs_embedding_linear_1 = operations.Linear(ofs_embed_dim, ofs_embed_dim, device=device, dtype=dtype) + self.ofs_embedding_act = nn.SiLU() + self.ofs_embedding_linear_2 = operations.Linear(ofs_embed_dim, ofs_embed_dim, device=device, dtype=dtype) + else: + self.ofs_embedding_linear_1 = None + + # 3. Transformer blocks + self.blocks = nn.ModuleList([ + CogVideoXBlock( + dim=dim, + num_heads=num_attention_heads, + head_dim=attention_head_dim, + time_dim=time_embed_dim, + eps=1e-5, + device=device, dtype=dtype, operations=operations, + ) + for _ in range(num_layers) + ]) + + self.norm_final = operations.LayerNorm(dim, eps=1e-5, elementwise_affine=True, device=device, dtype=dtype) + + # 4. 
Output + self.norm_out = CogVideoXAdaLayerNorm( + time_dim=time_embed_dim, dim=dim, eps=1e-5, + device=device, dtype=dtype, operations=operations, + ) + + if patch_size_t is None: + output_dim = patch_size * patch_size * out_channels + else: + output_dim = patch_size * patch_size * patch_size_t * out_channels + + self.proj_out = operations.Linear(dim, output_dim, device=device, dtype=dtype) + + self.spatial_interpolation_scale = spatial_interpolation_scale + self.temporal_interpolation_scale = temporal_interpolation_scale + self.temporal_compression_ratio = temporal_compression_ratio + + def forward(self, x, timestep, context, ofs=None, transformer_options=None, **kwargs): + if transformer_options is None: + transformer_options = {} + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, ofs, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, ofs=None, transformer_options=None, **kwargs): + if transformer_options is None: + transformer_options = {} + # ComfyUI passes [B, C, T, H, W] + batch_size, channels, t, h, w = x.shape + + # Pad to patch size (temporal + spatial), same pattern as WAN + p_t = self.patch_size_t if self.patch_size_t is not None else 1 + x = comfy.ldm.common_dit.pad_to_patch_size(x, (p_t, self.patch_size, self.patch_size)) + + # CogVideoX expects [B, T, C, H, W] + x = x.permute(0, 2, 1, 3, 4) + batch_size, num_frames, channels, height, width = x.shape + + # Time embedding + t_emb = get_timestep_embedding(timestep, self.time_proj_dim, self.time_proj_flip, self.time_proj_shift) + t_emb = t_emb.to(dtype=x.dtype) + emb = self.time_embedding_linear_2(self.time_embedding_act(self.time_embedding_linear_1(t_emb))) + + if self.ofs_embedding_linear_1 is not None and ofs is not None: + ofs_emb = get_timestep_embedding(ofs, self.ofs_proj_dim, 
self.time_proj_flip, self.time_proj_shift) + ofs_emb = ofs_emb.to(dtype=x.dtype) + ofs_emb = self.ofs_embedding_linear_2(self.ofs_embedding_act(self.ofs_embedding_linear_1(ofs_emb))) + emb = emb + ofs_emb + + # Patch embedding + hidden_states = self.patch_embed(context, x) + + text_seq_length = context.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + # Rotary embeddings (if used) + image_rotary_emb = None + if self.use_rotary_positional_embeddings: + post_patch_height = height // self.patch_size + post_patch_width = width // self.patch_size + if self.patch_size_t is None: + post_time = num_frames + else: + post_time = num_frames // self.patch_size_t + image_rotary_emb = self._get_rotary_emb(post_patch_height, post_patch_width, post_time, device=x.device) + + # Transformer blocks + for i, block in enumerate(self.blocks): + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + transformer_options=transformer_options, + ) + + hidden_states = self.norm_final(hidden_states) + + # Output projection + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # Unpatchify + p = self.patch_size + p_t = self.patch_size_t + + if p_t is None: + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + else: + output = hidden_states.reshape( + batch_size, (num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p + ) + output = output.permute(0, 1, 5, 4, 2, 6, 3, 7).flatten(6, 7).flatten(4, 5).flatten(1, 2) + + # Back to ComfyUI format [B, C, T, H, W] and crop padding + output = output.permute(0, 2, 1, 3, 4)[:, :, :t, :h, :w] + return output + + def _get_rotary_emb(self, h, w, t, device): + """Compute CogVideoX 3D 
rotary positional embeddings. + + For CogVideoX 1.5 (patch_size_t != None): uses "slice" mode — grid positions + are integer arange computed at max_size, then sliced to actual size. + For CogVideoX 1.0 (patch_size_t == None): uses "linspace" mode — height/width positions 0..size-1 + are multiplied by spatial_interpolation_scale; temporal positions stay integer in both modes. + """ + d = self.attention_head_dim + dim_t = d // 4 + dim_h = d // 8 * 3 + dim_w = d // 8 * 3 + + if self.patch_size_t is not None: + # CogVideoX 1.5: "slice" mode — positions are simple integer indices + # Compute at max(sample_size, actual_size) then slice to actual + base_h = self.patch_embed.sample_height // self.patch_size + base_w = self.patch_embed.sample_width // self.patch_size + max_h = max(base_h, h) + max_w = max(base_w, w) + + grid_h = torch.arange(max_h, device=device, dtype=torch.float32) + grid_w = torch.arange(max_w, device=device, dtype=torch.float32) + grid_t = torch.arange(t, device=device, dtype=torch.float32) + else: + # CogVideoX 1.0: "linspace" mode with interpolation scale + grid_h = torch.linspace(0, h - 1, h, device=device, dtype=torch.float32) * self.spatial_interpolation_scale + grid_w = torch.linspace(0, w - 1, w, device=device, dtype=torch.float32) * self.spatial_interpolation_scale + grid_t = torch.arange(t, device=device, dtype=torch.float32) + + freqs_t = _get_1d_rotary_pos_embed(dim_t, grid_t) + freqs_h = _get_1d_rotary_pos_embed(dim_h, grid_h) + freqs_w = _get_1d_rotary_pos_embed(dim_w, grid_w) + + t_cos, t_sin = freqs_t + h_cos, h_sin = freqs_h + w_cos, w_sin = freqs_w + + # Slice to actual size (for "slice" mode where grids may be larger) + t_cos, t_sin = t_cos[:t], t_sin[:t] + h_cos, h_sin = h_cos[:h], h_sin[:h] + w_cos, w_sin = w_cos[:w], w_sin[:w] + + # Broadcast and concatenate into [T*H*W, head_dim] + t_cos = t_cos[:, None, None, :].expand(-1, h, w, -1) + t_sin = t_sin[:, None, None, :].expand(-1, h, w, -1) + h_cos = h_cos[None, :, None, :].expand(t, -1, w, -1) + h_sin = h_sin[None, :, None, :].expand(t, -1, w, -1) + w_cos 
= w_cos[None, None, :, :].expand(t, h, -1, -1) + w_sin = w_sin[None, None, :, :].expand(t, h, -1, -1) + + cos = torch.cat([t_cos, h_cos, w_cos], dim=-1).reshape(t * h * w, -1) + sin = torch.cat([t_sin, h_sin, w_sin], dim=-1).reshape(t * h * w, -1) + return (cos, sin) diff --git a/comfy/ldm/cogvideo/vae.py b/comfy/ldm/cogvideo/vae.py new file mode 100644 index 000000000..d4e6f321e --- /dev/null +++ b/comfy/ldm/cogvideo/vae.py @@ -0,0 +1,566 @@ +# CogVideoX VAE - ported to ComfyUI native ops +# Architecture reference: diffusers AutoencoderKLCogVideoX +# Style reference: comfy/ldm/wan/vae.py + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops +ops = comfy.ops.disable_weight_init + + +class CausalConv3d(nn.Module): + """Causal 3D convolution with temporal padding. + + Fast path (non-replicate pad modes): when the input has a single temporal + frame and no cache, every frame in the causal window is a copy of that frame, + so the conv weight is summed over its temporal axis and applied in one pass. 
+ """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, pad_mode="constant"): + super().__init__() + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) * 3 + + time_kernel, height_kernel, width_kernel = kernel_size + self.time_kernel_size = time_kernel + self.pad_mode = pad_mode + + height_pad = (height_kernel - 1) // 2 + width_pad = (width_kernel - 1) // 2 + self.time_causal_padding = (width_pad, width_pad, height_pad, height_pad, time_kernel - 1, 0) + + stride = stride if isinstance(stride, tuple) else (stride, 1, 1) + dilation = (dilation, 1, 1) + self.conv = ops.Conv3d( + in_channels, out_channels, kernel_size, + stride=stride, dilation=dilation, + padding=(0, height_pad, width_pad), + ) + + def forward(self, x, conv_cache=None): + if self.pad_mode == "replicate": + x = F.pad(x, self.time_causal_padding, mode="replicate") + conv_cache = None + else: + kernel_t = self.time_kernel_size + if kernel_t > 1: + if conv_cache is None and x.shape[2] == 1: + # Fast path: single frame, no cache. All temporal padding + # frames are copies of the input (replicate-style), so the + # 3D conv reduces to a 2D conv with summed temporal kernel. 
+ w = comfy.ops.cast_to_input(self.conv.weight, x) + b = comfy.ops.cast_to_input(self.conv.bias, x) if self.conv.bias is not None else None + w2d = w.sum(dim=2, keepdim=True) + out = F.conv3d(x, w2d, b, + self.conv.stride, self.conv.padding, + self.conv.dilation, self.conv.groups) + return out, None + cached = [conv_cache] if conv_cache is not None else [x[:, :, :1]] * (kernel_t - 1) + x = torch.cat(cached + [x], dim=2) + conv_cache = x[:, :, -self.time_kernel_size + 1:].clone() if self.time_kernel_size > 1 else None + + out = self.conv(x) + return out, conv_cache + + +def _interpolate_zq(zq, target_size): + """Interpolate latent z to target (T, H, W), matching CogVideoX's first-frame-special handling.""" + t = target_size[0] + if t > 1 and t % 2 == 1: + z_first = F.interpolate(zq[:, :, :1], size=(1, target_size[1], target_size[2])) + z_rest = F.interpolate(zq[:, :, 1:], size=(t - 1, target_size[1], target_size[2])) + return torch.cat([z_first, z_rest], dim=2) + return F.interpolate(zq, size=target_size) + + +class SpatialNorm3D(nn.Module): + """Spatially conditioned normalization.""" + def __init__(self, f_channels, zq_channels, groups=32): + super().__init__() + self.norm_layer = ops.GroupNorm(num_channels=f_channels, num_groups=groups, eps=1e-6, affine=True) + self.conv_y = CausalConv3d(zq_channels, f_channels, kernel_size=1, stride=1) + self.conv_b = CausalConv3d(zq_channels, f_channels, kernel_size=1, stride=1) + + def forward(self, f, zq, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + if zq.shape[-3:] != f.shape[-3:]: + zq = _interpolate_zq(zq, f.shape[-3:]) + + conv_y, new_cache["conv_y"] = self.conv_y(zq, conv_cache=conv_cache.get("conv_y")) + conv_b, new_cache["conv_b"] = self.conv_b(zq, conv_cache=conv_cache.get("conv_b")) + + return self.norm_layer(f) * conv_y + conv_b, new_cache + + +class ResnetBlock3D(nn.Module): + """3D ResNet block with optional spatial norm.""" + def __init__(self, in_channels, out_channels=None, 
temb_channels=512, groups=32, + eps=1e-6, act_fn="silu", spatial_norm_dim=None, pad_mode="first"): + super().__init__() + out_channels = out_channels or in_channels + self.in_channels = in_channels + self.out_channels = out_channels + self.spatial_norm_dim = spatial_norm_dim + + if act_fn == "silu": + self.nonlinearity = nn.SiLU() + elif act_fn == "swish": + self.nonlinearity = nn.SiLU() + else: + self.nonlinearity = nn.SiLU() + + if spatial_norm_dim is None: + self.norm1 = ops.GroupNorm(num_channels=in_channels, num_groups=groups, eps=eps) + self.norm2 = ops.GroupNorm(num_channels=out_channels, num_groups=groups, eps=eps) + else: + self.norm1 = SpatialNorm3D(in_channels, spatial_norm_dim, groups=groups) + self.norm2 = SpatialNorm3D(out_channels, spatial_norm_dim, groups=groups) + + self.conv1 = CausalConv3d(in_channels, out_channels, kernel_size=3, pad_mode=pad_mode) + + if temb_channels > 0: + self.temb_proj = ops.Linear(temb_channels, out_channels) + + self.conv2 = CausalConv3d(out_channels, out_channels, kernel_size=3, pad_mode=pad_mode) + + if in_channels != out_channels: + self.conv_shortcut = ops.Conv3d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + else: + self.conv_shortcut = None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + residual = x + + if zq is not None: + x, new_cache["norm1"] = self.norm1(x, zq, conv_cache=conv_cache.get("norm1")) + else: + x = self.norm1(x) + + x = self.nonlinearity(x) + x, new_cache["conv1"] = self.conv1(x, conv_cache=conv_cache.get("conv1")) + + if temb is not None and hasattr(self, "temb_proj"): + x = x + self.temb_proj(self.nonlinearity(temb))[:, :, None, None, None] + + if zq is not None: + x, new_cache["norm2"] = self.norm2(x, zq, conv_cache=conv_cache.get("norm2")) + else: + x = self.norm2(x) + + x = self.nonlinearity(x) + x, new_cache["conv2"] = self.conv2(x, conv_cache=conv_cache.get("conv2")) + + if self.conv_shortcut is not None: + 
residual = self.conv_shortcut(residual) + + return x + residual, new_cache + + +class Downsample3D(nn.Module): + """3D downsampling with optional temporal compression.""" + def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=0, compress_time=False): + super().__init__() + self.conv = ops.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, x): + if self.compress_time: + b, c, t, h, w = x.shape + x = x.permute(0, 3, 4, 1, 2).reshape(b * h * w, c, t) + if t % 2 == 1: + x_first, x_rest = x[..., 0], x[..., 1:] + if x_rest.shape[-1] > 0: + x_rest = F.avg_pool1d(x_rest, kernel_size=2, stride=2) + x = torch.cat([x_first[..., None], x_rest], dim=-1) + x = x.reshape(b, h, w, c, x.shape[-1]).permute(0, 3, 4, 1, 2) + else: + x = F.avg_pool1d(x, kernel_size=2, stride=2) + x = x.reshape(b, h, w, c, x.shape[-1]).permute(0, 3, 4, 1, 2) + + pad = (0, 1, 0, 1) + x = F.pad(x, pad, mode="constant", value=0) + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = self.conv(x) + x = x.reshape(b, t, x.shape[1], x.shape[2], x.shape[3]).permute(0, 2, 1, 3, 4) + return x + + +class Upsample3D(nn.Module): + """3D upsampling with optional temporal decompression.""" + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, compress_time=False): + super().__init__() + self.conv = ops.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, x): + if self.compress_time: + if x.shape[2] > 1 and x.shape[2] % 2 == 1: + x_first, x_rest = x[:, :, 0], x[:, :, 1:] + x_first = F.interpolate(x_first, scale_factor=2.0) + x_rest = F.interpolate(x_rest, scale_factor=2.0) + x = torch.cat([x_first[:, :, None, :, :], x_rest], dim=2) + elif x.shape[2] > 1: + x = F.interpolate(x, scale_factor=2.0) + else: + x = x.squeeze(2) + x = 
F.interpolate(x, scale_factor=2.0) + x = x[:, :, None, :, :] + else: + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = F.interpolate(x, scale_factor=2.0) + x = x.reshape(b, t, c, *x.shape[2:]).permute(0, 2, 1, 3, 4) + + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = self.conv(x) + x = x.reshape(b, t, *x.shape[1:]).permute(0, 2, 1, 3, 4) + return x + + +class DownBlock3D(nn.Module): + def __init__(self, in_channels, out_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, add_downsample=True, + compress_time=False, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + groups=groups, eps=eps, act_fn=act_fn, pad_mode=pad_mode, + ) + for i in range(num_layers) + ]) + self.downsamplers = nn.ModuleList([Downsample3D(out_channels, out_channels, compress_time=compress_time)]) if add_downsample else None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + if self.downsamplers is not None: + for ds in self.downsamplers: + x = ds(x) + return x, new_cache + + +class MidBlock3D(nn.Module): + def __init__(self, in_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, spatial_norm_dim=None, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels, out_channels=in_channels, + temb_channels=temb_channels, groups=groups, eps=eps, + act_fn=act_fn, spatial_norm_dim=spatial_norm_dim, pad_mode=pad_mode, + ) + for _ in range(num_layers) + ]) + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, 
resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + return x, new_cache + + +class UpBlock3D(nn.Module): + def __init__(self, in_channels, out_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, spatial_norm_dim=16, + add_upsample=True, compress_time=False, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + temb_channels=temb_channels, groups=groups, eps=eps, + act_fn=act_fn, spatial_norm_dim=spatial_norm_dim, pad_mode=pad_mode, + ) + for i in range(num_layers) + ]) + self.upsamplers = nn.ModuleList([Upsample3D(out_channels, out_channels, compress_time=compress_time)]) if add_upsample else None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + if self.upsamplers is not None: + for us in self.upsamplers: + x = us(x) + return x, new_cache + + +class Encoder3D(nn.Module): + def __init__(self, in_channels=3, out_channels=16, + block_out_channels=(128, 256, 256, 512), + layers_per_block=3, act_fn="silu", + eps=1e-6, groups=32, pad_mode="first", + temporal_compression_ratio=4): + super().__init__() + temporal_compress_level = int(np.log2(temporal_compression_ratio)) + + self.conv_in = CausalConv3d(in_channels, block_out_channels[0], kernel_size=3, pad_mode=pad_mode) + + self.down_blocks = nn.ModuleList() + output_channel = block_out_channels[0] + for i in range(len(block_out_channels)): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final = i == len(block_out_channels) - 1 + compress_time = i < temporal_compress_level + + self.down_blocks.append(DownBlock3D( + in_channels=input_channel, out_channels=output_channel, 
+ temb_channels=0, num_layers=layers_per_block, + eps=eps, act_fn=act_fn, groups=groups, + add_downsample=not is_final, compress_time=compress_time, + )) + + self.mid_block = MidBlock3D( + in_channels=block_out_channels[-1], temb_channels=0, + num_layers=2, eps=eps, act_fn=act_fn, groups=groups, pad_mode=pad_mode, + ) + + self.norm_out = ops.GroupNorm(groups, block_out_channels[-1], eps=1e-6) + self.conv_act = nn.SiLU() + self.conv_out = CausalConv3d(block_out_channels[-1], 2 * out_channels, kernel_size=3, pad_mode=pad_mode) + + def forward(self, x, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + x, new_cache["conv_in"] = self.conv_in(x, conv_cache=conv_cache.get("conv_in")) + + for i, block in enumerate(self.down_blocks): + key = f"down_block_{i}" + x, new_cache[key] = block(x, None, None, conv_cache.get(key)) + + x, new_cache["mid_block"] = self.mid_block(x, None, None, conv_cache=conv_cache.get("mid_block")) + + x = self.norm_out(x) + x = self.conv_act(x) + x, new_cache["conv_out"] = self.conv_out(x, conv_cache=conv_cache.get("conv_out")) + + return x, new_cache + + +class Decoder3D(nn.Module): + def __init__(self, in_channels=16, out_channels=3, + block_out_channels=(128, 256, 256, 512), + layers_per_block=3, act_fn="silu", + eps=1e-6, groups=32, pad_mode="first", + temporal_compression_ratio=4): + super().__init__() + reversed_channels = list(reversed(block_out_channels)) + temporal_compress_level = int(np.log2(temporal_compression_ratio)) + + self.conv_in = CausalConv3d(in_channels, reversed_channels[0], kernel_size=3, pad_mode=pad_mode) + + self.mid_block = MidBlock3D( + in_channels=reversed_channels[0], temb_channels=0, + num_layers=2, eps=eps, act_fn=act_fn, groups=groups, + spatial_norm_dim=in_channels, pad_mode=pad_mode, + ) + + self.up_blocks = nn.ModuleList() + output_channel = reversed_channels[0] + for i in range(len(block_out_channels)): + prev_channel = output_channel + output_channel = reversed_channels[i] + is_final = i == 
len(block_out_channels) - 1 + compress_time = i < temporal_compress_level + + self.up_blocks.append(UpBlock3D( + in_channels=prev_channel, out_channels=output_channel, + temb_channels=0, num_layers=layers_per_block + 1, + eps=eps, act_fn=act_fn, groups=groups, + spatial_norm_dim=in_channels, + add_upsample=not is_final, compress_time=compress_time, + )) + + self.norm_out = SpatialNorm3D(reversed_channels[-1], in_channels, groups=groups) + self.conv_act = nn.SiLU() + self.conv_out = CausalConv3d(reversed_channels[-1], out_channels, kernel_size=3, pad_mode=pad_mode) + + def forward(self, sample, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + x, new_cache["conv_in"] = self.conv_in(sample, conv_cache=conv_cache.get("conv_in")) + + x, new_cache["mid_block"] = self.mid_block(x, None, sample, conv_cache=conv_cache.get("mid_block")) + + for i, block in enumerate(self.up_blocks): + key = f"up_block_{i}" + x, new_cache[key] = block(x, None, sample, conv_cache=conv_cache.get(key)) + + x, new_cache["norm_out"] = self.norm_out(x, sample, conv_cache=conv_cache.get("norm_out")) + x = self.conv_act(x) + x, new_cache["conv_out"] = self.conv_out(x, conv_cache=conv_cache.get("conv_out")) + + return x, new_cache + + + +class AutoencoderKLCogVideoX(nn.Module): + """CogVideoX VAE. Spatial tiling/slicing handled by ComfyUI's VAE wrapper. + + Uses rolling temporal decode: conv_in + mid_block + temporal up_blocks run + on the full (low-res) tensor, then the expensive spatial-only up_blocks + + norm_out + conv_out are processed in small temporal chunks with conv_cache + carrying causal state between chunks. This keeps peak VRAM proportional to + chunk_size rather than total frame count. 
+ """ + + def __init__(self, + in_channels=3, out_channels=3, + block_out_channels=(128, 256, 256, 512), + latent_channels=16, layers_per_block=3, + act_fn="silu", eps=1e-6, groups=32, + temporal_compression_ratio=4, + ): + super().__init__() + self.latent_channels = latent_channels + self.temporal_compression_ratio = temporal_compression_ratio + + self.encoder = Encoder3D( + in_channels=in_channels, out_channels=latent_channels, + block_out_channels=block_out_channels, layers_per_block=layers_per_block, + act_fn=act_fn, eps=eps, groups=groups, + temporal_compression_ratio=temporal_compression_ratio, + ) + self.decoder = Decoder3D( + in_channels=latent_channels, out_channels=out_channels, + block_out_channels=block_out_channels, layers_per_block=layers_per_block, + act_fn=act_fn, eps=eps, groups=groups, + temporal_compression_ratio=temporal_compression_ratio, + ) + + self.num_latent_frames_batch_size = 2 + self.num_sample_frames_batch_size = 8 + + def encode(self, x): + t = x.shape[2] + frame_batch = self.num_sample_frames_batch_size + remainder = t % frame_batch + conv_cache = None + enc = [] + + # Process remainder frames first so only the first chunk can have an + # odd temporal dimension — where Downsample3D's first-frame-special + # handling in temporal compression is actually correct. 
+ if remainder > 0: + chunk, conv_cache = self.encoder(x[:, :, :remainder], conv_cache=conv_cache) + enc.append(chunk.to(x.device)) + + for start in range(remainder, t, frame_batch): + chunk, conv_cache = self.encoder(x[:, :, start:start + frame_batch], conv_cache=conv_cache) + enc.append(chunk.to(x.device)) + + enc = torch.cat(enc, dim=2) + mean, _ = enc.chunk(2, dim=1) + return mean + + def decode(self, z): + return self._decode_rolling(z) + + def _decode_batched(self, z): + """Original batched decode - processes 2 latent frames through full decoder.""" + t = z.shape[2] + frame_batch = self.num_latent_frames_batch_size + num_batches = max(t // frame_batch, 1) + conv_cache = None + dec = [] + for i in range(num_batches): + remaining = t % frame_batch + start = frame_batch * i + (0 if i == 0 else remaining) + end = frame_batch * (i + 1) + remaining + chunk, conv_cache = self.decoder(z[:, :, start:end], conv_cache=conv_cache) + dec.append(chunk.cpu()) + return torch.cat(dec, dim=2).to(z.device) + + def _decode_rolling(self, z): + """Rolling decode - processes low-res layers on full tensor, then rolls + through expensive high-res layers in temporal chunks.""" + decoder = self.decoder + device = z.device + + # Determine which up_blocks have temporal upsample vs spatial-only. + # Temporal up_blocks are cheap (low res), spatial-only are expensive. 
+ temporal_compress_level = int(np.log2(self.temporal_compression_ratio)) + split_at = temporal_compress_level # first N up_blocks do temporal upsample + + # Phase 1: conv_in + mid_block + temporal up_blocks on full tensor (low/medium res) + x, _ = decoder.conv_in(z) + x, _ = decoder.mid_block(x, None, z) + + for i in range(split_at): + x, _ = decoder.up_blocks[i](x, None, z) + + # Phase 2: remaining spatial-only up_blocks + norm_out + conv_out in temporal chunks + remaining_blocks = list(range(split_at, len(decoder.up_blocks))) + chunk_size = 4 # pixel frames per chunk through high-res layers + t_expanded = x.shape[2] + + if t_expanded <= chunk_size or len(remaining_blocks) == 0: + # Small enough to process in one go + for i in remaining_blocks: + x, _ = decoder.up_blocks[i](x, None, z) + x, _ = decoder.norm_out(x, z) + x = decoder.conv_act(x) + x, _ = decoder.conv_out(x) + return x + + # Expand z temporally once to match Phase 2's time dimension. + # z stays at latent spatial resolution so this is small (~16 MB vs ~1.3 GB + # for the old approach of pre-interpolating to every pixel resolution). + z_time_expanded = _interpolate_zq(z, (t_expanded, z.shape[3], z.shape[4])) + + # Process in temporal chunks, interpolating spatially per-chunk to avoid + # allocating full [B, C, t_expanded, H, W] tensors at each resolution. 
+ dec_out = [] + conv_caches = {} + + for chunk_start in range(0, t_expanded, chunk_size): + chunk_end = min(chunk_start + chunk_size, t_expanded) + x_chunk = x[:, :, chunk_start:chunk_end] + z_t_chunk = z_time_expanded[:, :, chunk_start:chunk_end] + z_spatial_cache = {} + + for i in remaining_blocks: + block = decoder.up_blocks[i] + cache_key = f"up_block_{i}" + hw_key = (x_chunk.shape[3], x_chunk.shape[4]) + if hw_key not in z_spatial_cache: + if z_t_chunk.shape[3] == hw_key[0] and z_t_chunk.shape[4] == hw_key[1]: + z_spatial_cache[hw_key] = z_t_chunk + else: + z_spatial_cache[hw_key] = F.interpolate(z_t_chunk, size=(z_t_chunk.shape[2], hw_key[0], hw_key[1])) + x_chunk, new_cache = block(x_chunk, None, z_spatial_cache[hw_key], conv_cache=conv_caches.get(cache_key)) + conv_caches[cache_key] = new_cache + + hw_key = (x_chunk.shape[3], x_chunk.shape[4]) + if hw_key not in z_spatial_cache: + z_spatial_cache[hw_key] = F.interpolate(z_t_chunk, size=(z_t_chunk.shape[2], hw_key[0], hw_key[1])) + x_chunk, new_cache = decoder.norm_out(x_chunk, z_spatial_cache[hw_key], conv_cache=conv_caches.get("norm_out")) + conv_caches["norm_out"] = new_cache + x_chunk = decoder.conv_act(x_chunk) + x_chunk, new_cache = decoder.conv_out(x_chunk, conv_cache=conv_caches.get("conv_out")) + conv_caches["conv_out"] = new_cache + + dec_out.append(x_chunk.cpu()) + del z_spatial_cache + + del x, z_time_expanded + return torch.cat(dec_out, dim=2).to(device) diff --git a/comfy/ldm/ernie/model.py b/comfy/ldm/ernie/model.py new file mode 100644 index 000000000..eba661aec --- /dev/null +++ b/comfy/ldm/ernie/model.py @@ -0,0 +1,301 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + +from comfy.ldm.modules.attention import optimized_attention +import comfy.model_management + +def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: + assert dim % 2 == 0 + if not comfy.model_management.supports_fp64(pos.device): + device = torch.device("cpu") + else: + 
device = pos.device + + scale = torch.arange(0, dim, 2, dtype=torch.float64, device=device) / dim + omega = 1.0 / (theta**scale) + out = torch.einsum("...n,d->...nd", pos.to(device), omega) + out = torch.stack([torch.cos(out), torch.sin(out)], dim=0) + return out.to(dtype=torch.float32, device=pos.device) + +def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor: + rot_dim = freqs_cis.shape[-1] + x, x_pass = x_in[..., :rot_dim], x_in[..., rot_dim:] + cos_ = freqs_cis[0] + sin_ = freqs_cis[1] + x1, x2 = x.chunk(2, dim=-1) + x_rotated = torch.cat((-x2, x1), dim=-1) + return torch.cat((x * cos_ + x_rotated * sin_, x_pass), dim=-1) + +class ErnieImageEmbedND3(nn.Module): + def __init__(self, dim: int, theta: int, axes_dim: tuple): + super().__init__() + self.dim = dim + self.theta = theta + self.axes_dim = list(axes_dim) + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1) + emb = emb.unsqueeze(3) # [2, B, S, 1, head_dim//2] + return torch.stack([emb, emb], dim=-1).reshape(*emb.shape[:-1], -1) # [2, B, S, 1, head_dim]; axis 0 holds (cos, sin) as stacked by rope() + +class ErnieImagePatchEmbedDynamic(nn.Module): + def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None): + super().__init__() + self.patch_size = patch_size + self.proj = operations.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size, bias=True, device=device, dtype=dtype) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.proj(x) + batch_size, dim, height, width = x.shape + return x.reshape(batch_size, dim, height * width).transpose(1, 2).contiguous() + +class Timesteps(nn.Module): + def __init__(self, num_channels: int, flip_sin_to_cos: bool = False): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + + def forward(self, timesteps: torch.Tensor) -> torch.Tensor: + half_dim = self.num_channels // 
2 + exponent = -math.log(10000) * torch.arange(half_dim, dtype=torch.float32, device=timesteps.device) / half_dim + emb = torch.exp(exponent) + emb = timesteps[:, None].float() * emb[None, :] + if self.flip_sin_to_cos: + emb = torch.cat([torch.cos(emb), torch.sin(emb)], dim=-1) + else: + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) + return emb + +class TimestepEmbedding(nn.Module): + def __init__(self, in_channels: int, time_embed_dim: int, operations, device=None, dtype=None): + super().__init__() + Linear = operations.Linear + self.linear_1 = Linear(in_channels, time_embed_dim, bias=True, device=device, dtype=dtype) + self.act = nn.SiLU() + self.linear_2 = Linear(time_embed_dim, time_embed_dim, bias=True, device=device, dtype=dtype) + + def forward(self, sample: torch.Tensor) -> torch.Tensor: + sample = self.linear_1(sample) + sample = self.act(sample) + sample = self.linear_2(sample) + return sample + +class ErnieImageAttention(nn.Module): + def __init__(self, query_dim: int, heads: int, dim_head: int, eps: float = 1e-6, operations=None, device=None, dtype=None): + super().__init__() + self.heads = heads + self.head_dim = dim_head + self.inner_dim = heads * dim_head + + Linear = operations.Linear + RMSNorm = operations.RMSNorm + + self.to_q = Linear(query_dim, self.inner_dim, bias=False, device=device, dtype=dtype) + self.to_k = Linear(query_dim, self.inner_dim, bias=False, device=device, dtype=dtype) + self.to_v = Linear(query_dim, self.inner_dim, bias=False, device=device, dtype=dtype) + + self.norm_q = RMSNorm(dim_head, eps=eps, elementwise_affine=True, device=device, dtype=dtype) + self.norm_k = RMSNorm(dim_head, eps=eps, elementwise_affine=True, device=device, dtype=dtype) + + self.to_out = nn.ModuleList([Linear(self.inner_dim, query_dim, bias=False, device=device, dtype=dtype)]) + + def forward(self, x: torch.Tensor, attention_mask: torch.Tensor = None, image_rotary_emb: torch.Tensor = None) -> torch.Tensor: + B, S, _ = x.shape + + q_flat = 
self.to_q(x) + k_flat = self.to_k(x) + v_flat = self.to_v(x) + + query = q_flat.view(B, S, self.heads, self.head_dim) + key = k_flat.view(B, S, self.heads, self.head_dim) + + query = self.norm_q(query) + key = self.norm_k(key) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + key = apply_rotary_emb(key, image_rotary_emb) + + q_flat = query.reshape(B, S, -1) + k_flat = key.reshape(B, S, -1) + + hidden_states = optimized_attention(q_flat, k_flat, v_flat, self.heads, mask=attention_mask) + + return self.to_out[0](hidden_states) + +class ErnieImageFeedForward(nn.Module): + def __init__(self, hidden_size: int, ffn_hidden_size: int, operations, device=None, dtype=None): + super().__init__() + Linear = operations.Linear + self.gate_proj = Linear(hidden_size, ffn_hidden_size, bias=False, device=device, dtype=dtype) + self.up_proj = Linear(hidden_size, ffn_hidden_size, bias=False, device=device, dtype=dtype) + self.linear_fc2 = Linear(ffn_hidden_size, hidden_size, bias=False, device=device, dtype=dtype) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.linear_fc2(self.up_proj(x) * F.gelu(self.gate_proj(x))) + +class ErnieImageSharedAdaLNBlock(nn.Module): + def __init__(self, hidden_size: int, num_heads: int, ffn_hidden_size: int, eps: float = 1e-6, operations=None, device=None, dtype=None): + super().__init__() + RMSNorm = operations.RMSNorm + + self.adaLN_sa_ln = RMSNorm(hidden_size, eps=eps, device=device, dtype=dtype) + self.self_attention = ErnieImageAttention( + query_dim=hidden_size, + dim_head=hidden_size // num_heads, + heads=num_heads, + eps=eps, + operations=operations, + device=device, + dtype=dtype + ) + self.adaLN_mlp_ln = RMSNorm(hidden_size, eps=eps, device=device, dtype=dtype) + self.mlp = ErnieImageFeedForward(hidden_size, ffn_hidden_size, operations=operations, device=device, dtype=dtype) + + def forward(self, x, rotary_pos_emb, temb, attention_mask=None): + shift_msa, scale_msa, gate_msa, 
shift_mlp, scale_mlp, gate_mlp = temb + + residual = x + x_norm = self.adaLN_sa_ln(x) + x_norm = x_norm * (1 + scale_msa) + shift_msa + + attn_out = self.self_attention(x_norm, attention_mask=attention_mask, image_rotary_emb=rotary_pos_emb) + x = residual + gate_msa * attn_out + + residual = x + x_norm = self.adaLN_mlp_ln(x) + x_norm = x_norm * (1 + scale_mlp) + shift_mlp + + return residual + gate_mlp * self.mlp(x_norm) + +class ErnieImageAdaLNContinuous(nn.Module): + def __init__(self, hidden_size: int, eps: float = 1e-6, operations=None, device=None, dtype=None): + super().__init__() + LayerNorm = operations.LayerNorm + Linear = operations.Linear + self.norm = LayerNorm(hidden_size, elementwise_affine=False, eps=eps, device=device, dtype=dtype) + self.linear = Linear(hidden_size, hidden_size * 2, device=device, dtype=dtype) + + def forward(self, x: torch.Tensor, conditioning: torch.Tensor) -> torch.Tensor: + scale, shift = self.linear(conditioning).chunk(2, dim=-1) + x = self.norm(x) + x = torch.addcmul(shift.unsqueeze(1), x, 1 + scale.unsqueeze(1)) + return x + +class ErnieImageModel(nn.Module): + def __init__( + self, + hidden_size: int = 4096, + num_attention_heads: int = 32, + num_layers: int = 36, + ffn_hidden_size: int = 12288, + in_channels: int = 128, + out_channels: int = 128, + patch_size: int = 1, + text_in_dim: int = 3072, + rope_theta: int = 256, + rope_axes_dim: tuple = (32, 48, 48), + eps: float = 1e-6, + qk_layernorm: bool = True, + device=None, + dtype=None, + operations=None, + **kwargs + ): + super().__init__() + self.dtype = dtype + self.hidden_size = hidden_size + self.num_heads = num_attention_heads + self.head_dim = hidden_size // num_attention_heads + self.patch_size = patch_size + self.out_channels = out_channels + + Linear = operations.Linear + + self.x_embedder = ErnieImagePatchEmbedDynamic(in_channels, hidden_size, patch_size, operations, device, dtype) + self.text_proj = Linear(text_in_dim, hidden_size, bias=False, device=device, 
dtype=dtype) if text_in_dim != hidden_size else None + + self.time_proj = Timesteps(hidden_size, flip_sin_to_cos=False) + self.time_embedding = TimestepEmbedding(hidden_size, hidden_size, operations, device, dtype) + + self.pos_embed = ErnieImageEmbedND3(dim=self.head_dim, theta=rope_theta, axes_dim=rope_axes_dim) + + self.adaLN_modulation = nn.Sequential( + nn.SiLU(), + Linear(hidden_size, 6 * hidden_size, device=device, dtype=dtype) + ) + + self.layers = nn.ModuleList([ + ErnieImageSharedAdaLNBlock(hidden_size, num_attention_heads, ffn_hidden_size, eps, operations, device, dtype) + for _ in range(num_layers) + ]) + + self.final_norm = ErnieImageAdaLNContinuous(hidden_size, eps, operations, device, dtype) + self.final_linear = Linear(hidden_size, patch_size * patch_size * out_channels, device=device, dtype=dtype) + + def forward(self, x, timesteps, context, **kwargs): + device, dtype = x.device, x.dtype + B, C, H, W = x.shape + p, Hp, Wp = self.patch_size, H // self.patch_size, W // self.patch_size + N_img = Hp * Wp + + img_bsh = self.x_embedder(x) + + text_bth = context + if self.text_proj is not None and text_bth.numel() > 0: + text_bth = self.text_proj(text_bth) + Tmax = text_bth.shape[1] + + hidden_states = torch.cat([img_bsh, text_bth], dim=1) + + text_ids = torch.zeros((B, Tmax, 3), device=device, dtype=torch.float32) + text_ids[:, :, 0] = torch.linspace(0, Tmax - 1, steps=Tmax, device=x.device, dtype=torch.float32) + index = float(Tmax) + + transformer_options = kwargs.get("transformer_options", {}) + rope_options = transformer_options.get("rope_options", None) + + h_len, w_len = float(Hp), float(Wp) + h_offset, w_offset = 0.0, 0.0 + + if rope_options is not None: + h_len = (h_len - 1.0) * rope_options.get("scale_y", 1.0) + 1.0 + w_len = (w_len - 1.0) * rope_options.get("scale_x", 1.0) + 1.0 + index += rope_options.get("shift_t", 0.0) + h_offset += rope_options.get("shift_y", 0.0) + w_offset += rope_options.get("shift_x", 0.0) + + image_ids = 
torch.zeros((Hp, Wp, 3), device=device, dtype=torch.float32) + image_ids[:, :, 0] = image_ids[:, :, 1] + index + image_ids[:, :, 1] = image_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=Hp, device=device, dtype=torch.float32).unsqueeze(1) + image_ids[:, :, 2] = image_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=Wp, device=device, dtype=torch.float32).unsqueeze(0) + + image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1) + + rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype) + del image_ids, text_ids + + sample = self.time_proj(timesteps).to(dtype) + c = self.time_embedding(sample) + + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = [ + t.unsqueeze(1).contiguous() for t in self.adaLN_modulation(c).chunk(6, dim=-1) + ] + + temb = [shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp] + for layer in self.layers: + hidden_states = layer(hidden_states, rotary_pos_emb, temb) + + hidden_states = self.final_norm(hidden_states, c).type_as(hidden_states) + + patches = self.final_linear(hidden_states)[:, :N_img, :] + output = ( + patches.view(B, Hp, Wp, p, p, self.out_channels) + .permute(0, 5, 1, 3, 2, 4) + .contiguous() + .view(B, self.out_channels, H, W) + ) + + return output diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py index 824daf5e6..6d0aed827 100644 --- a/comfy/ldm/flux/math.py +++ b/comfy/ldm/flux/math.py @@ -16,7 +16,7 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transforme def rope(pos: Tensor, dim: int, theta: int) -> Tensor: assert dim % 2 == 0 - if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled(): + if not comfy.model_management.supports_fp64(pos.device): device = torch.device("cpu") else: device = pos.device diff --git a/comfy/ldm/hidream_o1/attention.py b/comfy/ldm/hidream_o1/attention.py new file mode 100644 index 
000000000..1b68f1771 --- /dev/null +++ b/comfy/ldm/hidream_o1/attention.py @@ -0,0 +1,41 @@ +"""HiDream-O1 two-pass attention: tokens [0, ar_len) are causal, [ar_len, T) +attend full K/V. Splitting Q at the boundary avoids the (B, 1, T, T) additive +mask the general-purpose path would build (~500 MB at T~16K) and lets the +gen half hit the user's preferred backend via optimized_attention. +""" + +import torch + +import comfy.ops +from comfy.ldm.modules.attention import optimized_attention + + +def make_two_pass_attention(ar_len: int, transformer_options=None): + """Build a two-pass attention callable. AR pass uses SDPA-causal directly, gen pass routes through optimized_attention. + The AR pass goes through SDPA directly and bypasses wrappers, it is only ~1% of T at typical edit sizes. + """ + + def two_pass_attention(q, k, v, heads, **kwargs): + B, H, T, D = q.shape + + if T < k.shape[2]: # KV-cache hot path: Q is shorter than K/V (cached AR prefix is in K/V only), all fresh Q positions are in the gen region, single full-attention call + out = optimized_attention(q, k, v, heads, mask=None, skip_reshape=True, skip_output_reshape=True, transformer_options=transformer_options) + elif ar_len >= T: + out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0, is_causal=True) + elif ar_len <= 0: + out = optimized_attention(q, k, v, heads, mask=None, skip_reshape=True, skip_output_reshape=True, transformer_options=transformer_options) + else: + out_ar = comfy.ops.scaled_dot_product_attention( + q[:, :, :ar_len], k[:, :, :ar_len], v[:, :, :ar_len], + attn_mask=None, dropout_p=0.0, is_causal=True, + ) + out_gen = optimized_attention( + q[:, :, ar_len:], k, v, heads, + mask=None, skip_reshape=True, skip_output_reshape=True, + transformer_options=transformer_options, + ) + out = torch.cat([out_ar, out_gen], dim=2) + + return out.transpose(1, 2).reshape(B, T, H * D) + + return two_pass_attention diff --git a/comfy/ldm/hidream_o1/conditioning.py
b/comfy/ldm/hidream_o1/conditioning.py new file mode 100644 index 000000000..7496f0035 --- /dev/null +++ b/comfy/ldm/hidream_o1/conditioning.py @@ -0,0 +1,230 @@ +"""HiDream-O1 conditioning prep — ref-image dual path + extra_conds assembly. + +Each ref image goes through two paths: a 32x32 patchified stream concatenated +to the noised target, and a Qwen3-VL ViT path producing tokens that scatter +into input_ids at <|image_pad|> positions. +""" + +from typing import List + +import torch + +import comfy.utils +from comfy.text_encoders.qwen_vl import process_qwen2vl_images + +from .utils import (PATCH_SIZE, calculate_dimensions, cond_image_size, ref_max_size, resize_tensor) + +# Qwen3-VL ViT preprocessing constants (preprocessor_config.json). +VIT_PATCH = 16 +VIT_MERGE = 2 +VIT_IMAGE_MEAN = [0.5, 0.5, 0.5] +VIT_IMAGE_STD = [0.5, 0.5, 0.5] + + +def prepare_ref_images( + ref_images: List[torch.Tensor], + target_h: int, + target_w: int, + device: torch.device, + dtype: torch.dtype, +): + """Build the dual-path tensors for K reference images at (target_h, target_w). + + Returns None for K=0, else a dict with ref_patches, ref_pixel_values, + ref_image_grid_thw, per_ref_vit_tokens, per_ref_patch_grids. + """ + K = len(ref_images) + if K == 0: + return None + max_size = ref_max_size(max(target_h, target_w), K) + cis = cond_image_size(K) + + refs_t = [img[0].clamp(0, 1).permute(2, 0, 1).unsqueeze(0).contiguous().float() for img in ref_images] + refs_t = [resize_tensor(t, max_size, PATCH_SIZE) for t in refs_t] + + # 32-patch path. 
+ ref_patches_per = [] + per_ref_patch_grids = [] + for t in refs_t: + t_norm = (t.squeeze(0) - 0.5) / 0.5 # (3, H, W) in [-1, 1] + h_p, w_p = t_norm.shape[-2] // PATCH_SIZE, t_norm.shape[-1] // PATCH_SIZE + per_ref_patch_grids.append((h_p, w_p)) + patches = ( + t_norm.reshape(3, h_p, PATCH_SIZE, w_p, PATCH_SIZE) + .permute(1, 3, 0, 2, 4) + .reshape(h_p * w_p, 3 * PATCH_SIZE * PATCH_SIZE) + ) + ref_patches_per.append(patches) + ref_patches = torch.cat(ref_patches_per, dim=0).unsqueeze(0).to(device=device, dtype=dtype) + + # ViT path. + refs_vlm_t = [] + for t in refs_t: + _, _, h, w = t.shape + cond_w, cond_h = calculate_dimensions(cis, w / h) + cond_w = max(cond_w, VIT_PATCH * VIT_MERGE) + cond_h = max(cond_h, VIT_PATCH * VIT_MERGE) + refs_vlm_t.append(comfy.utils.common_upscale(t, cond_w, cond_h, "lanczos", "disabled")) + + pv_list, grid_list, per_ref_vit_tokens = [], [], [] + for t_v in refs_vlm_t: + pv, grid_thw = process_qwen2vl_images( + t_v.permute(0, 2, 3, 1), + min_pixels=0, max_pixels=10**12, + patch_size=VIT_PATCH, merge_size=VIT_MERGE, + image_mean=VIT_IMAGE_MEAN, image_std=VIT_IMAGE_STD, + ) + grid_thw = grid_thw[0] + pv_list.append(pv.to(device=device, dtype=dtype)) + grid_list.append(grid_thw.to(device=device)) + # Post-merge token count = number of <|image_pad|> tokens this image expands to in input_ids. 
+ gh, gw = int(grid_thw[1].item()), int(grid_thw[2].item()) + per_ref_vit_tokens.append((gh // VIT_MERGE) * (gw // VIT_MERGE)) + + return { + "ref_patches": ref_patches, + "ref_pixel_values": torch.cat(pv_list, dim=0), + "ref_image_grid_thw": torch.stack(grid_list, dim=0), + "per_ref_vit_tokens": per_ref_vit_tokens, + "per_ref_patch_grids": per_ref_patch_grids, + } + + +def build_ref_input_ids( + text_input_ids: torch.Tensor, + per_ref_vit_tokens: List[int], + image_token_id: int, + vision_start_id: int, + vision_end_id: int, +): + """Splice [vision_start, image_pad*N, vision_end] blocks into input_ids + after the [im_start, user, \\n] prefix (matches original chat template). + """ + ids = text_input_ids[0].tolist() + inserted = [] + for n_pad in per_ref_vit_tokens: + inserted.extend([vision_start_id] + [image_token_id] * n_pad + [vision_end_id]) + new_ids = ids[:3] + inserted + ids[3:] # 3 = len([im_start, user, \n]) + return torch.tensor([new_ids], dtype=text_input_ids.dtype, device=text_input_ids.device) + + +def build_extra_conds( + text_input_ids: torch.Tensor, + noise: torch.Tensor, + ref_images: List[torch.Tensor] = None, + target_patch_size: int = 32, +): + """Assemble all conditioning tensors for HiDreamO1Transformer.forward: + input_ids (with ref-vision tokens spliced in for the edit/IP path), + position_ids (MRoPE), token_types, vinput_mask, plus the ref + dual-path tensors when refs are provided. 
+ """ + from .utils import get_rope_index_fix_point + from comfy.text_encoders.hidream_o1 import ( + IMAGE_TOKEN_ID, VISION_START_ID, VISION_END_ID, + ) + + if text_input_ids.dim() == 1: + text_input_ids = text_input_ids.unsqueeze(0) + text_input_ids = text_input_ids.long().to(noise.device) + B = noise.shape[0] + if text_input_ids.shape[0] == 1 and B > 1: + text_input_ids = text_input_ids.expand(B, -1) + + H, W = noise.shape[-2], noise.shape[-1] + h_p, w_p = H // target_patch_size, W // target_patch_size + image_len = h_p * w_p + image_grid_thw_tgt = torch.tensor( + [[1, h_p, w_p]], dtype=torch.long, device=text_input_ids.device, + ) + + out = {} + if ref_images: + ref = prepare_ref_images(ref_images, H, W, device=noise.device, dtype=noise.dtype) + text_input_ids = build_ref_input_ids( + text_input_ids, ref["per_ref_vit_tokens"], + IMAGE_TOKEN_ID, VISION_START_ID, VISION_END_ID, + ) + new_txt_len = text_input_ids.shape[1] + + # Each ref's patchified stream gets a [vision_start, image_pad*N-1] + # block in the position-id stream after the noised target. + ref_grid_lengths = [hp * wp for (hp, wp) in ref["per_ref_patch_grids"]] + tgt_vision = torch.full((1, image_len), IMAGE_TOKEN_ID, + dtype=text_input_ids.dtype, device=text_input_ids.device) + tgt_vision[:, 0] = VISION_START_ID + ref_vision_blocks = [] + for rl in ref_grid_lengths: + blk = torch.full((1, rl), IMAGE_TOKEN_ID, + dtype=text_input_ids.dtype, device=text_input_ids.device) + blk[:, 0] = VISION_START_ID + ref_vision_blocks.append(blk) + ref_vision_cat = torch.cat([tgt_vision] + ref_vision_blocks, dim=1) + input_ids_pad = torch.cat([text_input_ids, ref_vision_cat], dim=-1) + total_ref_patches_len = sum(ref_grid_lengths) + total_len = new_txt_len + image_len + total_ref_patches_len + + # K (ViT, post-merge) + 1 (target) + K (ref-patches) image grids. 
+ K = len(ref_images) + igthw_cond = ref["ref_image_grid_thw"].clone() + igthw_cond[:, 1] //= 2 + igthw_cond[:, 2] //= 2 + image_grid_thw_ref = torch.tensor( + [[1, hp, wp] for (hp, wp) in ref["per_ref_patch_grids"]], + dtype=torch.long, device=text_input_ids.device, + ) + igthw_all = torch.cat([ + igthw_cond.to(text_input_ids.device), + image_grid_thw_tgt, + image_grid_thw_ref, + ], dim=0) + position_ids, _ = get_rope_index_fix_point( + spatial_merge_size=1, + image_token_id=IMAGE_TOKEN_ID, + vision_start_token_id=VISION_START_ID, + input_ids=input_ids_pad, image_grid_thw=igthw_all, + attention_mask=None, + skip_vision_start_token=[0] * K + [1] + [1] * K, + fix_point=4096, + ) + + # tms + target_image + ref_patches are all gen. + tms_pos = new_txt_len - 1 + ar_len = tms_pos + token_types = torch.zeros(B, total_len, dtype=torch.long, device=noise.device) + token_types[:, tms_pos:] = 1 + vinput_mask = torch.zeros(B, total_len, dtype=torch.bool, device=noise.device) + vinput_mask[:, new_txt_len:] = True + + # Leading batch dim sidesteps CONDRegular.process_cond's repeat_to_batch_size truncation + out["ref_pixel_values"] = ref["ref_pixel_values"].unsqueeze(0) + out["ref_image_grid_thw"] = ref["ref_image_grid_thw"].unsqueeze(0) + out["ref_patches"] = ref["ref_patches"] + else: + # T2I: text + noised target only, vision_start replaces the first image token + txt_len = text_input_ids.shape[1] + total_len = txt_len + image_len + vision_tokens = torch.full((B, image_len), IMAGE_TOKEN_ID, + dtype=text_input_ids.dtype, device=text_input_ids.device) + vision_tokens[:, 0] = VISION_START_ID + input_ids_pad = torch.cat([text_input_ids, vision_tokens], dim=-1) + position_ids, _ = get_rope_index_fix_point( + spatial_merge_size=1, + image_token_id=IMAGE_TOKEN_ID, + vision_start_token_id=VISION_START_ID, + input_ids=input_ids_pad, image_grid_thw=image_grid_thw_tgt, + attention_mask=None, + skip_vision_start_token=[1], + ) + ar_len = txt_len - 1 + token_types = torch.zeros(B, 
total_len, dtype=torch.long, device=noise.device) + token_types[:, ar_len:] = 1 + vinput_mask = torch.zeros(B, total_len, dtype=torch.bool, device=noise.device) + vinput_mask[:, txt_len:] = True + + out["input_ids"] = text_input_ids + out["position_ids"] = position_ids[:, 0].unsqueeze(0) # Collapse position_ids batch and add a leading dim so CONDRegular's batch-resize doesn't truncate the 3-axis MRoPE dim + out["token_types"] = token_types + out["vinput_mask"] = vinput_mask + out["ar_len"] = ar_len + return out diff --git a/comfy/ldm/hidream_o1/model.py b/comfy/ldm/hidream_o1/model.py new file mode 100644 index 000000000..a223e706f --- /dev/null +++ b/comfy/ldm/hidream_o1/model.py @@ -0,0 +1,306 @@ +"""HiDream-O1-Image transformer. + +Pixel-space DiT built on Qwen3-VL: the vision tower (Qwen35VisionModel) +encodes ref images, the Qwen3-VL-8B decoder (Llama2_ with interleaved MRoPE) +processes a unified text+image sequence, and 32x32 patch embed/unembed +shims map raw RGB in and out of LLM hidden space. The Qwen3-VL deepstack +mergers go unused — their weights are dropped at load. 
+""" + +from dataclasses import dataclass, field +from typing import List, Optional + +import einops +import torch +import torch.nn as nn + +import comfy.patcher_extension +from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder +from comfy.text_encoders.llama import Llama2_ +from comfy.text_encoders.qwen35 import Qwen35VisionModel + +from .attention import make_two_pass_attention + + +IMAGE_TOKEN_ID = 151655 # Qwen3-VL <|image_pad|> +TMS_TOKEN_ID = 151673 # HiDream-O1 <|tms_token|> +PATCH_SIZE = 32 + + +@dataclass +class HiDreamO1TextConfig: + """Qwen3-VL-8B text-decoder dims (matches public Qwen3-VL-8B-Instruct).""" + vocab_size: int = 151936 + hidden_size: int = 4096 + intermediate_size: int = 12288 + num_hidden_layers: int = 36 + num_attention_heads: int = 32 + num_key_value_heads: int = 8 + head_dim: int = 128 + max_position_embeddings: int = 128000 + rms_norm_eps: float = 1e-6 + rope_theta: float = 5000000.0 + rope_scale: Optional[float] = None + rope_dims: List[int] = field(default_factory=lambda: [24, 20, 20]) + interleaved_mrope: bool = True + transformer_type: str = "llama" + rms_norm_add: bool = False + mlp_activation: str = "silu" + qkv_bias: bool = False + q_norm: str = "gemma3" + k_norm: str = "gemma3" + final_norm: bool = True + lm_head: bool = False + stop_tokens: List[int] = field(default_factory=lambda: [151643, 151645]) + + +QWEN3VL_VISION_DEFAULTS = dict( + hidden_size=1152, + num_heads=16, + intermediate_size=4304, + depth=27, + patch_size=16, + temporal_patch_size=2, + in_channels=3, + spatial_merge_size=2, + num_position_embeddings=2304, + deepstack_visual_indexes=(8, 16, 24), + out_hidden_size=4096, # final merger projects directly into LLM hidden +) + + +class BottleneckPatchEmbed(nn.Module): + # 3072 -> 1024 -> 4096 (raw 32x32 RGB patch -> bottleneck -> LLM hidden). 
+ def __init__(self, patch_size=32, in_chans=3, pca_dim=1024, embed_dim=4096, bias=True, device=None, dtype=None, ops=None): + super().__init__() + self.proj1 = ops.Linear(patch_size * patch_size * in_chans, pca_dim, bias=False, device=device, dtype=dtype) + self.proj2 = ops.Linear(pca_dim, embed_dim, bias=bias, device=device, dtype=dtype) + + def forward(self, x): + return self.proj2(self.proj1(x)) + + +class FinalLayer(nn.Module): + # 4096 -> 3072 (LLM hidden -> flat pixel patch). + def __init__(self, hidden_size, patch_size=32, out_channels=3, device=None, dtype=None, ops=None): + super().__init__() + self.linear = ops.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, device=device, dtype=dtype) + + def forward(self, x): + return self.linear(x) + + +class HiDreamO1Transformer(nn.Module): + """HiDream-O1 unified pixel-level transformer.""" + + def __init__(self, image_model=None, dtype=None, device=None, operations=None, + text_config_overrides=None, vision_config_overrides=None, **kwargs): + super().__init__() + self.dtype = dtype + + text_cfg = HiDreamO1TextConfig(**(text_config_overrides or {})) + vision_cfg = dict(QWEN3VL_VISION_DEFAULTS) + if vision_config_overrides: + vision_cfg.update(vision_config_overrides) + vision_cfg["out_hidden_size"] = text_cfg.hidden_size + + self.text_config = text_cfg + self.vision_config = vision_cfg + self.hidden_size = text_cfg.hidden_size + self.patch_size = PATCH_SIZE + self.in_channels = 3 + self.tms_token_id = TMS_TOKEN_ID + + self.visual = Qwen35VisionModel(vision_cfg, device=device, dtype=dtype, ops=operations) + self.language_model = Llama2_(text_cfg, device=device, dtype=dtype, ops=operations) + self.t_embedder1 = TimestepEmbedder( + text_cfg.hidden_size, device=device, dtype=dtype, operations=operations, + ) + self.x_embedder = BottleneckPatchEmbed( + patch_size=self.patch_size, in_chans=self.in_channels, + pca_dim=text_cfg.hidden_size // 4, embed_dim=text_cfg.hidden_size, + bias=True, 
device=device, dtype=dtype, ops=operations, + ) + self.final_layer2 = FinalLayer( + text_cfg.hidden_size, patch_size=self.patch_size, + out_channels=self.in_channels, device=device, dtype=dtype, ops=operations, + ) + + self._visual_cache = None + self._kv_cache_entries = [] + + def clear_kv_cache(self): + self._kv_cache_entries = [] + self._visual_cache = None + + def forward(self, x, timesteps, context=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timesteps, context, transformer_options, **kwargs) + + def _forward(self, x, timesteps, context=None, transformer_options={}, input_ids=None, attention_mask=None, position_ids=None, + vinput_mask=None, ar_len=None, ref_pixel_values=None, ref_image_grid_thw=None, ref_patches=None, **kwargs): + """Returns flow-match velocity (x - x_pred) / sigma""" + + if input_ids is None or position_ids is None: + raise ValueError("HiDreamO1Transformer requires input_ids and position_ids in conditioning") + + B, _, H, W = x.shape + h_p, w_p = H // self.patch_size, W // self.patch_size + tgt_image_len = h_p * w_p + + z = einops.rearrange( + x, 'B C (H p1) (W p2) -> B (H W) (C p1 p2)', + p1=self.patch_size, p2=self.patch_size, + ) + vinputs = torch.cat([z, ref_patches.to(z.dtype)], dim=1) if ref_patches is not None else z + + inputs_embeds = self.language_model.embed_tokens(input_ids).to(x.dtype) + + if ref_pixel_values is not None and ref_image_grid_thw is not None: + # ViT output is constant across sampling steps within a generation + # identity-key by the input tensor so refs don't recompute every step. 
+ cached = self._visual_cache + if cached is not None and cached[0] is ref_pixel_values: + image_embeds = cached[1] + else: + ref_pv = ref_pixel_values.to(inputs_embeds.device) + ref_grid = ref_image_grid_thw.to(inputs_embeds.device).long() + # extra_conds wraps with a leading batch dim; refs are model-level so [0] always recovers them. + if ref_pv.dim() == 3: + ref_pv = ref_pv[0] + if ref_grid.dim() == 3: + ref_grid = ref_grid[0] + image_embeds = self.visual(ref_pv, ref_grid).to(inputs_embeds.dtype) + self._visual_cache = (ref_pixel_values, image_embeds) + # image_pad positions identical across batch (input_ids shared cond/uncond). + image_idx = (input_ids[0] == IMAGE_TOKEN_ID).nonzero(as_tuple=True)[0] + if image_idx.shape[0] != image_embeds.shape[0]: + raise ValueError( + f"Image-token count {image_idx.shape[0]} != ViT output count " + f"{image_embeds.shape[0]}; check tokenizer/processor alignment." + ) + inputs_embeds[:, image_idx] = image_embeds.unsqueeze(0).expand(B, -1, -1) + + sigma = timesteps.float() / 1000.0 + t_pixeldit = 1.0 - sigma + t_emb = self.t_embedder1(t_pixeldit * 1000, inputs_embeds.dtype) + tms_mask_3d = (input_ids == self.tms_token_id).unsqueeze(-1).expand_as(inputs_embeds) + inputs_embeds = torch.where(tms_mask_3d, t_emb.unsqueeze(1).expand_as(inputs_embeds), inputs_embeds) + + vinputs_embedded = self.x_embedder(vinputs.to(inputs_embeds.dtype)) + inputs_embeds = torch.cat([inputs_embeds, vinputs_embedded], dim=1) + + # extra_conds stores position_ids as (1, 3, T); process_cond repeats dim 0 to B. Take row 0. 
+ freqs_cis = self.language_model.compute_freqs_cis(position_ids[0].to(x.device), x.device) + freqs_cis = tuple(t.to(x.dtype) for t in freqs_cis) + + two_pass_attn = make_two_pass_attention(ar_len, transformer_options=transformer_options) + patches_replace = transformer_options.get("patches_replace", {}) + blocks_replace = patches_replace.get("dit", {}) + transformer_options["total_blocks"] = len(self.language_model.layers) + transformer_options["block_type"] = "double" + + # Cache prefix K/V across steps. Key includes input_ids (prompt), ref_id + # (refs scatter into inputs_embeds), and position_ids (RoPE baked into cached K). + can_cache = not blocks_replace and ar_len > 0 + cache_len = ar_len if can_cache else 0 + ref_id = id(ref_pixel_values) if ref_pixel_values is not None else None + pos_ids_key = position_ids[..., :cache_len] if can_cache else position_ids + cache_entries = self._kv_cache_entries + # Drop stale entries from a previous device (model was unloaded and reloaded). + if cache_entries and cache_entries[0]["input_ids"].device != input_ids.device: + cache_entries = [] + self._kv_cache_entries = [] + kv_cache = None + if can_cache: + for entry in cache_entries: + ck = entry["input_ids"] + ep = entry["position_ids"] + if (entry["cache_len"] == cache_len + and ck.shape == input_ids.shape and torch.equal(ck, input_ids) + and entry["ref_id"] == ref_id + and ep.shape == pos_ids_key.shape and torch.equal(ep, pos_ids_key)): + kv_cache = entry + break + + if kv_cache is not None: + # Hot path: project Q/K/V only for fresh positions; past_key_value prepends cached AR K/V. 
+ hidden_states = inputs_embeds[:, cache_len:] + sliced_freqs = tuple(t[..., cache_len:, :] for t in freqs_cis) + for i, layer in enumerate(self.language_model.layers): + transformer_options["block_index"] = i + K_i, V_i = kv_cache["kv"][i] + hidden_states, _ = layer( + x=hidden_states, attention_mask=None, freqs_cis=sliced_freqs, optimized_attention=two_pass_attn, + past_key_value=(K_i, V_i, cache_len), + ) + else: + # Cold path: run full sequence; if cacheable, snapshot K/V at AR positions. + snapshots = [] if can_cache else None + past_kv_cold = () if can_cache else None + hidden_states = inputs_embeds + for i, layer in enumerate(self.language_model.layers): + transformer_options["block_index"] = i + if ("double_block", i) in blocks_replace: + def block_wrap(args, _layer=layer): + out = {} + out["x"], _ = _layer( + x=args["x"], attention_mask=args.get("attention_mask"), + freqs_cis=args["freqs_cis"], optimized_attention=args["optimized_attention"], + past_key_value=None, + ) + return out + out = blocks_replace[("double_block", i)]( + {"x": hidden_states, "attention_mask": None, + "freqs_cis": freqs_cis, "optimized_attention": two_pass_attn, + "transformer_options": transformer_options}, + {"original_block": block_wrap}, + ) + hidden_states = out["x"] + else: + hidden_states, present_kv = layer( + x=hidden_states, attention_mask=None, + freqs_cis=freqs_cis, optimized_attention=two_pass_attn, + past_key_value=past_kv_cold, + ) + if snapshots is not None: + K, V, _ = present_kv + snapshots.append((K[:, :, :cache_len].contiguous(), + V[:, :, :cache_len].contiguous())) + if snapshots is not None: + # Cap at 2 entries (cond + uncond). Multi-cond workflows LRU-evict. 
+ new_entry = { + "input_ids": input_ids.clone(), + "cache_len": cache_len, + "kv": snapshots, + "ref_id": ref_id, + "position_ids": pos_ids_key.clone(), + } + self._kv_cache_entries = (cache_entries + [new_entry])[-2:] + + if self.language_model.norm is not None: + hidden_states = self.language_model.norm(hidden_states) + + # Slice target-image positions before the final projection so the Linear only runs on tgt_image_len tokens. + # In the hot path hidden_states starts at original position cache_len, so masks/indices shift by cache_len. + sliced_offset = cache_len if kv_cache is not None else 0 + if vinput_mask is not None: + vmask = vinput_mask.to(x.device).bool() + if sliced_offset > 0: + vmask = vmask[:, sliced_offset:] + target_hidden = hidden_states[vmask].view(B, -1, hidden_states.shape[-1])[:, :tgt_image_len] + else: + txt_seq_len = input_ids.shape[1] + start = txt_seq_len - sliced_offset + target_hidden = hidden_states[:, start:start + tgt_image_len] + x_pred_tgt = self.final_layer2(target_hidden) + + # fp32 final subtraction, bf16 here noticeably degrades samples. + x_pred_img = einops.rearrange( + x_pred_tgt, 'B (H W) (C p1 p2) -> B C (H p1) (W p2)', + H=h_p, W=w_p, p1=self.patch_size, p2=self.patch_size, + ) + return (x.float() - x_pred_img.float()) / sigma.view(B, 1, 1, 1).clamp_min(1e-3) diff --git a/comfy/ldm/hidream_o1/utils.py b/comfy/ldm/hidream_o1/utils.py new file mode 100644 index 000000000..5a1249c72 --- /dev/null +++ b/comfy/ldm/hidream_o1/utils.py @@ -0,0 +1,173 @@ +"""HiDream-O1 input-prep helpers: image/resolution math and unified-sequence +RoPE position-id assembly. The fix_point offset in get_rope_index_fix_point +lets the target image and patchified ref images share spatial RoPE positions +despite living at different sequence indices — same 2D image plane. 
+""" + +import math +from typing import Optional + +import torch + + +PATCH_SIZE = 32 +CONDITION_IMAGE_SIZE = 384 # ViT-side base size for ref images + + +def resize_tensor(img_t, image_size, patch_size=16): + """img_t: (1, 3, H, W) float [0, 1]. Fit to image_size**2 area, patch-aligned, center-cropped.""" + + while min(img_t.shape[-2], img_t.shape[-1]) >= 2 * image_size: # Pre-halves with 2x2 box averaging while the image is still very large + img_t = torch.nn.functional.avg_pool2d(img_t, kernel_size=2, stride=2) + + _, _, height, width = img_t.shape + m = patch_size + s_max = image_size * image_size + scale = math.sqrt(s_max / (width * height)) + + candidates = [ + (round(width * scale) // m * m, round(height * scale) // m * m), + (round(width * scale) // m * m, math.floor(height * scale) // m * m), + (math.floor(width * scale) // m * m, round(height * scale) // m * m), + (math.floor(width * scale) // m * m, math.floor(height * scale) // m * m), + ] + candidates = sorted(candidates, key=lambda x: x[0] * x[1], reverse=True) + new_size = candidates[-1] + for c in candidates: + if c[0] * c[1] <= s_max: + new_size = c + break + + new_w, new_h = new_size + s1 = width / new_w + s2 = height / new_h + if s1 < s2: + resize_w, resize_h = new_w, round(height / s1) + else: + resize_w, resize_h = round(width / s2), new_h + img_t = torch.nn.functional.interpolate(img_t, size=(resize_h, resize_w), mode="bicubic") + top = (resize_h - new_h) // 2 + left = (resize_w - new_w) // 2 + return img_t[..., top:top + new_h, left:left + new_w] + + +def calculate_dimensions(max_size, ratio): + """(W, H) for an aspect ratio fitting in max_size**2 area, 32-aligned.""" + width = math.sqrt(max_size * max_size * ratio) + height = width / ratio + width = int(width / 32) * 32 + height = int(height / 32) * 32 + return width, height + + +def ref_max_size(target_max_dim, k): + """K-dependent ref-image max dim before patchifying.""" + if k == 1: + return target_max_dim + if k == 2: + return 
target_max_dim * 48 // 64 + if k <= 4: + return target_max_dim // 2 + if k <= 8: + return target_max_dim * 24 // 64 + return target_max_dim // 4 + + +def cond_image_size(k): + """K-dependent ViT-side image size.""" + if k <= 4: + return CONDITION_IMAGE_SIZE + if k <= 8: + return CONDITION_IMAGE_SIZE * 48 // 64 + return CONDITION_IMAGE_SIZE // 2 + + +def get_rope_index_fix_point( + spatial_merge_size: int, + image_token_id: int, + vision_start_token_id: int, + input_ids: Optional[torch.LongTensor] = None, + image_grid_thw: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + skip_vision_start_token=None, + fix_point: int = 4096, +): + mrope_position_deltas = [] + if input_ids is not None and image_grid_thw is not None: + total_input_ids = input_ids + if attention_mask is None: + attention_mask = torch.ones_like(total_input_ids) + position_ids = torch.ones( + 3, input_ids.shape[0], input_ids.shape[1], + dtype=input_ids.dtype, device=input_ids.device, + ) + attention_mask = attention_mask.to(total_input_ids.device) + for i, input_ids_b in enumerate(total_input_ids): + fp = fix_point + image_index = 0 + input_ids_b = input_ids_b[attention_mask[i] == 1] + vision_start_indices = torch.argwhere(input_ids_b == vision_start_token_id).squeeze(1) + vision_tokens = input_ids_b[vision_start_indices + 1] + image_nums = (vision_tokens == image_token_id).sum() + input_tokens = input_ids_b.tolist() + llm_pos_ids_list = [] + st = 0 + remain_images = image_nums + for _ in range(image_nums): + if image_token_id in input_tokens and remain_images > 0: + ed = input_tokens.index(image_token_id, st) + else: + ed = len(input_tokens) + 1 + t = image_grid_thw[image_index][0] + h = image_grid_thw[image_index][1] + w = image_grid_thw[image_index][2] + image_index += 1 + remain_images -= 1 + llm_grid_t = t.item() + llm_grid_h = h.item() // spatial_merge_size + llm_grid_w = w.item() // spatial_merge_size + text_len = ed - st + text_len -= 
skip_vision_start_token[image_index - 1] + text_len = max(0, text_len) + st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0 + llm_pos_ids_list.append(torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + t_index = torch.arange(llm_grid_t).view(-1, 1).expand(-1, llm_grid_h * llm_grid_w).flatten() + h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(llm_grid_t, -1, llm_grid_w).flatten() + w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(llm_grid_t, llm_grid_h, -1).flatten() + + if skip_vision_start_token[image_index - 1]: + if fp > 0: + fp = fp - st_idx + llm_pos_ids_list.append(torch.stack([t_index, h_index, w_index]) + fp + st_idx) + fp = 0 + else: + llm_pos_ids_list.append(torch.stack([t_index, h_index, w_index]) + text_len + st_idx) + st = ed + llm_grid_t * llm_grid_h * llm_grid_w + + if st < len(input_tokens): + st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0 + text_len = len(input_tokens) - st + llm_pos_ids_list.append(torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1) + position_ids[..., i, attention_mask[i] == 1] = llm_positions.to(position_ids.device) + mrope_position_deltas.append(llm_positions.max() + 1 - len(total_input_ids[i])) + mrope_position_deltas = torch.tensor(mrope_position_deltas, device=input_ids.device).unsqueeze(1) + return position_ids, mrope_position_deltas + + if attention_mask is not None: + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + position_ids = position_ids.unsqueeze(0).expand(3, -1, -1).to(attention_mask.device) + max_position_ids = position_ids.max(0, keepdim=False)[0].max(-1, keepdim=True)[0] + mrope_position_deltas = max_position_ids + 1 - attention_mask.shape[-1] + else: + position_ids = ( + torch.arange(input_ids.shape[1], device=input_ids.device) + .view(1, 1, -1).expand(3, input_ids.shape[0], -1) + ) + 
mrope_position_deltas = torch.zeros( + [input_ids.shape[0], 1], device=input_ids.device, dtype=input_ids.dtype, + ) + return position_ids, mrope_position_deltas diff --git a/comfy/ldm/lightricks/av_model.py b/comfy/ldm/lightricks/av_model.py index 6f2ba41ef..bc09fb77e 100644 --- a/comfy/ldm/lightricks/av_model.py +++ b/comfy/ldm/lightricks/av_model.py @@ -16,31 +16,31 @@ from comfy.ldm.lightricks.model import ( from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import comfy.ldm.common_dit +import comfy.model_prefetch class CompressedTimestep: """Store video timestep embeddings in compressed form using per-frame indexing.""" __slots__ = ('data', 'batch_size', 'num_frames', 'patches_per_frame', 'feature_dim') - def __init__(self, tensor: torch.Tensor, patches_per_frame: int): + def __init__(self, tensor: torch.Tensor, patches_per_frame: int, per_frame: bool = False): """ - tensor: [batch_size, num_tokens, feature_dim] tensor where num_tokens = num_frames * patches_per_frame - patches_per_frame: Number of spatial patches per frame (height * width in latent space), or None to disable compression + tensor: [batch, num_tokens, feature_dim] (per-token, default) or + [batch, num_frames, feature_dim] (per_frame=True, already compressed). + patches_per_frame: spatial patches per frame; pass None to disable compression. 
""" - self.batch_size, num_tokens, self.feature_dim = tensor.shape - - # Check if compression is valid (num_tokens must be divisible by patches_per_frame) - if patches_per_frame is not None and num_tokens % patches_per_frame == 0 and num_tokens >= patches_per_frame: + self.batch_size, n, self.feature_dim = tensor.shape + if per_frame: self.patches_per_frame = patches_per_frame - self.num_frames = num_tokens // patches_per_frame - - # Reshape to [batch, frames, patches_per_frame, feature_dim] and store one value per frame - # All patches in a frame are identical, so we only keep the first one - reshaped = tensor.view(self.batch_size, self.num_frames, patches_per_frame, self.feature_dim) - self.data = reshaped[:, :, 0, :].contiguous() # [batch, frames, feature_dim] + self.num_frames = n + self.data = tensor + elif patches_per_frame is not None and n >= patches_per_frame and n % patches_per_frame == 0: + self.patches_per_frame = patches_per_frame + self.num_frames = n // patches_per_frame + # All patches in a frame are identical — keep only the first. + self.data = tensor.view(self.batch_size, self.num_frames, patches_per_frame, self.feature_dim)[:, :, 0, :].contiguous() else: - # Not divisible or too small - store directly without compression self.patches_per_frame = 1 - self.num_frames = num_tokens + self.num_frames = n self.data = tensor def expand(self): @@ -715,32 +715,35 @@ class LTXAVModel(LTXVModel): def _prepare_timestep(self, timestep, batch_size, hidden_dtype, **kwargs): """Prepare timestep embeddings.""" - # TODO: some code reuse is needed here. 
grid_mask = kwargs.get("grid_mask", None) - if grid_mask is not None: - timestep = timestep[:, grid_mask] - - timestep_scaled = timestep * self.timestep_scale_multiplier - - v_timestep, v_embedded_timestep = self.adaln_single( - timestep_scaled.flatten(), - {"resolution": None, "aspect_ratio": None}, - batch_size=batch_size, - hidden_dtype=hidden_dtype, - ) - - # Calculate patches_per_frame from orig_shape: [batch, channels, frames, height, width] - # Video tokens are arranged as (frames * height * width), so patches_per_frame = height * width orig_shape = kwargs.get("orig_shape") has_spatial_mask = kwargs.get("has_spatial_mask", None) v_patches_per_frame = None if not has_spatial_mask and orig_shape is not None and len(orig_shape) == 5: - # orig_shape[3] = height, orig_shape[4] = width (in latent space) v_patches_per_frame = orig_shape[3] * orig_shape[4] - # Reshape to [batch_size, num_tokens, dim] and compress for storage - v_timestep = CompressedTimestep(v_timestep.view(batch_size, -1, v_timestep.shape[-1]), v_patches_per_frame) - v_embedded_timestep = CompressedTimestep(v_embedded_timestep.view(batch_size, -1, v_embedded_timestep.shape[-1]), v_patches_per_frame) + # Used by compute_prompt_timestep and the audio cross-attention paths. + timestep_scaled = (timestep[:, grid_mask] if grid_mask is not None else timestep) * self.timestep_scale_multiplier + + # When patches in a frame share a timestep (no spatial mask), project one row per frame instead of one per token + per_frame_path = v_patches_per_frame is not None and (timestep.numel() // batch_size) % v_patches_per_frame == 0 + if per_frame_path: + per_frame = timestep.reshape(batch_size, -1, v_patches_per_frame)[:, :, 0] + if grid_mask is not None: + # All-or-nothing per frame when has_spatial_mask=False. 
+ per_frame = per_frame[:, grid_mask[::v_patches_per_frame]] + ts_input = per_frame * self.timestep_scale_multiplier + else: + ts_input = timestep_scaled + + v_timestep, v_embedded_timestep = self.adaln_single( + ts_input.flatten(), + {"resolution": None, "aspect_ratio": None}, + batch_size=batch_size, + hidden_dtype=hidden_dtype, + ) + v_timestep = CompressedTimestep(v_timestep.view(batch_size, -1, v_timestep.shape[-1]), v_patches_per_frame, per_frame=per_frame_path) + v_embedded_timestep = CompressedTimestep(v_embedded_timestep.view(batch_size, -1, v_embedded_timestep.shape[-1]), v_patches_per_frame, per_frame=per_frame_path) v_prompt_timestep = compute_prompt_timestep( self.prompt_adaln_single, timestep_scaled, batch_size, hidden_dtype @@ -907,9 +910,11 @@ class LTXAVModel(LTXVModel): """Process transformer blocks for LTXAV.""" patches_replace = transformer_options.get("patches_replace", {}) blocks_replace = patches_replace.get("dit", {}) + prefetch_queue = comfy.model_prefetch.make_prefetch_queue(list(self.transformer_blocks), vx.device, transformer_options) # Process transformer blocks for i, block in enumerate(self.transformer_blocks): + comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, vx.device, block) if ("double_block", i) in blocks_replace: def block_wrap(args): @@ -982,6 +987,8 @@ class LTXAVModel(LTXVModel): a_prompt_timestep=a_prompt_timestep, ) + comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, vx.device, None) + return [vx, ax] def _process_output(self, x, embedded_timestep, keyframe_idxs, **kwargs): diff --git a/comfy/ldm/lightricks/model.py b/comfy/ldm/lightricks/model.py index bfbc08357..e0a4a0f9b 100644 --- a/comfy/ldm/lightricks/model.py +++ b/comfy/ldm/lightricks/model.py @@ -358,6 +358,61 @@ def apply_split_rotary_emb(input_tensor, cos, sin): return output.swapaxes(1, 2).reshape(B, T, -1) if needs_reshape else output +class GuideAttentionMask: + """Holds the two per-group masks for LTXV guide self-attention. 
+ _attention_with_guide_mask splits queries into noisy and tracked-guide + groups, so the largest mask is (1, 1, tracked_count, T). + """ + __slots__ = ("guide_start", "tracked_count", "noisy_mask", "tracked_mask") + + def __init__(self, total_tokens, guide_start, tracked_count, tracked_weights): + device = tracked_weights.device + dtype = tracked_weights.dtype + finfo = torch.finfo(dtype) + + pos = tracked_weights > 0 + log_w = torch.full_like(tracked_weights, finfo.min) + log_w[pos] = torch.log(tracked_weights[pos].clamp(min=finfo.tiny)) + + self.guide_start = guide_start + self.tracked_count = tracked_count + + self.noisy_mask = torch.zeros((1, 1, 1, total_tokens), device=device, dtype=dtype) + self.noisy_mask[:, :, :, guide_start:guide_start + tracked_count] = log_w.view(1, 1, 1, -1) + + self.tracked_mask = torch.zeros((1, 1, tracked_count, total_tokens), device=device, dtype=dtype) + self.tracked_mask[:, :, :, :guide_start] = log_w.view(1, 1, -1, 1) + + +def _attention_with_guide_mask(q, k, v, heads, guide_mask, attn_precision, transformer_options): + """Apply the guide mask by partitioning Q into noisy and tracked-guide + groups, so each group needs only its own sub-mask. Avoids materializing + the (1,1,T,T) dense mask. + """ + guide_start = guide_mask.guide_start + tracked_end = guide_start + guide_mask.tracked_count + + out = torch.empty_like(q) + + if guide_start > 0: # In practice currently guides are always after noise, guard for safety if this changes. 
+ out[:, :guide_start, :] = comfy.ldm.modules.attention.optimized_attention( + q[:, :guide_start, :], k, v, heads, mask=guide_mask.noisy_mask, + attn_precision=attn_precision, transformer_options=transformer_options, + low_precision_attention=False, # sageattn mask support is unreliable + ) + out[:, guide_start:tracked_end, :] = comfy.ldm.modules.attention.optimized_attention( + q[:, guide_start:tracked_end, :], k, v, heads, mask=guide_mask.tracked_mask, + attn_precision=attn_precision, transformer_options=transformer_options, + low_precision_attention=False, + ) + if tracked_end < q.shape[1]: # Every guide token is tracked, and nothing comes after them, guard for safety if this changes. + out[:, tracked_end:, :] = comfy.ldm.modules.attention.optimized_attention( + q[:, tracked_end:, :], k, v, heads, + attn_precision=attn_precision, transformer_options=transformer_options, + ) + return out + + class CrossAttention(nn.Module): def __init__( self, @@ -412,8 +467,10 @@ class CrossAttention(nn.Module): if mask is None: out = comfy.ldm.modules.attention.optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision, transformer_options=transformer_options) + elif isinstance(mask, GuideAttentionMask): + out = _attention_with_guide_mask(q, k, v, self.heads, mask, attn_precision=self.attn_precision, transformer_options=transformer_options) else: - out = comfy.ldm.modules.attention.optimized_attention_masked(q, k, v, self.heads, mask, attn_precision=self.attn_precision, transformer_options=transformer_options) + out = comfy.ldm.modules.attention.optimized_attention(q, k, v, self.heads, mask=mask, attn_precision=self.attn_precision, transformer_options=transformer_options) # Apply per-head gating if enabled if self.to_gate_logits is not None: @@ -1063,7 +1120,9 @@ class LTXVModel(LTXBaseModel): additional_args["resolved_guide_entries"] = resolved_entries keyframe_idxs = keyframe_idxs[..., kf_grid_mask, :] - pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = 
keyframe_idxs + + if keyframe_idxs.shape[2] > 0: # Guard for the case of no keyframes surviving + pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = keyframe_idxs # Total surviving guide tokens (all guides) additional_args["num_guide_tokens"] = keyframe_idxs.shape[2] @@ -1099,12 +1158,12 @@ class LTXVModel(LTXBaseModel): if not resolved_entries: return None - # Check if any attenuation is actually needed - needs_attenuation = any( - e["strength"] < 1.0 or e.get("pixel_mask") is not None + # strength != 1.0 means we want to either attenuate (< 1) or amplify (> 1) guide attention. + needs_mask = any( + e["strength"] != 1.0 or e.get("pixel_mask") is not None for e in resolved_entries ) - if not needs_attenuation: + if not needs_mask: return None # Build per-guide-token weights for all tracked guide tokens. @@ -1159,16 +1218,11 @@ class LTXVModel(LTXBaseModel): # Concatenate per-token weights for all tracked guides tracked_weights = torch.cat(all_weights, dim=1) # (1, total_tracked) - # Check if any weight is actually < 1.0 (otherwise no attenuation needed) - if (tracked_weights >= 1.0).all(): + # Skip when every weight is exactly 1.0 (additive bias would be 0). + if (tracked_weights == 1.0).all(): return None - # Build the mask: guide tokens are at the end of the sequence. - # Tracked guides come first (in order), untracked follow. - return self._build_self_attention_mask( - total_tokens, num_guide_tokens, total_tracked, - tracked_weights, guide_start, device, dtype, - ) + return GuideAttentionMask(total_tokens, guide_start, total_tracked, tracked_weights) @staticmethod def _downsample_mask_to_latent(mask, f_lat, h_lat, w_lat): @@ -1234,45 +1288,6 @@ class LTXVModel(LTXBaseModel): return rearrange(latent_mask, "b 1 f h w -> b (f h w)") - @staticmethod - def _build_self_attention_mask(total_tokens, num_guide_tokens, tracked_count, - tracked_weights, guide_start, device, dtype): - """Build a log-space additive self-attention bias mask. 
- - Attenuates attention between noisy tokens and tracked guide tokens. - Untracked guide tokens (at the end of the guide portion) keep full attention. - - Args: - total_tokens: Total sequence length. - num_guide_tokens: Total guide tokens (all guides) at end of sequence. - tracked_count: Number of tracked guide tokens (first in the guide portion). - tracked_weights: (1, tracked_count) tensor, values in [0, 1]. - guide_start: Index where guide tokens begin in the sequence. - device: Target device. - dtype: Target dtype. - - Returns: - (1, 1, total_tokens, total_tokens) additive bias mask. - 0.0 = full attention, negative = attenuated, finfo.min = effectively fully masked. - """ - finfo = torch.finfo(dtype) - mask = torch.zeros((1, 1, total_tokens, total_tokens), device=device, dtype=dtype) - tracked_end = guide_start + tracked_count - - # Convert weights to log-space bias - w = tracked_weights.to(device=device, dtype=dtype) # (1, tracked_count) - log_w = torch.full_like(w, finfo.min) - positive_mask = w > 0 - if positive_mask.any(): - log_w[positive_mask] = torch.log(w[positive_mask].clamp(min=finfo.tiny)) - - # noisy → tracked guides: each noisy row gets the same per-guide weight - mask[:, :, :guide_start, guide_start:tracked_end] = log_w.view(1, 1, 1, -1) - # tracked guides → noisy: each guide row broadcasts its weight across noisy cols - mask[:, :, guide_start:tracked_end, :guide_start] = log_w.view(1, 1, -1, 1) - - return mask - def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs): """Process transformer blocks for LTXV.""" patches_replace = transformer_options.get("patches_replace", {}) diff --git a/comfy/ldm/lightricks/vae/audio_vae.py b/comfy/ldm/lightricks/vae/audio_vae.py index fa0a00748..dd5320c8f 100644 --- a/comfy/ldm/lightricks/vae/audio_vae.py +++ b/comfy/ldm/lightricks/vae/audio_vae.py @@ -4,9 +4,6 @@ import math import torch import torchaudio -import 
comfy.model_management -import comfy.model_patcher -import comfy.utils as utils from comfy.ldm.mmaudio.vae.distributions import DiagonalGaussianDistribution from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier from comfy.ldm.lightricks.vae.causal_audio_autoencoder import ( @@ -43,30 +40,6 @@ class AudioVAEComponentConfig: return cls(autoencoder=audio_config, vocoder=vocoder_config) - -class ModelDeviceManager: - """Manages device placement and GPU residency for the composed model.""" - - def __init__(self, module: torch.nn.Module): - load_device = comfy.model_management.get_torch_device() - offload_device = comfy.model_management.vae_offload_device() - self.patcher = comfy.model_patcher.ModelPatcher(module, load_device, offload_device) - - def ensure_model_loaded(self) -> None: - comfy.model_management.free_memory( - self.patcher.model_size(), - self.patcher.load_device, - ) - comfy.model_management.load_model_gpu(self.patcher) - - def move_to_load_device(self, tensor: torch.Tensor) -> torch.Tensor: - return tensor.to(self.patcher.load_device) - - @property - def load_device(self): - return self.patcher.load_device - - class AudioLatentNormalizer: """Applies per-channel statistics in patch space and restores original layout.""" @@ -132,23 +105,17 @@ class AudioPreprocessor: class AudioVAE(torch.nn.Module): """High-level Audio VAE wrapper exposing encode and decode entry points.""" - def __init__(self, state_dict: dict, metadata: dict): + def __init__(self, metadata: dict): super().__init__() component_config = AudioVAEComponentConfig.from_metadata(metadata) - vae_sd = utils.state_dict_prefix_replace(state_dict, {"audio_vae.": ""}, filter_keys=True) - vocoder_sd = utils.state_dict_prefix_replace(state_dict, {"vocoder.": ""}, filter_keys=True) - self.autoencoder = CausalAudioAutoencoder(config=component_config.autoencoder) if "bwe" in component_config.vocoder: self.vocoder = VocoderWithBWE(config=component_config.vocoder) else: self.vocoder = 
Vocoder(config=component_config.vocoder) - self.autoencoder.load_state_dict(vae_sd, strict=False) - self.vocoder.load_state_dict(vocoder_sd, strict=False) - autoencoder_config = self.autoencoder.get_config() self.normalizer = AudioLatentNormalizer( AudioPatchifier( @@ -168,18 +135,12 @@ class AudioVAE(torch.nn.Module): n_fft=autoencoder_config["n_fft"], ) - self.device_manager = ModelDeviceManager(self) - - def encode(self, audio: dict) -> torch.Tensor: + def encode(self, audio, sample_rate=44100) -> torch.Tensor: """Encode a waveform dictionary into normalized latent tensors.""" - waveform = audio["waveform"] - waveform_sample_rate = audio["sample_rate"] + waveform = audio + waveform_sample_rate = sample_rate input_device = waveform.device - # Ensure that Audio VAE is loaded on the correct device. - self.device_manager.ensure_model_loaded() - - waveform = self.device_manager.move_to_load_device(waveform) expected_channels = self.autoencoder.encoder.in_channels if waveform.shape[1] != expected_channels: if waveform.shape[1] == 1: @@ -190,7 +151,7 @@ class AudioVAE(torch.nn.Module): ) mel_spec = self.preprocessor.waveform_to_mel( - waveform, waveform_sample_rate, device=self.device_manager.load_device + waveform, waveform_sample_rate, device=waveform.device ) latents = self.autoencoder.encode(mel_spec) @@ -204,17 +165,13 @@ class AudioVAE(torch.nn.Module): """Decode normalized latent tensors into an audio waveform.""" original_shape = latents.shape - # Ensure that Audio VAE is loaded on the correct device. 
- self.device_manager.ensure_model_loaded() - - latents = self.device_manager.move_to_load_device(latents) latents = self.normalizer.denormalize(latents) target_shape = self.target_shape_from_latents(original_shape) mel_spec = self.autoencoder.decode(latents, target_shape=target_shape) waveform = self.run_vocoder(mel_spec) - return self.device_manager.move_to_load_device(waveform) + return waveform def target_shape_from_latents(self, latents_shape): batch, _, time, _ = latents_shape diff --git a/comfy/ldm/models/autoencoder.py b/comfy/ldm/models/autoencoder.py index 4f50810dc..582b44e69 100644 --- a/comfy/ldm/models/autoencoder.py +++ b/comfy/ldm/models/autoencoder.py @@ -155,6 +155,7 @@ class AutoencodingEngineLegacy(AutoencodingEngine): def __init__(self, embed_dim: int, **kwargs): self.max_batch_size = kwargs.pop("max_batch_size", None) ddconfig = kwargs.pop("ddconfig") + decoder_ddconfig = kwargs.pop("decoder_ddconfig", ddconfig) super().__init__( encoder_config={ "target": "comfy.ldm.modules.diffusionmodules.model.Encoder", @@ -162,7 +163,7 @@ class AutoencodingEngineLegacy(AutoencodingEngine): }, decoder_config={ "target": "comfy.ldm.modules.diffusionmodules.model.Decoder", - "params": ddconfig, + "params": decoder_ddconfig, }, **kwargs, ) diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index b193fe5e8..a68cb8439 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -14,6 +14,8 @@ from .sub_quadratic_attention import efficient_dot_product_attention from comfy import model_management +TORCH_HAS_GQA = model_management.torch_version_numeric >= (2, 5) + if model_management.xformers_enabled(): import xformers import xformers.ops @@ -150,7 +152,12 @@ def attention_basic(q, k, v, heads, mask=None, attn_precision=None, skip_reshape b, _, dim_head = q.shape dim_head //= heads - scale = dim_head ** -0.5 + if kwargs.get("enable_gqa", False) and q.shape[-3] != k.shape[-3]: + n_rep = q.shape[-3] // k.shape[-3] + k 
= k.repeat_interleave(n_rep, dim=-3) + v = v.repeat_interleave(n_rep, dim=-3) + + scale = kwargs.get("scale", dim_head ** -0.5) h = heads if skip_reshape: @@ -219,6 +226,10 @@ def attention_sub_quad(query, key, value, heads, mask=None, attn_precision=None, b, _, dim_head = query.shape dim_head //= heads + if "scale" in kwargs: + # Pre-scale query to match requested scale (cancels internal 1/sqrt(dim_head)) + query = query * (kwargs["scale"] * dim_head ** 0.5) + if skip_reshape: query = query.reshape(b * heads, -1, dim_head) value = value.reshape(b * heads, -1, dim_head) @@ -290,7 +301,7 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape b, _, dim_head = q.shape dim_head //= heads - scale = dim_head ** -0.5 + scale = kwargs.get("scale", dim_head ** -0.5) if skip_reshape: q, k, v = map( @@ -500,8 +511,13 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha if mask.ndim == 3: mask = mask.unsqueeze(1) + # Pass through extra SDPA kwargs (scale, enable_gqa) if provided + # enable_gqa requires PyTorch 2.5+; older versions use manual KV expansion above + sdpa_keys = ("scale", "enable_gqa") if TORCH_HAS_GQA else ("scale",) + sdpa_extra = {k: v for k, v in kwargs.items() if k in sdpa_keys} + if SDP_BATCH_LIMIT >= b: - out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) + out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False, **sdpa_extra) if not skip_output_reshape: out = ( out.transpose(1, 2).reshape(b, -1, heads * dim_head) @@ -519,7 +535,7 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha k[i : i + SDP_BATCH_LIMIT], v[i : i + SDP_BATCH_LIMIT], attn_mask=m, - dropout_p=0.0, is_causal=False + dropout_p=0.0, is_causal=False, **sdpa_extra ).transpose(1, 2).reshape(-1, q.shape[2], heads * dim_head) return out diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py 
b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 295310df6..4b92c44cf 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -34,6 +34,16 @@ class TimestepBlock(nn.Module): #This is needed because accelerate makes a copy of transformer_options which breaks "transformer_index" def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, output_shape=None, time_context=None, num_video_frames=None, image_only_indicator=None): for layer in ts: + if "patches" in transformer_options and "forward_timestep_embed_patch" in transformer_options["patches"]: + found_patched = False + for class_type, handler in transformer_options["patches"]["forward_timestep_embed_patch"]: + if isinstance(layer, class_type): + x = handler(layer, x, emb, context, transformer_options, output_shape, time_context, num_video_frames, image_only_indicator) + found_patched = True + break + if found_patched: + continue + if isinstance(layer, VideoResBlock): x = layer(x, emb, num_video_frames, image_only_indicator) elif isinstance(layer, TimestepBlock): @@ -49,15 +59,6 @@ def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, out elif isinstance(layer, Upsample): x = layer(x, output_shape=output_shape) else: - if "patches" in transformer_options and "forward_timestep_embed_patch" in transformer_options["patches"]: - found_patched = False - for class_type, handler in transformer_options["patches"]["forward_timestep_embed_patch"]: - if isinstance(layer, class_type): - x = handler(layer, x, emb, context, transformer_options, output_shape, time_context, num_video_frames, image_only_indicator) - found_patched = True - break - if found_patched: - continue x = layer(x) return x @@ -894,6 +895,12 @@ class UNetModel(nn.Module): h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options, time_context=time_context, num_video_frames=num_video_frames, 
image_only_indicator=image_only_indicator) h = apply_control(h, control, 'middle') + if "middle_block_after_patch" in transformer_patches: + patch = transformer_patches["middle_block_after_patch"] + for p in patch: + out = p({"h": h, "x": x, "emb": emb, "context": context, "y": y, + "timesteps": timesteps, "transformer_options": transformer_options}) + h = out["h"] for id, module in enumerate(self.output_blocks): transformer_options["block"] = ("output", id) @@ -905,8 +912,9 @@ class UNetModel(nn.Module): for p in patch: h, hsp = p(h, hsp, transformer_options) - h = th.cat([h, hsp], dim=1) - del hsp + if hsp is not None: + h = th.cat([h, hsp], dim=1) + del hsp if len(hs) > 0: output_shape = hs[-1].shape else: diff --git a/comfy/ldm/modules/diffusionmodules/util.py b/comfy/ldm/modules/diffusionmodules/util.py index 233011dc9..aed5c149c 100644 --- a/comfy/ldm/modules/diffusionmodules/util.py +++ b/comfy/ldm/modules/diffusionmodules/util.py @@ -140,7 +140,7 @@ def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): alphas = alphacums[ddim_timesteps] alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) - # according the the formula provided in https://arxiv.org/abs/2010.02502 + # according to the formula provided in https://arxiv.org/abs/2010.02502 sigmas = eta * np.sqrt((1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)) if verbose: logging.info(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}') diff --git a/comfy/ldm/modules/encoders/noise_aug_modules.py b/comfy/ldm/modules/encoders/noise_aug_modules.py index a5d866030..c853e4298 100644 --- a/comfy/ldm/modules/encoders/noise_aug_modules.py +++ b/comfy/ldm/modules/encoders/noise_aug_modules.py @@ -3,12 +3,9 @@ from ..diffusionmodules.openaimodel import Timestep import torch class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): - def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): + def 
__init__(self, *args, timestep_dim=256, **kwargs): super().__init__(*args, **kwargs) - if clip_stats_path is None: - clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) - else: - clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") + clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) self.register_buffer("data_mean", clip_mean[None, :], persistent=False) self.register_buffer("data_std", clip_std[None, :], persistent=False) self.time_embed = Timestep(timestep_dim) diff --git a/comfy/ldm/modules/sdpose.py b/comfy/ldm/modules/sdpose.py index d67b60b76..1a9585fc2 100644 --- a/comfy/ldm/modules/sdpose.py +++ b/comfy/ldm/modules/sdpose.py @@ -90,7 +90,7 @@ class HeatmapHead(torch.nn.Module): origin_max = np.max(hm[k]) dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32) dr[border:-border, border:-border] = hm[k].copy() - dr = gaussian_filter(dr, sigma=2.0) + dr = gaussian_filter(dr, sigma=2.0, truncate=2.5) hm[k] = dr[border:-border, border:-border].copy() cur_max = np.max(hm[k]) if cur_max > 0: diff --git a/comfy/ldm/moge/geometry.py b/comfy/ldm/moge/geometry.py new file mode 100644 index 000000000..7fdc97871 --- /dev/null +++ b/comfy/ldm/moge/geometry.py @@ -0,0 +1,189 @@ +"""Pure-torch + scipy geometry helpers for MoGe inference and mesh export.""" + +from __future__ import annotations + +from typing import Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F + +from scipy.optimize import least_squares + +def normalized_view_plane_uv(width: int, height: int, aspect_ratio: Optional[float] = None, + dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> torch.Tensor: + """Normalized view-plane UV coordinates with corners at +/-(W, H)/diagonal.""" + if aspect_ratio is None: + aspect_ratio = width / height + span_x = aspect_ratio / (1 + aspect_ratio ** 2) ** 0.5 + span_y = 1.0 / (1 + aspect_ratio ** 2) ** 0.5 + u = torch.linspace(-span_x * 
(width - 1) / width, span_x * (width - 1) / width, width, dtype=dtype, device=device) + v = torch.linspace(-span_y * (height - 1) / height, span_y * (height - 1) / height, height, dtype=dtype, device=device) + u, v = torch.meshgrid(u, v, indexing="xy") + return torch.stack([u, v], dim=-1) + + +def intrinsics_from_focal_center(fx: torch.Tensor, fy: torch.Tensor, cx: torch.Tensor, cy: torch.Tensor) -> torch.Tensor: + """Assemble (..., 3, 3) intrinsics from broadcastable fx, fy, cx, cy.""" + fx, fy, cx, cy = [torch.as_tensor(v) for v in (fx, fy, cx, cy)] + fx, fy, cx, cy = torch.broadcast_tensors(fx, fy, cx, cy) + zero = torch.zeros_like(fx) + one = torch.ones_like(fx) + return torch.stack([ + torch.stack([fx, zero, cx], dim=-1), + torch.stack([zero, fy, cy], dim=-1), + torch.stack([zero, zero, one], dim=-1), + ], dim=-2) + + +def depth_map_to_point_map(depth: torch.Tensor, intrinsics: torch.Tensor) -> torch.Tensor: + """Back-project a (..., H, W) depth map through K^-1 to (..., H, W, 3) camera-space points. + + Intrinsics use normalized image coords (x in [0, 1] left->right, y in [0, 1] top->bottom). 
+ """ + H, W = depth.shape[-2:] + device, dtype = depth.device, depth.dtype + u = (torch.arange(W, dtype=dtype, device=device) + 0.5) / W + v = (torch.arange(H, dtype=dtype, device=device) + 0.5) / H + grid_v, grid_u = torch.meshgrid(v, u, indexing="ij") + pix = torch.stack([grid_u, grid_v, torch.ones_like(grid_u)], dim=-1) + K_inv = torch.linalg.inv(intrinsics) + rays = torch.einsum("...ij,hwj->...hwi", K_inv, pix) + return rays * depth.unsqueeze(-1) + + +def _solve_optimal_shift(uv: np.ndarray, xyz: np.ndarray, + focal: Optional[float] = None) -> Tuple[float, float]: + """LM-solve for z-shift; when focal is None, also recovers the optimal focal.""" + uv = uv.reshape(-1, 2) + xy = xyz[..., :2].reshape(-1, 2) + z = xyz[..., 2].reshape(-1) + + def fn(shift): + xy_proj = xy / (z + shift)[:, None] + f = focal if focal is not None else (xy_proj * uv).sum() / np.square(xy_proj).sum() + return (f * xy_proj - uv).ravel() + + sol = least_squares(fn, x0=0.0, ftol=1e-3, method="lm") + shift = float(np.asarray(sol["x"]).squeeze()) + if focal is None: + xy_proj = xy / (z + shift)[:, None] + focal = float((xy_proj * uv).sum() / np.square(xy_proj).sum()) + return shift, focal + + +def recover_focal_shift(points: torch.Tensor, mask: Optional[torch.Tensor] = None, + focal: Optional[torch.Tensor] = None, downsample_size: Tuple[int, int] = (64, 64) + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Recover the focal length and z-shift that turn points into a metric point map. + + Optical center is at the image center; returned focal is relative to half the image diagonal. + Returns (focal, shift) on the same device/dtype as points. 
+ """ + shape = points.shape + H, W = shape[-3], shape[-2] + points_b = points.reshape(-1, H, W, 3) + mask_b = None if mask is None else mask.reshape(-1, H, W) + focal_b = None if focal is None else focal.reshape(-1) + + uv = normalized_view_plane_uv(W, H, dtype=points.dtype, device=points.device) + + points_lr = F.interpolate(points_b.permute(0, 3, 1, 2), downsample_size, mode="nearest").permute(0, 2, 3, 1) + uv_lr = F.interpolate(uv.unsqueeze(0).permute(0, 3, 1, 2), downsample_size, mode="nearest").squeeze(0).permute(1, 2, 0) + mask_lr = None + if mask_b is not None: + mask_lr = F.interpolate(mask_b.to(torch.float32).unsqueeze(1), downsample_size, mode="nearest").squeeze(1) > 0 + + uv_np = uv_lr.detach().cpu().numpy() + points_np = points_lr.detach().cpu().numpy() + mask_np = None if mask_lr is None else mask_lr.detach().cpu().numpy() + focal_np = None if focal_b is None else focal_b.detach().cpu().numpy() + + out_focal: list = [] + out_shift: list = [] + for i in range(points_b.shape[0]): + if mask_np is None: + xyz_i = points_np[i].reshape(-1, 3) + uv_i = uv_np.reshape(-1, 2) + else: + sel = mask_np[i] + if sel.sum() < 2: + out_focal.append(1.0) + out_shift.append(0.0) + continue + xyz_i = points_np[i][sel] + uv_i = uv_np[sel] + if focal_np is None: + shift_i, focal_i = _solve_optimal_shift(uv_i, xyz_i) + out_focal.append(focal_i) + else: + shift_i, _ = _solve_optimal_shift(uv_i, xyz_i, focal=float(focal_np[i])) + out_shift.append(shift_i) + + shift_t = torch.tensor(out_shift, device=points.device, dtype=points.dtype).reshape(shape[:-3]) + if focal is None: + focal_t = torch.tensor(out_focal, device=points.device, dtype=points.dtype).reshape(shape[:-3]) + else: + focal_t = focal.reshape(shape[:-3]) + return focal_t, shift_t + + +def depth_map_edge(depth: torch.Tensor, atol: Optional[float] = None, rtol: Optional[float] = None, kernel_size: int = 3) -> torch.Tensor: + """Per-pixel boolean: True where the local depth window's max-min span exceeds atol or 
rtol*depth.""" + shape = depth.shape + d = depth.reshape(-1, 1, *shape[-2:]) + pad = kernel_size // 2 + diff = F.max_pool2d(d, kernel_size, stride=1, padding=pad) + F.max_pool2d(-d, kernel_size, stride=1, padding=pad) + edge = torch.zeros_like(d, dtype=torch.bool) + if atol is not None: + edge |= diff > atol + if rtol is not None: + edge |= (diff / d.clamp_min(1e-6)).nan_to_num_() > rtol + return edge.reshape(*shape) + + +def triangulate_grid_mesh(points: torch.Tensor, mask: Optional[torch.Tensor] = None, decimation: int = 1, discontinuity_threshold: float = 0.04, + depth: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Triangulate a (H, W, 3) point map into (vertices, faces, uvs) on CPU. + + Vertices: pixels with finite coords (passing optional mask). Quads with four valid corners + become two triangles. depth overrides the scalar used for the rtol edge check; pass radial + depth for panoramas (the default points[..., 2] goes negative below the equator). + """ + points = points.detach().cpu() + finite = torch.isfinite(points).all(dim=-1) + if mask is None: + mask = finite + else: + mask = mask.detach().cpu().to(torch.bool) & finite + + if discontinuity_threshold > 0: + d = depth.detach().cpu() if depth is not None else points[..., 2] + # Replace inf with 0 so max-pool doesn't poison neighbourhoods (mask above already excludes those pixels). 
+ d_finite = torch.where(finite, d, torch.zeros_like(d)) + edge = depth_map_edge(d_finite, rtol=discontinuity_threshold) + mask = mask & ~edge + + if decimation > 1: + points = points[::decimation, ::decimation].contiguous() + mask = mask[::decimation, ::decimation].contiguous() + H, W = points.shape[:2] + + flat_mask = mask.reshape(-1) + idx = torch.full((H * W,), -1, dtype=torch.long) + n_valid = int(flat_mask.sum().item()) + idx[flat_mask] = torch.arange(n_valid, dtype=torch.long) + idx = idx.reshape(H, W) + + vertices = points.reshape(-1, 3)[flat_mask].contiguous() + + yy, xx = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij") + u = xx.float() / max(W - 1, 1) + v = yy.float() / max(H - 1, 1) + uvs = torch.stack([u, v], dim=-1).reshape(-1, 2)[flat_mask].contiguous() + + a, b, c, d = idx[:-1, :-1], idx[:-1, 1:], idx[1:, 1:], idx[1:, :-1] + quad_ok = (a >= 0) & (b >= 0) & (c >= 0) & (d >= 0) + a, b, c, d = a[quad_ok], b[quad_ok], c[quad_ok], d[quad_ok] + faces = torch.cat([torch.stack([a, b, c], dim=-1), torch.stack([a, c, d], dim=-1)], dim=0).contiguous() + return vertices, faces, uvs diff --git a/comfy/ldm/moge/model.py b/comfy/ldm/moge/model.py new file mode 100644 index 000000000..6876c4af2 --- /dev/null +++ b/comfy/ldm/moge/model.py @@ -0,0 +1,347 @@ +"""MoGe v1 / v2 inference modules and a state-dict-driven builder. + +V1: DINOv2 backbone + multi-output head (points, mask). +V2: DINOv2 encoder + neck + per-output heads (points, mask, normal, optional metric-scale MLP). 
+""" + +from __future__ import annotations + +from numbers import Number +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops +import comfy.model_management +import comfy.model_patcher + +from comfy.image_encoders.dino2 import Dinov2Model + +from .geometry import depth_map_to_point_map, intrinsics_from_focal_center, recover_focal_shift +from .modules import ConvStack, DINOv2Encoder, HeadV1, MLP, _view_plane_uv_grid + + +def _remap_points(points: torch.Tensor) -> torch.Tensor: + """Apply the exp remap: z -> exp(z), xy stays linear and gets scaled by the new z.""" + xy, z = points.split([2, 1], dim=-1) + z = torch.exp(z) + return torch.cat([xy * z, z], dim=-1) + + +def _detect_dinov2(sd: dict, prefix: str) -> Dict[str, Any]: + # All shipped MoGe checkpoints use plain DINOv2 + hidden = sd[prefix + "embeddings.cls_token"].shape[-1] + layer_prefix = prefix + "encoder.layer." + depth = 1 + max(int(k[len(layer_prefix):].split(".")[0]) for k in sd if k.startswith(layer_prefix)) + return { + "hidden_size": hidden, + "num_attention_heads": hidden // 64, + "num_hidden_layers": depth, + "layer_norm_eps": 1e-6, + "use_swiglu_ffn": False, + } + + +class MoGeModelV1(nn.Module): + """MoGe v1: DINOv2 backbone + HeadV1 (points, mask).""" + + image_mean: torch.Tensor + image_std: torch.Tensor + + intermediate_layers = 4 + num_tokens_range: Tuple[Number, Number] = (1200, 2500) + mask_threshold = 0.5 + + def __init__(self, backbone: Dict[str, Any], dim_upsample: List[int] = (256, 128, 128), + num_res_blocks: int = 1, dim_times_res_block_hidden: int = 1, + dtype=None, device=None, operations=comfy.ops.manual_cast): + super().__init__() + self.backbone = Dinov2Model(backbone, dtype, device, operations) + self.head = HeadV1(dim_in=backbone["hidden_size"], dim_upsample=list(dim_upsample), + num_res_blocks=num_res_blocks, dim_times_res_block_hidden=dim_times_res_block_hidden, + dtype=dtype, 
device=device, operations=operations) + self.register_buffer("image_mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)) + self.register_buffer("image_std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)) + + def forward(self, image: torch.Tensor, num_tokens: int) -> Dict[str, torch.Tensor]: + H, W = image.shape[-2:] + resize = ((num_tokens * 14 ** 2) / (H * W)) ** 0.5 + rh, rw = int(H * resize), int(W * resize) + x = F.interpolate(image, (rh, rw), mode="bicubic", align_corners=False, antialias=True) + x = (x - self.image_mean) / self.image_std + x14 = F.interpolate(x, (rh // 14 * 14, rw // 14 * 14), mode="bilinear", align_corners=False, antialias=True) + + n_layers = len(self.backbone.encoder.layer) + indices = list(range(n_layers - self.intermediate_layers, n_layers)) + feats = self.backbone.get_intermediate_layers(x14, indices, apply_norm=True) + + points, mask = self.head(feats, x) + points = F.interpolate(points.float(), (H, W), mode="bilinear", align_corners=False) + points = _remap_points(points.permute(0, 2, 3, 1)) + + mask = F.interpolate(mask.float(), (H, W), mode="bilinear", align_corners=False).squeeze(1) + + return {"points": points, "mask": mask} + + @classmethod + def from_state_dict(cls, sd, dtype=None, device=None, operations=comfy.ops.manual_cast): + """Detect the v1 head config from sd, build a model, and load weights.""" + n_up = 1 + max(int(k.split(".")[2]) for k in sd if k.startswith("head.upsample_blocks.")) + dim_upsample = [sd[f"head.upsample_blocks.{i}.0.0.weight"].shape[1] for i in range(n_up)] + # Each upsample stage is Sequential[upsampler, *res_blocks]; count res blocks at level 0. 
+ num_res_blocks = max({int(k.split(".")[3]) for k in sd if k.startswith("head.upsample_blocks.0.")}) + hidden_out = sd["head.upsample_blocks.0.1.layers.2.weight"].shape[0] + dim_times = max(hidden_out // dim_upsample[0], 1) + model = cls(backbone=_detect_dinov2(sd, prefix="backbone."), + dim_upsample=dim_upsample, num_res_blocks=num_res_blocks, dim_times_res_block_hidden=dim_times, + dtype=dtype, device=device, operations=operations) + model.load_state_dict(sd, strict=True) + return model + + +class MoGeModelV2(nn.Module): + """MoGe v2: DINOv2 encoder + neck + per-output heads (points/mask/normal/metric-scale).""" + + intermediate_layers = 4 + num_tokens_range: Tuple[Number, Number] = (1200, 3600) + + def __init__(self, + encoder: Dict[str, Any], + neck: Dict[str, Any], + points_head: Dict[str, Any], + mask_head: Dict[str, Any], + scale_head: Dict[str, Any], + normal_head: Optional[Dict[str, Any]] = None, + dtype=None, device=None, operations=comfy.ops.manual_cast): + super().__init__() + self.encoder = DINOv2Encoder(**encoder, dtype=dtype, device=device, operations=operations) + self.neck = ConvStack(**neck, dtype=dtype, device=device, operations=operations) + self.points_head = ConvStack(**points_head, dtype=dtype, device=device, operations=operations) + self.mask_head = ConvStack(**mask_head, dtype=dtype, device=device, operations=operations) + self.scale_head = MLP(**scale_head, dtype=dtype, device=device, operations=operations) + if normal_head is not None: + self.normal_head = ConvStack(**normal_head, dtype=dtype, device=device, operations=operations) + + def forward(self, image: torch.Tensor, num_tokens: int) -> Dict[str, torch.Tensor]: + B, _, H, W = image.shape + device, dtype = image.device, image.dtype + aspect_ratio = W / H + base_h = round((num_tokens / aspect_ratio) ** 0.5) + base_w = round((num_tokens * aspect_ratio) ** 0.5) + + feat_top, cls_token = self.encoder(image, base_h, base_w, return_class_token=True) + + # 5-level pyramid: feat at level 0 
concatenated with UV, other levels UV-only. + levels = [_view_plane_uv_grid(B, base_h * (2 ** L), base_w * (2 ** L), aspect_ratio, dtype, device) + for L in range(5)] + levels[0] = torch.cat([feat_top, levels[0]], dim=1) + + feats = self.neck(levels) + + def _resize(v): + return F.interpolate(v, (H, W), mode="bilinear", align_corners=False) + + points = _remap_points(_resize(self.points_head(feats)[-1]).permute(0, 2, 3, 1)) + mask = _resize(self.mask_head(feats)[-1]).squeeze(1).sigmoid() + metric_scale = self.scale_head(cls_token).squeeze(1).exp() + + result = {"points": points, "mask": mask, "metric_scale": metric_scale} + if hasattr(self, "normal_head"): + normal = _resize(self.normal_head(feats)[-1]) + result["normal"] = F.normalize(normal.permute(0, 2, 3, 1), dim=-1) + return result + + @classmethod + def from_state_dict(cls, sd, dtype=None, device=None, operations=comfy.ops.manual_cast): + """Detect the v2 encoder/neck/heads config from sd, build a model, and load weights.""" + backbone = _detect_dinov2(sd, prefix="encoder.backbone.") + depth = backbone["num_hidden_layers"] + n = cls.intermediate_layers + encoder = { + "backbone": backbone, + "intermediate_layers": [(depth // n) * (i + 1) - 1 for i in range(n)], + "dim_out": sd["encoder.output_projections.0.weight"].shape[0], + } + # scale_head is an MLP: Sequential of [Linear, ReLU, ..., Linear]; Linear weight is (out, in). 
+ scale_idxs = sorted({int(k.split(".")[1]) for k in sd if k.startswith("scale_head.")}) + scale_first = sd[f"scale_head.{scale_idxs[0]}.weight"] + cfg: Dict[str, Any] = { + "encoder": encoder, + "neck": cls._detect_convstack(sd, "neck."), + "points_head": cls._detect_convstack(sd, "points_head."), + "mask_head": cls._detect_convstack(sd, "mask_head."), + "scale_head": {"dims": [scale_first.shape[1]] + [sd[f"scale_head.{i}.weight"].shape[0] for i in scale_idxs]}, + } + if any(k.startswith("normal_head.") for k in sd): + cfg["normal_head"] = cls._detect_convstack(sd, "normal_head.") + model = cls(**cfg, dtype=dtype, device=device, operations=operations) + model.load_state_dict(sd, strict=True) + return model + + @staticmethod + def _detect_convstack(sd: dict, prefix: str) -> Dict[str, Any]: + """Reconstruct a ConvStack config from the keys under prefix""" + in_keys = [k for k in sd if k.startswith(f"{prefix}input_blocks.") and k.endswith(".weight")] + n = 1 + max(int(k[len(f"{prefix}input_blocks."):].split(".")[0]) for k in in_keys) + + in_shapes = [sd[f"{prefix}input_blocks.{i}.weight"].shape for i in range(n)] + has_out = lambda i: f"{prefix}output_blocks.{i}.weight" in sd + has_norm = f"{prefix}res_blocks.0.0.layers.0.weight" in sd + + def num_res_at(i): + rb_prefix = f"{prefix}res_blocks.{i}." 
+ return len({int(k[len(rb_prefix):].split(".")[0]) for k in sd if k.startswith(rb_prefix)}) + + return { + "dim_in": [s[1] for s in in_shapes], + "dim_res_blocks": [s[0] for s in in_shapes], + "dim_out": [sd[f"{prefix}output_blocks.{i}.weight"].shape[0] if has_out(i) else None for i in range(n)], + "num_res_blocks": [num_res_at(i) for i in range(n)], + "resamplers": ["conv_transpose" if f"{prefix}resamplers.{i}.0.weight" in sd else "bilinear" + for i in range(n - 1)], + "res_block_in_norm": "layer_norm" if has_norm else "none", + "res_block_hidden_norm": "group_norm" if has_norm else "none", + } + + +# Translate the Meta-style DINOv2 keys MoGe ships to the naming ComfyUI DINOv2 port expects, +# and split each fused qkv tensor into Q/K/V. +_DINOV2_TOPLEVEL_RENAMES = { + "patch_embed.proj.weight": "embeddings.patch_embeddings.projection.weight", + "patch_embed.proj.bias": "embeddings.patch_embeddings.projection.bias", + "cls_token": "embeddings.cls_token", + "pos_embed": "embeddings.position_embeddings", + "register_tokens": "embeddings.register_tokens", + "mask_token": "embeddings.mask_token", + "norm.weight": "layernorm.weight", + "norm.bias": "layernorm.bias", +} +_DINOV2_BLOCK_RENAMES = [ + ("ls1.gamma", "layer_scale1.lambda1"), + ("ls2.gamma", "layer_scale2.lambda1"), + ("attn.proj.", "attention.output.dense."), + ("mlp.w12.", "mlp.weights_in."), + ("mlp.w3.", "mlp.weights_out."), +] + + +def _remap_state_dict(sd: dict) -> dict: + if "model" in sd and "model_config" in sd: + sd = sd["model"] + prefix = "encoder.backbone." if any(k.startswith("encoder.backbone.") for k in sd) else "backbone." 
+ out: dict = {} + for k, v in sd.items(): + if not k.startswith(prefix): + out[k] = v + continue + rel = k[len(prefix):] + if rel in _DINOV2_TOPLEVEL_RENAMES: + out[prefix + _DINOV2_TOPLEVEL_RENAMES[rel]] = v + continue + if not rel.startswith("blocks."): + out[k] = v + continue + _, idx, sub = rel.split(".", 2) + if sub in ("attn.qkv.weight", "attn.qkv.bias"): + tail = sub.rsplit(".", 1)[1] + q, kw, vw = v.chunk(3, dim=0) + base = f"{prefix}encoder.layer.{idx}.attention.attention" + out[f"{base}.query.{tail}"] = q + out[f"{base}.key.{tail}"] = kw + out[f"{base}.value.{tail}"] = vw + continue + for old, new in _DINOV2_BLOCK_RENAMES: + sub = sub.replace(old, new) + out[f"{prefix}encoder.layer.{idx}.{sub}"] = v + return out + + +def build_from_state_dict(sd: dict, dtype=None, device=None, operations=comfy.ops.manual_cast) -> nn.Module: + """Dispatch to v1 or v2 based on the DINOv2 backbone prefix.""" + sd = _remap_state_dict(sd) + cls = MoGeModelV2 if any(k.startswith("encoder.backbone.") for k in sd) else MoGeModelV1 + return cls.from_state_dict(sd, dtype=dtype, device=device, operations=operations) + + +class MoGeModel: + """Loaded MoGe model + ComfyUI memory management.""" + + def __init__(self, state_dict: dict): + # text encoder dtype closest match + self.load_device = comfy.model_management.text_encoder_device() + offload_device = comfy.model_management.text_encoder_offload_device() + self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) + + self.model = build_from_state_dict(state_dict, dtype=self.dtype, device=offload_device, operations=comfy.ops.manual_cast).eval() + self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + self.version = "v2" if hasattr(self.model, "encoder") else "v1" + self.mask_threshold = float(getattr(self.model, "mask_threshold", 0.5)) + nt = getattr(self.model, "num_tokens_range", (1200, 2500 if self.version == "v1" else 3600)) + 
self.num_tokens_range = (int(nt[0]), int(nt[1])) + + def infer(self, image: torch.Tensor, num_tokens: Optional[int] = None, + resolution_level: int = 9, fov_x: Optional[Union[Number, torch.Tensor]] = None, + force_projection: bool = True, apply_mask: bool = True, + apply_metric_scale: bool = True + ) -> Dict[str, torch.Tensor]: + """Run a single MoGe forward + post-process pass. image is (B, 3, H, W) in [0, 1].""" + comfy.model_management.load_model_gpu(self.patcher) + image = image.to(device=self.load_device, dtype=self.dtype) + H, W = image.shape[-2:] + aspect_ratio = W / H + + if num_tokens is None: + lo, hi = self.num_tokens_range + num_tokens = int(lo + (resolution_level / 9) * (hi - lo)) + + out = self.model.forward(image, num_tokens=num_tokens) + points = out["points"].float() # recover_focal_shift goes through scipy on CPU; needs fp32. + mask_binary = out["mask"] > self.mask_threshold + normal = out.get("normal") + metric_scale = out.get("metric_scale") + + diag = (1 + aspect_ratio ** 2) ** 0.5 + + def focal_from_fov_deg(deg): + fov = torch.as_tensor(deg, device=points.device, dtype=points.dtype) + return aspect_ratio / diag / torch.tan(torch.deg2rad(fov / 2)) + + if fov_x is None: + focal, shift = recover_focal_shift(points, mask_binary) + # Fall back to 60 deg FoV when the least-squares solver flips the focal sign. 
+ bad = ~torch.isfinite(focal) | (focal <= 0) + if bool(bad.any()): + focal = torch.where(bad, focal_from_fov_deg(60.0), focal) + _, shift = recover_focal_shift(points, mask_binary, focal=focal) + else: + focal = focal_from_fov_deg(fov_x).expand(points.shape[0]) + _, shift = recover_focal_shift(points, mask_binary, focal=focal) + + f_diag = focal / 2 * diag + half = torch.tensor(0.5, device=points.device, dtype=points.dtype) + intrinsics = intrinsics_from_focal_center(f_diag / aspect_ratio, f_diag, half, half) + points[..., 2] = points[..., 2] + shift[..., None, None] + # v2 only: filter mask by depth>0 to drop metric-scale negative-depth artifacts. + if self.version == "v2": + mask_binary = mask_binary & (points[..., 2] > 0) + depth = points[..., 2].clone() + + if force_projection: + points = depth_map_to_point_map(depth, intrinsics=intrinsics) + + if apply_metric_scale and metric_scale is not None: + points = points * metric_scale[:, None, None, None] + depth = depth * metric_scale[:, None, None] + + if apply_mask: + points = torch.where(mask_binary[..., None], points, torch.full_like(points, float("inf"))) + depth = torch.where(mask_binary, depth, torch.full_like(depth, float("inf"))) + if normal is not None: + normal = torch.where(mask_binary[..., None], normal, torch.zeros_like(normal)) + + result = {"points": points, "depth": depth, "intrinsics": intrinsics, "mask": mask_binary} + if normal is not None: + result["normal"] = normal + return result diff --git a/comfy/ldm/moge/modules.py b/comfy/ldm/moge/modules.py new file mode 100644 index 000000000..235a59212 --- /dev/null +++ b/comfy/ldm/moge/modules.py @@ -0,0 +1,204 @@ +"""Building blocks for MoGe: residual conv stack, resamplers, MLP, DINOv2 encoder, v1 head.""" + +from __future__ import annotations + +from typing import List, Optional, Sequence, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops +from comfy.image_encoders.dino2 import Dinov2Model + 
def _conv2d(operations, c_in: int, c_out: int, k: int = 3, *, dtype=None, device=None):
    """Build a k x k ``operations.Conv2d`` with replicate padding of ``k // 2``."""
    return operations.Conv2d(
        c_in,
        c_out,
        kernel_size=k,
        padding=k // 2,
        padding_mode="replicate",
        dtype=dtype,
        device=device,
    )


def _view_plane_uv_grid(batch: int, height: int, width: int, aspect_ratio: float, dtype, device) -> torch.Tensor:
    """Batched normalized view-plane UV grid as a (B, 2, H, W) tensor."""
    uv_hwc = normalized_view_plane_uv(width, height, aspect_ratio=aspect_ratio, dtype=dtype, device=device)
    uv_chw = uv_hwc.permute(2, 0, 1)
    # expand() broadcasts the single grid over the batch without copying it.
    return uv_chw[None].expand(batch, -1, -1, -1)


def _concat_view_plane_uv(x: torch.Tensor, aspect_ratio: float) -> torch.Tensor:
    """Append a 2-channel normalized view-plane UV grid to x along the channel dim."""
    batch = x.shape[0]
    rows, cols = x.shape[-2], x.shape[-1]
    uv = _view_plane_uv_grid(batch, rows, cols, aspect_ratio, x.dtype, x.device)
    return torch.cat((x, uv), dim=1)


class ResidualConvBlock(nn.Module):
    """Two norm -> ReLU -> 3x3 conv stages wrapped in a residual connection.

    ``in_norm="layer_norm"`` uses a single-group GroupNorm on the input, and
    ``hidden_norm="group_norm"`` uses a GroupNorm with ``max(hidden_channels // 32, 1)``
    groups. Any other value puts an Identity in that slot, so the positions inside
    ``self.layers`` stay fixed.
    """

    def __init__(self, channels: int, hidden_channels: Optional[int] = None, in_norm: str = "layer_norm", hidden_norm: str = "group_norm",
                 dtype=None, device=None, operations=comfy.ops.manual_cast):
        super().__init__()
        if hidden_channels is None:
            hidden_channels = channels
        factory = {"dtype": dtype, "device": device}

        if in_norm == "layer_norm":
            pre_norm = operations.GroupNorm(1, channels, **factory)
        else:
            pre_norm = nn.Identity()

        if hidden_norm == "group_norm":
            mid_norm = operations.GroupNorm(max(hidden_channels // 32, 1), hidden_channels, **factory)
        else:
            mid_norm = nn.Identity()

        self.layers = nn.Sequential(
            pre_norm,
            nn.ReLU(),
            _conv2d(operations, channels, hidden_channels, **factory),
            mid_norm,
            nn.ReLU(),
            _conv2d(operations, hidden_channels, channels, **factory),
        )

    def forward(self, x):
        branch = self.layers(x)
        return branch + x
class MLP(nn.Sequential):
    """Stack of Linear layers sized by ``dims``, with a ReLU after every layer but the last."""

    def __init__(self, dims: Sequence[int], dtype=None, device=None, operations=comfy.ops.manual_cast):
        # Each hidden stage contributes (Linear, ReLU); the output Linear has no activation.
        hidden = [
            module
            for fan_in, fan_out in zip(dims[:-2], dims[1:-1])
            for module in (
                operations.Linear(fan_in, fan_out, dtype=dtype, device=device),
                nn.ReLU(inplace=True),
            )
        ]
        head = operations.Linear(dims[-2], dims[-1], dtype=dtype, device=device)
        super().__init__(*hidden, head)
class DINOv2Encoder(nn.Module):
    """Comfy DINOv2 backbone with one 1x1 projection per tapped layer, summed into one map."""

    def __init__(self, backbone: dict, intermediate_layers: List[int], dim_out: int, dtype=None, device=None, operations=comfy.ops.manual_cast):
        super().__init__()
        self.intermediate_layers = list(intermediate_layers)
        width = backbone["hidden_size"]
        self.backbone = Dinov2Model(backbone, dtype, device, operations)
        projections = [
            _conv2d(operations, width, dim_out, k=1, dtype=dtype, device=device)
            for _ in self.intermediate_layers
        ]
        self.output_projections = nn.ModuleList(projections)
        # Per-channel normalization constants, shaped to broadcast over (B, 3, H, W).
        channel_stats = (
            ("image_mean", [0.485, 0.456, 0.406]),
            ("image_std", [0.229, 0.224, 0.225]),
        )
        for buffer_name, values in channel_stats:
            self.register_buffer(buffer_name, torch.tensor(values).view(1, 3, 1, 1))

    def forward(self, image: torch.Tensor, token_rows: int, token_cols: int,
                return_class_token: bool = False) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """Encode ``image`` into a (B, dim_out, token_rows, token_cols) feature map.

        The image is resized to 14 pixels per token row and column, then normalized
        before the backbone runs. With ``return_class_token`` the second element of the
        last tapped layer's output is returned as well.
        """
        target_hw = (token_rows * 14, token_cols * 14)
        resized = F.interpolate(image, target_hw, mode="bilinear", align_corners=False, antialias=True)
        normalized = (resized - self.image_mean) / self.image_std
        layer_outputs = self.backbone.get_intermediate_layers(normalized, self.intermediate_layers, apply_norm=True)

        projected = []
        for proj, (tokens, _cls) in zip(self.output_projections, layer_outputs):
            # (B, N, C) tokens -> (B, C, rows, cols) grid for the 1x1 conv.
            token_grid = tokens.permute(0, 2, 1).unflatten(2, (token_rows, token_cols)).contiguous()
            projected.append(proj(token_grid))
        fused = torch.stack(projected, dim=1).sum(dim=1)

        if not return_class_token:
            return fused
        return fused, layer_outputs[-1][1]
(patch_h, patch_w)).contiguous()) + for proj, (feat, _cls) in zip(self.projects, hidden_states) + ], dim=1).sum(dim=1) + + for block in self.upsample_blocks: + x = block(_concat_view_plane_uv(x, aspect)) + + x = F.interpolate(x, (img_h, img_w), mode="bilinear", align_corners=False) + x = _concat_view_plane_uv(x, aspect) + return [block(x) for block in self.output_block] diff --git a/comfy/ldm/moge/panorama.py b/comfy/ldm/moge/panorama.py new file mode 100644 index 000000000..de53ebe68 --- /dev/null +++ b/comfy/ldm/moge/panorama.py @@ -0,0 +1,313 @@ +"""Panorama (equirectangular) inference helpers for MoGe. + +Splits an equirect into 12 perspective views via an icosahedron camera rig, runs +the model per view, and stitches per-view distance maps back into a single +equirect distance map via a multi-scale Poisson + gradient sparse solve. +Image sampling uses F.grid_sample (GPU); the sparse solve uses lsmr (CPU). +""" + +from __future__ import annotations + +from typing import Callable, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn.functional as F + +from scipy.ndimage import convolve, map_coordinates +from scipy.sparse import vstack, csr_array +from scipy.sparse.linalg import lsmr + + +def _icosahedron_directions() -> np.ndarray: + """12 icosahedron-vertex directions (non-normalised, matching upstream's vertex order).""" + A = (1.0 + np.sqrt(5.0)) / 2.0 + return np.array([ + [0, 1, A], [0, -1, A], [0, 1, -A], [0, -1, -A], + [1, A, 0], [-1, A, 0], [1, -A, 0], [-1, -A, 0], + [A, 0, 1], [A, 0, -1], [-A, 0, 1], [-A, 0, -1], + ], dtype=np.float32) + + +def _intrinsics_from_fov(fov_x_rad: float, fov_y_rad: float) -> np.ndarray: + """Normalised-image (unit-square) K matrix.""" + fx = 0.5 / np.tan(fov_x_rad / 2) + fy = 0.5 / np.tan(fov_y_rad / 2) + return np.array([[fx, 0, 0.5], [0, fy, 0.5], [0, 0, 1]], dtype=np.float32) + + +def _extrinsics_look_at(eye: np.ndarray, target: np.ndarray, up: np.ndarray) -> np.ndarray: + """OpenCV-convention 
def get_panorama_cameras() -> Tuple[np.ndarray, List[np.ndarray]]:
    """Return (extrinsics, intrinsics list) for the icosahedron views at 90 deg FoV.

    All cameras sit at the origin with +Z as the up hint. Every list entry is the same
    intrinsics array object.
    """
    view_targets = _icosahedron_directions()
    origin = np.zeros(3, dtype=np.float32)
    world_up = np.array([0, 0, 1], dtype=np.float32)
    extrinsics = _extrinsics_look_at(origin, view_targets, world_up)
    right_angle = np.deg2rad(90.0)
    shared_K = _intrinsics_from_fov(right_angle, right_angle)
    return extrinsics, [shared_K for _ in range(len(view_targets))]


def spherical_uv_to_directions(uv: np.ndarray) -> np.ndarray:
    """Map equirect UV in [0, 1] to float32 unit directions with Z up.

    u runs against the azimuth (u = 1 is azimuth 0), and v is the polar angle from +Z
    scaled to [0, 1].
    """
    azimuth = (1 - uv[..., 0]) * (2 * np.pi)
    polar = uv[..., 1] * np.pi
    x = np.sin(polar) * np.cos(azimuth)
    y = np.sin(polar) * np.sin(azimuth)
    z = np.cos(polar)
    return np.stack([x, y, z], axis=-1).astype(np.float32)
directions / n + u = 1 - np.arctan2(d[..., 1], d[..., 0]) / (2 * np.pi) % 1.0 + v = np.arccos(d[..., 2].clip(-1, 1)) / np.pi + return np.stack([u, v], axis=-1).astype(np.float32) + + +def _uv_grid(H: int, W: int) -> np.ndarray: + """Pixel-center UV grid in [0, 1]; (H, W, 2).""" + u = (np.arange(W, dtype=np.float32) + 0.5) / W + v = (np.arange(H, dtype=np.float32) + 0.5) / H + return np.stack(np.meshgrid(u, v, indexing="xy"), axis=-1) + + +def _unproject_cv(uv: np.ndarray, depth: np.ndarray, + extrinsics: np.ndarray, intrinsics: np.ndarray) -> np.ndarray: + """Back-project pixels into world coords (OpenCV convention).""" + pix = np.concatenate([uv, np.ones_like(uv[..., :1])], axis=-1) + K_inv = np.linalg.inv(intrinsics) + cam = pix @ K_inv.T * depth[..., None] + cam_h = np.concatenate([cam, np.ones_like(cam[..., :1])], axis=-1) + E_inv = np.linalg.inv(extrinsics) + return (cam_h @ E_inv.T)[..., :3] + + +def _project_cv(points: np.ndarray, extrinsics: np.ndarray, intrinsics: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """World coords -> (uv, depth) in the camera (OpenCV convention).""" + pts_h = np.concatenate([points, np.ones_like(points[..., :1])], axis=-1) + cam = pts_h @ extrinsics.T + cam_xyz = cam[..., :3] + depth = cam_xyz[..., 2] + proj = cam_xyz @ intrinsics.T + uv = proj[..., :2] / proj[..., 2:3].clip(1e-12) + return uv.astype(np.float32), depth.astype(np.float32) + + +def _grid_sample_uv(img_bchw: torch.Tensor, uv: torch.Tensor, mode: str = "bilinear") -> torch.Tensor: + """Sample img_bchw at UV-in-[0,1] coords uv of shape (B, H, W, 2); replicate-border.""" + grid = uv * 2.0 - 1.0 + return F.grid_sample(img_bchw, grid, mode=mode, padding_mode="border", align_corners=False) + + +def split_panorama_image(image: torch.Tensor, extrinsics: np.ndarray, intrinsics: List[np.ndarray], resolution: int) -> torch.Tensor: + """(3, Hp, Wp) equirect on any device -> (N, 3, R, R) perspective crops on the same device.""" + device = image.device + N = len(extrinsics) + 
uv = _uv_grid(resolution, resolution) + sample_uvs = [] + for i in range(N): + world = _unproject_cv(uv, np.ones(uv.shape[:-1], dtype=np.float32), extrinsics[i], intrinsics[i]) + sample_uvs.append(directions_to_spherical_uv(world)) + sample_uvs = np.stack(sample_uvs, axis=0) + + img_bchw = image.unsqueeze(0).expand(N, -1, -1, -1).contiguous() + sample_uvs_t = torch.from_numpy(sample_uvs).to(device=device, dtype=image.dtype) + return _grid_sample_uv(img_bchw, sample_uvs_t, mode="bilinear") + + +def _poisson_equation(W: int, H: int, wrap_x: bool = False, wrap_y: bool = False): + """Sparse Laplacian operator over the H x W grid.""" + grid_index = np.arange(H * W).reshape(H, W) + grid_index = np.pad(grid_index, ((0, 0), (1, 1)), mode="wrap" if wrap_x else "edge") + grid_index = np.pad(grid_index, ((1, 1), (0, 0)), mode="wrap" if wrap_y else "edge") + + data = np.array([[-4, 1, 1, 1, 1]], dtype=np.float32).repeat(H * W, axis=0).reshape(-1) + indices = np.stack([ + grid_index[1:-1, 1:-1], + grid_index[:-2, 1:-1], grid_index[2:, 1:-1], + grid_index[1:-1, :-2], grid_index[1:-1, 2:], + ], axis=-1).reshape(-1) + indptr = np.arange(0, H * W * 5 + 1, 5) + return csr_array((data, indices, indptr), shape=(H * W, H * W)) + + +def _grad_equation(W: int, H: int, wrap_x: bool = False, wrap_y: bool = False): + """Sparse forward-difference operator over the H x W grid.""" + grid_index = np.arange(W * H).reshape(H, W) + if wrap_x: + grid_index = np.pad(grid_index, ((0, 0), (0, 1)), mode="wrap") + if wrap_y: + grid_index = np.pad(grid_index, ((0, 1), (0, 0)), mode="wrap") + + data = np.concatenate([ + np.concatenate([ + np.ones((grid_index.shape[0], grid_index.shape[1] - 1), dtype=np.float32).reshape(-1, 1), + -np.ones((grid_index.shape[0], grid_index.shape[1] - 1), dtype=np.float32).reshape(-1, 1), + ], axis=1).reshape(-1), + np.concatenate([ + np.ones((grid_index.shape[0] - 1, grid_index.shape[1]), dtype=np.float32).reshape(-1, 1), + -np.ones((grid_index.shape[0] - 1, 
grid_index.shape[1]), dtype=np.float32).reshape(-1, 1), + ], axis=1).reshape(-1), + ]) + indices = np.concatenate([ + np.concatenate([grid_index[:, :-1].reshape(-1, 1), grid_index[:, 1:].reshape(-1, 1)], axis=1).reshape(-1), + np.concatenate([grid_index[:-1, :].reshape(-1, 1), grid_index[1:, :].reshape(-1, 1)], axis=1).reshape(-1), + ]) + nx = grid_index.shape[0] * (grid_index.shape[1] - 1) + ny = (grid_index.shape[0] - 1) * grid_index.shape[1] + indptr = np.arange(0, nx * 2 + ny * 2 + 1, 2) + return csr_array((data, indices, indptr), shape=(nx + ny, H * W)) + + +def _scipy_remap_bilinear(img: np.ndarray, sample_pixels: np.ndarray, mode: str = "bilinear") -> np.ndarray: + """Bilinear/nearest sampling at fractional pixel coords; out-of-range clamps to nearest border.""" + H, W = img.shape[:2] + yy = np.clip(sample_pixels[..., 1], 0, H - 1) + xx = np.clip(sample_pixels[..., 0], 0, W - 1) + order = 1 if mode == "bilinear" else 0 + if img.ndim == 2: + return map_coordinates(img, [yy, xx], order=order, mode="nearest").astype(img.dtype) + out = np.stack([ + map_coordinates(img[..., c], [yy, xx], order=order, mode="nearest") + for c in range(img.shape[-1]) + ], axis=-1) + return out.astype(img.dtype) + + +def merge_panorama_depth(width: int, height: int, + distance_maps: List[np.ndarray], pred_masks: List[np.ndarray], + extrinsics: List[np.ndarray], intrinsics: List[np.ndarray], + on_view: Optional[Callable[[], None]] = None, + on_solve_start: Optional[Callable[[int, int], None]] = None, + on_solve_end: Optional[Callable[[int, int], None]] = None, + ) -> Tuple[np.ndarray, np.ndarray]: + """Stitch per-view distance maps into a single equirect distance map. + + Recursive multi-scale solve: solves at half resolution first and uses that as the lsmr init + for the full-resolution solve. Optional callbacks fire per view processed and around each + lsmr solve so callers can drive a progress bar. 
+ """ + + if max(width, height) > 256: + coarse_depth, _ = merge_panorama_depth(width // 2, height // 2, + distance_maps, pred_masks, extrinsics, intrinsics, + on_view=on_view, + on_solve_start=on_solve_start, + on_solve_end=on_solve_end) + t = torch.from_numpy(coarse_depth).unsqueeze(0).unsqueeze(0) + t = F.interpolate(t, size=(height, width), mode="bilinear", align_corners=False) + depth_init = t.squeeze().numpy().astype(np.float32) + else: + depth_init = None + + spherical_directions = spherical_uv_to_directions(_uv_grid(height, width)) + + pano_log_grad_maps, pano_grad_masks = [], [] + pano_log_lap_maps, pano_lap_masks = [], [] + pano_pred_masks: List[np.ndarray] = [] + + for i in range(len(distance_maps)): + proj_uv, proj_depth = _project_cv(spherical_directions, extrinsics[i], intrinsics[i]) + proj_valid = (proj_depth > 0) & (proj_uv > 0).all(axis=-1) & (proj_uv < 1).all(axis=-1) + + Hd, Wd = distance_maps[i].shape[:2] + proj_pixels = np.clip(proj_uv, 0, 1) * np.array([Wd - 1, Hd - 1], dtype=np.float32) + + log_dist = np.log(np.clip(distance_maps[i], 1e-6, None)) + sampled = _scipy_remap_bilinear(log_dist, proj_pixels, mode="bilinear") + pano_log = np.where(proj_valid, sampled, 0.0).astype(np.float32) + + sampled_mask = _scipy_remap_bilinear(pred_masks[i].astype(np.uint8), proj_pixels, mode="nearest") + pano_pred = proj_valid & (sampled_mask > 0) + + # Equirect wraps horizontally but not vertically: wrap pad along x, edge pad along y. 
+ padded = np.pad(pano_log, ((0, 0), (0, 1)), mode="wrap") + gx, gy = padded[:, :-1] - padded[:, 1:], padded[:-1, :] - padded[1:, :] + padded_m = np.pad(pano_pred, ((0, 0), (0, 1)), mode="wrap") + mx, my = padded_m[:, :-1] & padded_m[:, 1:], padded_m[:-1, :] & padded_m[1:, :] + pano_log_grad_maps.append((gx, gy)) + pano_grad_masks.append((mx, my)) + + padded = np.pad(pano_log, ((1, 1), (0, 0)), mode="edge") + padded = np.pad(padded, ((0, 0), (1, 1)), mode="wrap") + lap_kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32) + lap = convolve(padded, lap_kernel)[1:-1, 1:-1] + padded_m = np.pad(pano_pred, ((1, 1), (0, 0)), mode="edge") + padded_m = np.pad(padded_m, ((0, 0), (1, 1)), mode="wrap") + m_kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.uint8) + lap_mask = convolve(padded_m.astype(np.uint8), m_kernel)[1:-1, 1:-1] == 5 + pano_log_lap_maps.append(lap) + pano_lap_masks.append(lap_mask) + pano_pred_masks.append(pano_pred) + + if on_view is not None: + on_view() + + gx = np.stack([m[0] for m in pano_log_grad_maps], axis=0) + gy = np.stack([m[1] for m in pano_log_grad_maps], axis=0) + mx = np.stack([m[0] for m in pano_grad_masks], axis=0) + my = np.stack([m[1] for m in pano_grad_masks], axis=0) + gx_avg = (gx * mx).sum(axis=0) / mx.sum(axis=0).clip(1e-3) + gy_avg = (gy * my).sum(axis=0) / my.sum(axis=0).clip(1e-3) + + laps = np.stack(pano_log_lap_maps, axis=0) + lap_masks = np.stack(pano_lap_masks, axis=0) + lap_avg = (laps * lap_masks).sum(axis=0) / lap_masks.sum(axis=0).clip(1e-3) + + grad_x_mask = mx.any(axis=0).reshape(-1) + grad_y_mask = my.any(axis=0).reshape(-1) + grad_mask = np.concatenate([grad_x_mask, grad_y_mask]) + lap_mask_flat = lap_masks.any(axis=0).reshape(-1) + + A = vstack([ + _grad_equation(width, height, wrap_x=True, wrap_y=False)[grad_mask], + _poisson_equation(width, height, wrap_x=True, wrap_y=False)[lap_mask_flat], + ]) + b = np.concatenate([ + gx_avg.reshape(-1)[grad_x_mask], + gy_avg.reshape(-1)[grad_y_mask], 
+ lap_avg.reshape(-1)[lap_mask_flat], + ]) + x0 = np.log(np.clip(depth_init, 1e-6, None)).reshape(-1) if depth_init is not None else None + + if on_solve_start is not None: + on_solve_start(width, height) + x, *_ = lsmr(A, b, atol=1e-5, btol=1e-5, x0=x0, show=False) + if on_solve_end is not None: + on_solve_end(width, height) + + pano_depth = np.exp(x).reshape(height, width).astype(np.float32) + pano_mask = np.any(pano_pred_masks, axis=0) + return pano_depth, pano_mask diff --git a/comfy/ldm/sam3/detector.py b/comfy/ldm/sam3/detector.py new file mode 100644 index 000000000..23a972ac7 --- /dev/null +++ b/comfy/ldm/sam3/detector.py @@ -0,0 +1,599 @@ +# SAM3 detector: transformer encoder-decoder, segmentation head, geometry encoder, scoring. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.ops import roi_align + +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.sam3.tracker import SAM3Tracker, SAM31Tracker +from comfy.ldm.sam3.sam import SAM3VisionBackbone # noqa: used in __init__ +from comfy.ldm.sam3.sam import MLP, PositionEmbeddingSine + +TRACKER_CLASSES = {"SAM3": SAM3Tracker, "SAM31": SAM31Tracker} +from comfy.ops import cast_to_input + + +def box_cxcywh_to_xyxy(x): + cx, cy, w, h = x.unbind(-1) + return torch.stack([cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], dim=-1) + + +def gen_sineembed_for_position(pos_tensor, num_feats=256): + """Per-coordinate sinusoidal embedding: (..., N) -> (..., N * num_feats).""" + assert num_feats % 2 == 0 + hdim = num_feats // 2 + freqs = 10000.0 ** (2 * (torch.arange(hdim, dtype=torch.float32, device=pos_tensor.device) // 2) / hdim) + embeds = [] + for c in range(pos_tensor.shape[-1]): + raw = (pos_tensor[..., c].float() * 2 * math.pi).unsqueeze(-1) / freqs + embeds.append(torch.stack([raw[..., 0::2].sin(), raw[..., 1::2].cos()], dim=-1).flatten(-2)) + return torch.cat(embeds, dim=-1).to(pos_tensor.dtype) + + +class SplitMHA(nn.Module): 
+ """Multi-head attention with separate Q/K/V projections (split from fused in_proj_weight).""" + def __init__(self, d_model, num_heads=8, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + self.q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.out_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + + def forward(self, q_input, k_input=None, v_input=None, mask=None): + q = self.q_proj(q_input) + if k_input is None: + k = self.k_proj(q_input) + v = self.v_proj(q_input) + else: + k = self.k_proj(k_input) + v = self.v_proj(v_input if v_input is not None else k_input) + if mask is not None and mask.ndim == 2: + mask = mask[:, None, None, :] # [B, T] -> [B, 1, 1, T] for SDPA broadcast + dtype = q.dtype # manual_cast may produce mixed dtypes + out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask, low_precision_attention=False) + return self.out_proj(out) + + +class MLPWithNorm(nn.Module): + """MLP with residual connection and output LayerNorm.""" + def __init__(self, input_dim, hidden_dim, output_dim, num_layers, residual=True, device=None, dtype=None, operations=None): + super().__init__() + dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim] + self.layers = nn.ModuleList([ + operations.Linear(dims[i], dims[i + 1], device=device, dtype=dtype) + for i in range(num_layers) + ]) + self.out_norm = operations.LayerNorm(output_dim, device=device, dtype=dtype) + self.residual = residual and (input_dim == output_dim) + + def forward(self, x): + orig = x + for i, layer in enumerate(self.layers): + x = layer(x) + if i < len(self.layers) - 1: + x = F.relu(x) + if self.residual: + x = x + orig + return self.out_norm(x) + + +class EncoderLayer(nn.Module): + def __init__(self, d_model=256, 
num_heads=8, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.self_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_image = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, x, pos, text_memory=None, text_mask=None): + normed = self.norm1(x) + q_k = normed + pos + x = x + self.self_attn(q_k, q_k, normed) + if text_memory is not None: + normed = self.norm2(x) + x = x + self.cross_attn_image(normed, text_memory, text_memory, mask=text_mask) + normed = self.norm3(x) + x = x + self.linear2(F.relu(self.linear1(normed))) + return x + + +class TransformerEncoder(nn.Module): + """Checkpoint: transformer.encoder.layers.N.*""" + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, num_layers=6, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + EncoderLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + + def forward(self, x, pos, text_memory=None, text_mask=None): + for layer in self.layers: + x = layer(x, pos, text_memory, text_mask) + return x + + +class DecoderLayer(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.self_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.ca_text = SplitMHA(d_model, num_heads, 
device=device, dtype=dtype, operations=operations) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.catext_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + + def forward(self, x, memory, x_pos, memory_pos, text_memory=None, text_mask=None, cross_attn_bias=None): + q_k = x + x_pos + x = self.norm2(x + self.self_attn(q_k, q_k, x)) + if text_memory is not None: + x = self.catext_norm(x + self.ca_text(x + x_pos, text_memory, text_memory, mask=text_mask)) + x = self.norm1(x + self.cross_attn(x + x_pos, memory + memory_pos, memory, mask=cross_attn_bias)) + x = self.norm3(x + self.linear2(F.relu(self.linear1(x)))) + return x + + +class TransformerDecoder(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, num_layers=6, + num_queries=200, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.num_queries = num_queries + + self.layers = nn.ModuleList([ + DecoderLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.query_embed = operations.Embedding(num_queries, d_model, device=device, dtype=dtype) + self.reference_points = operations.Embedding(num_queries, 4, device=device, dtype=dtype) # Reference points: Embedding(num_queries, 4) — learned anchor boxes + self.ref_point_head = MLP(d_model * 2, d_model, d_model, 2, device=device, dtype=dtype, operations=operations) # ref_point_head input: 512 (4 coords * 128 sine features each) + self.bbox_embed = MLP(d_model, d_model, 4, 3, device=device, dtype=dtype, operations=operations) 
+ + self.boxRPB_embed_x = MLP(2, d_model, num_heads, 2, device=device, dtype=dtype, operations=operations) + self.boxRPB_embed_y = MLP(2, d_model, num_heads, 2, device=device, dtype=dtype, operations=operations) + + self.presence_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.presence_token_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + self.presence_token_out_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + + @staticmethod + def _inverse_sigmoid(x): + return torch.log(x / (1 - x + 1e-6) + 1e-6) + + def _compute_box_rpb(self, ref_points, H, W): + """Box rotary position bias: (B, Q, 4) cxcywh -> (B, n_heads, Q+1, H*W) bias.""" + boxes_xyxy = box_cxcywh_to_xyxy(ref_points) + B, Q, _ = boxes_xyxy.shape + coords_h = torch.arange(H, device=ref_points.device, dtype=torch.float32) / H + coords_w = torch.arange(W, device=ref_points.device, dtype=torch.float32) / W + deltas_x = coords_w.view(1, 1, -1, 1) - boxes_xyxy[:, :, None, 0:3:2] + deltas_y = coords_h.view(1, 1, -1, 1) - boxes_xyxy[:, :, None, 1:4:2] + + log2_8 = float(math.log2(8)) + def log_scale(d): + return torch.sign(d * 8) * torch.log2(torch.abs(d * 8) + 1.0) / log2_8 + + rpb_x = self.boxRPB_embed_x(log_scale(deltas_x).to(ref_points.dtype)) + rpb_y = self.boxRPB_embed_y(log_scale(deltas_y).to(ref_points.dtype)) + + bias = (rpb_y.unsqueeze(3) + rpb_x.unsqueeze(2)).flatten(2, 3).permute(0, 3, 1, 2) + pres_bias = torch.zeros(B, bias.shape[1], 1, bias.shape[3], device=bias.device, dtype=bias.dtype) + return torch.cat([pres_bias, bias], dim=2) + + def forward(self, memory, memory_pos, text_memory=None, text_mask=None, H=72, W=72): + B = memory.shape[0] + tgt = cast_to_input(self.query_embed.weight, memory).unsqueeze(0).expand(B, -1, -1) + presence_out = cast_to_input(self.presence_token.weight, memory)[None].expand(B, -1, -1) + ref_points = cast_to_input(self.reference_points.weight, memory).unsqueeze(0).expand(B, -1, 
-1).sigmoid() + + for layer_idx, layer in enumerate(self.layers): + query_pos = self.ref_point_head(gen_sineembed_for_position(ref_points, self.d_model)) + tgt_with_pres = torch.cat([presence_out, tgt], dim=1) + pos_with_pres = torch.cat([torch.zeros_like(presence_out), query_pos], dim=1) + tgt_with_pres = layer(tgt_with_pres, memory, pos_with_pres, memory_pos, + text_memory, text_mask, self._compute_box_rpb(ref_points, H, W)) + presence_out, tgt = tgt_with_pres[:, :1], tgt_with_pres[:, 1:] + if layer_idx < len(self.layers) - 1: + ref_inv = self._inverse_sigmoid(ref_points) + ref_points = (ref_inv + self.bbox_embed(self.norm(tgt))).sigmoid().detach() + + query_out = self.norm(tgt) + ref_inv = self._inverse_sigmoid(ref_points) + boxes = (ref_inv + self.bbox_embed(query_out)).sigmoid() + presence = self.presence_token_head(self.presence_token_out_norm(presence_out)).squeeze(-1) + return {"decoder_output": query_out, "pred_boxes": boxes, "presence": presence} + + +class Transformer(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, enc_layers=6, dec_layers=6, + num_queries=200, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = TransformerEncoder(d_model, num_heads, dim_ff, enc_layers, device=device, dtype=dtype, operations=operations) + self.decoder = TransformerDecoder(d_model, num_heads, dim_ff, dec_layers, num_queries, device=device, dtype=dtype, operations=operations) + + +class GeometryEncoder(nn.Module): + def __init__(self, d_model=256, num_heads=8, num_layers=3, roi_size=7, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.roi_size = roi_size + self.pos_enc = PositionEmbeddingSine(num_pos_feats=d_model, normalize=True) + self.points_direct_project = operations.Linear(2, d_model, device=device, dtype=dtype) + self.points_pool_project = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.points_pos_enc_project = operations.Linear(d_model, d_model, 
device=device, dtype=dtype) + self.boxes_direct_project = operations.Linear(4, d_model, device=device, dtype=dtype) + self.boxes_pool_project = operations.Conv2d(d_model, d_model, kernel_size=roi_size, device=device, dtype=dtype) + self.boxes_pos_enc_project = operations.Linear(d_model + 2, d_model, device=device, dtype=dtype) + self.label_embed = operations.Embedding(2, d_model, device=device, dtype=dtype) + self.cls_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.img_pre_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.encode = nn.ModuleList([ + EncoderLayer(d_model, num_heads, 2048, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.encode_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.final_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + + def _encode_points(self, coords, labels, img_feat_2d): + """Encode point prompts: direct + pool + pos_enc + label. coords: [B, N, 2] normalized.""" + B, N, _ = coords.shape + embed = self.points_direct_project(coords) + # Pool features from backbone at point locations via grid_sample + grid = (coords * 2 - 1).unsqueeze(2) # [B, N, 1, 2] in [-1, 1] + sampled = F.grid_sample(img_feat_2d, grid, align_corners=False) # [B, C, N, 1] + embed = embed + self.points_pool_project(sampled.squeeze(-1).permute(0, 2, 1)) # [B, N, C] + # Positional encoding of coordinates + x, y = coords[:, :, 0], coords[:, :, 1] # [B, N] + pos_x, pos_y = self.pos_enc._encode_xy(x.flatten(), y.flatten()) + enc = torch.cat([pos_x, pos_y], dim=-1).view(B, N, -1) + embed = embed + self.points_pos_enc_project(cast_to_input(enc, embed)) + embed = embed + cast_to_input(self.label_embed(labels.long()), embed) + return embed + + def _encode_boxes(self, boxes, labels, img_feat_2d): + """Encode box prompts: direct + pool + pos_enc + label. 
boxes: [B, N, 4] normalized cxcywh.""" + B, N, _ = boxes.shape + embed = self.boxes_direct_project(boxes) + # ROI align from backbone at box regions + H, W = img_feat_2d.shape[-2:] + boxes_xyxy = box_cxcywh_to_xyxy(boxes) + scale = torch.tensor([W, H, W, H], dtype=boxes_xyxy.dtype, device=boxes_xyxy.device) + boxes_scaled = boxes_xyxy * scale + sampled = roi_align(img_feat_2d, boxes_scaled.view(-1, 4).split(N), self.roi_size) + proj = self.boxes_pool_project(sampled).view(B, N, -1) # Conv2d(roi_size) -> [B*N, C, 1, 1] -> [B, N, C] + embed = embed + proj + # Positional encoding of box center + size + cx, cy, w, h = boxes[:, :, 0], boxes[:, :, 1], boxes[:, :, 2], boxes[:, :, 3] + enc = self.pos_enc.encode_boxes(cx.flatten(), cy.flatten(), w.flatten(), h.flatten()) + enc = enc.view(B, N, -1) + embed = embed + self.boxes_pos_enc_project(cast_to_input(enc, embed)) + embed = embed + cast_to_input(self.label_embed(labels.long()), embed) + return embed + + def forward(self, points=None, boxes=None, image_features=None): + """Encode geometry prompts. 
image_features: [B, HW, C] flattened backbone features.""" + # Prepare 2D image features for pooling + img_feat_2d = None + if image_features is not None: + B = image_features.shape[0] + HW, C = image_features.shape[1], image_features.shape[2] + hw = int(math.sqrt(HW)) + img_normed = self.img_pre_norm(image_features) + img_feat_2d = img_normed.permute(0, 2, 1).view(B, C, hw, hw) + + embeddings = [] + if points is not None: + coords, labels = points + embeddings.append(self._encode_points(coords, labels, img_feat_2d)) + if boxes is not None: + B = boxes.shape[0] + box_labels = torch.ones(B, boxes.shape[1], dtype=torch.long, device=boxes.device) + embeddings.append(self._encode_boxes(boxes, box_labels, img_feat_2d)) + if not embeddings: + return None + geo = torch.cat(embeddings, dim=1) + geo = self.norm(geo) + if image_features is not None: + for layer in self.encode: + geo = layer(geo, torch.zeros_like(geo), image_features) + geo = self.encode_norm(geo) + return self.final_proj(geo) + + +class PixelDecoder(nn.Module): + """Top-down FPN pixel decoder with GroupNorm + ReLU + nearest interpolation.""" + def __init__(self, d_model=256, num_stages=3, device=None, dtype=None, operations=None): + super().__init__() + self.conv_layers = nn.ModuleList([operations.Conv2d(d_model, d_model, kernel_size=3, padding=1, device=device, dtype=dtype) for _ in range(num_stages)]) + self.norms = nn.ModuleList([operations.GroupNorm(8, d_model, device=device, dtype=dtype) for _ in range(num_stages)]) + + def forward(self, backbone_features): + prev = backbone_features[-1] + for i, feat in enumerate(backbone_features[:-1][::-1]): + prev = F.relu(self.norms[i](self.conv_layers[i](feat + F.interpolate(prev, size=feat.shape[-2:], mode="nearest")))) + return prev + + +class MaskPredictor(nn.Module): + def __init__(self, d_model=256, device=None, dtype=None, operations=None): + super().__init__() + self.mask_embed = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, 
operations=operations) + + def forward(self, query_embeddings, pixel_features): + mask_embed = self.mask_embed(query_embeddings) + return torch.einsum("bqc,bchw->bqhw", mask_embed, pixel_features) + + +class SegmentationHead(nn.Module): + def __init__(self, d_model=256, num_heads=8, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.pixel_decoder = PixelDecoder(d_model, 3, device=device, dtype=dtype, operations=operations) + self.mask_predictor = MaskPredictor(d_model, device=device, dtype=dtype, operations=operations) + self.cross_attend_prompt = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.instance_seg_head = operations.Conv2d(d_model, d_model, kernel_size=1, device=device, dtype=dtype) + self.semantic_seg_head = operations.Conv2d(d_model, 1, kernel_size=1, device=device, dtype=dtype) + + def forward(self, query_embeddings, backbone_features, encoder_hidden_states=None, prompt=None, prompt_mask=None): + if encoder_hidden_states is not None and prompt is not None: + enc_normed = self.cross_attn_norm(encoder_hidden_states) + enc_cross = self.cross_attend_prompt(enc_normed, prompt, prompt, mask=prompt_mask) + encoder_hidden_states = enc_cross + encoder_hidden_states + + if encoder_hidden_states is not None: + B, H, W = encoder_hidden_states.shape[0], backbone_features[-1].shape[-2], backbone_features[-1].shape[-1] + encoder_visual = encoder_hidden_states[:, :H * W].permute(0, 2, 1).view(B, self.d_model, H, W) + backbone_features = list(backbone_features) + backbone_features[-1] = encoder_visual + + pixel_features = self.pixel_decoder(backbone_features) + instance_features = self.instance_seg_head(pixel_features) + masks = self.mask_predictor(query_embeddings, instance_features) + return masks + + +class DotProductScoring(nn.Module): + def __init__(self, d_model=256, device=None, dtype=None, 
operations=None): + super().__init__() + self.hs_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.prompt_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.prompt_mlp = MLPWithNorm(d_model, 2048, d_model, 2, device=device, dtype=dtype, operations=operations) + self.scale = 1.0 / (d_model ** 0.5) + + def forward(self, query_embeddings, prompt_embeddings, prompt_mask=None): + prompt = self.prompt_mlp(prompt_embeddings) + if prompt_mask is not None: + weight = prompt_mask.unsqueeze(-1).to(dtype=prompt.dtype) + pooled = (prompt * weight).sum(dim=1) / weight.sum(dim=1).clamp(min=1) + else: + pooled = prompt.mean(dim=1) + hs = self.hs_proj(query_embeddings) + pp = self.prompt_proj(pooled).unsqueeze(-1).to(hs.dtype) + scores = torch.matmul(hs, pp) + return (scores * self.scale).clamp(-12.0, 12.0).squeeze(-1) + + +class SAM3Detector(nn.Module): + def __init__(self, d_model=256, embed_dim=1024, num_queries=200, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + image_model = kwargs.pop("image_model", "SAM3") + for k in ("num_heads", "num_head_channels"): + kwargs.pop(k, None) + multiplex = image_model == "SAM31" + # SAM3: 4 FPN levels, drop last (scalp=1); SAM3.1: 3 levels, use all (scalp=0) + self.scalp = 0 if multiplex else 1 + self.backbone = nn.ModuleDict({ + "vision_backbone": SAM3VisionBackbone(embed_dim=embed_dim, d_model=d_model, multiplex=multiplex, device=device, dtype=dtype, operations=operations, **kwargs), + "language_backbone": nn.ModuleDict({"resizer": operations.Linear(embed_dim, d_model, device=device, dtype=dtype)}), + }) + self.transformer = Transformer(d_model=d_model, num_queries=num_queries, device=device, dtype=dtype, operations=operations) + self.segmentation_head = SegmentationHead(d_model=d_model, device=device, dtype=dtype, operations=operations) + self.geometry_encoder = GeometryEncoder(d_model=d_model, device=device, dtype=dtype, operations=operations) + 
self.dot_prod_scoring = DotProductScoring(d_model=d_model, device=device, dtype=dtype, operations=operations) + + def _get_backbone_features(self, images): + """Run backbone and return (detector_features, detector_positions, tracker_features, tracker_positions).""" + bb = self.backbone["vision_backbone"] + if bb.multiplex: + all_f, all_p, tf, tp = bb(images, tracker_mode="propagation") + else: + all_f, all_p, tf, tp = bb(images, need_tracker=True) + return all_f, all_p, tf, tp + + @staticmethod + def _run_geo_layer(layer, x, memory, memory_pos): + x = x + layer.self_attn(layer.norm1(x)) + x = x + layer.cross_attn_image(layer.norm2(x), memory + memory_pos, memory) + x = x + layer.linear2(F.relu(layer.linear1(layer.norm3(x)))) + return x + + def _detect(self, features, positions, text_embeddings=None, text_mask=None, + points=None, boxes=None): + """Shared detection: geometry encoding, transformer, scoring, segmentation.""" + B = features[0].shape[0] + # Scalp for encoder (use top-level feature), but keep all levels for segmentation head + seg_features = features + if self.scalp > 0: + features = features[:-self.scalp] + positions = positions[:-self.scalp] + enc_feat, enc_pos = features[-1], positions[-1] + _, _, H, W = enc_feat.shape + img_flat = enc_feat.flatten(2).permute(0, 2, 1) + pos_flat = enc_pos.flatten(2).permute(0, 2, 1) + + has_prompts = text_embeddings is not None or points is not None or boxes is not None + if has_prompts: + geo_enc = self.geometry_encoder + geo_prompts = geo_enc(points=points, boxes=boxes, image_features=img_flat) + geo_cls = geo_enc.norm(geo_enc.final_proj(cast_to_input(geo_enc.cls_embed.weight, img_flat).view(1, 1, -1).expand(B, -1, -1))) + for layer in geo_enc.encode: + geo_cls = self._run_geo_layer(layer, geo_cls, img_flat, pos_flat) + geo_cls = geo_enc.encode_norm(geo_cls) + if text_embeddings is not None and text_embeddings.shape[0] != B: + text_embeddings = text_embeddings.expand(B, -1, -1) + if text_mask is not None and 
text_mask.shape[0] != B: + text_mask = text_mask.expand(B, -1) + parts = [t for t in [text_embeddings, geo_prompts, geo_cls] if t is not None] + text_embeddings = torch.cat(parts, dim=1) + n_new = text_embeddings.shape[1] - (text_mask.shape[1] if text_mask is not None else 0) + if text_mask is not None: + text_mask = torch.cat([text_mask, torch.ones(B, n_new, dtype=torch.bool, device=text_mask.device)], dim=1) + else: + text_mask = torch.ones(B, text_embeddings.shape[1], dtype=torch.bool, device=text_embeddings.device) + + memory = self.transformer.encoder(img_flat, pos_flat, text_embeddings, text_mask) + dec_out = self.transformer.decoder(memory, pos_flat, text_embeddings, text_mask, H, W) + query_out, pred_boxes = dec_out["decoder_output"], dec_out["pred_boxes"] + + if text_embeddings is not None: + scores = self.dot_prod_scoring(query_out, text_embeddings, text_mask) + else: + scores = torch.zeros(B, query_out.shape[1], device=query_out.device) + + masks = self.segmentation_head(query_out, seg_features, encoder_hidden_states=memory, prompt=text_embeddings, prompt_mask=text_mask) + return box_cxcywh_to_xyxy(pred_boxes), scores, masks, dec_out + + def forward(self, images, text_embeddings=None, text_mask=None, points=None, boxes=None, threshold=0.3, orig_size=None): + features, positions, _, _ = self._get_backbone_features(images) + + if text_embeddings is not None: + text_embeddings = self.backbone["language_backbone"]["resizer"](text_embeddings) + if text_mask is not None: + text_mask = text_mask.bool() + + boxes_xyxy, scores, masks, dec_out = self._detect( + features, positions, text_embeddings, text_mask, points, boxes) + + if orig_size is not None: + oh, ow = orig_size + boxes_xyxy = boxes_xyxy * torch.tensor([ow, oh, ow, oh], device=boxes_xyxy.device, dtype=boxes_xyxy.dtype) + masks = F.interpolate(masks, size=orig_size, mode="bilinear", align_corners=False) + + return { + "boxes": boxes_xyxy, + "scores": scores, + "masks": masks, + "presence": 
dec_out.get("presence"), + } + + def forward_from_trunk(self, trunk_out, text_embeddings, text_mask): + """Run detection using a pre-computed ViTDet trunk output. + + text_embeddings must already be resized through language_backbone.resizer. + Returns dict with boxes (normalized xyxy), scores, masks at detector resolution. + """ + bb = self.backbone["vision_backbone"] + features = [conv(trunk_out) for conv in bb.convs] + positions = [cast_to_input(bb.position_encoding(f), f) for f in features] + + if text_mask is not None: + text_mask = text_mask.bool() + + boxes_xyxy, scores, masks, _ = self._detect(features, positions, text_embeddings, text_mask) + return {"boxes": boxes_xyxy, "scores": scores, "masks": masks} + + +class SAM3Model(nn.Module): + def __init__(self, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + self.dtype = dtype + image_model = kwargs.get("image_model", "SAM3") + tracker_cls = TRACKER_CLASSES[image_model] + self.detector = SAM3Detector(device=device, dtype=dtype, operations=operations, **kwargs) + self.tracker = tracker_cls(device=device, dtype=dtype, operations=operations, **kwargs) + + def forward(self, images, **kwargs): + return self.detector(images, **kwargs) + + def forward_segment(self, images, point_inputs=None, box_inputs=None, mask_inputs=None): + """Interactive segmentation using SAM decoder with point/box/mask prompts. 
+ + Args: + images: [B, 3, 1008, 1008] preprocessed images + point_inputs: {"point_coords": [B, N, 2], "point_labels": [B, N]} in 1008x1008 pixel space + box_inputs: [B, 2, 2] box corners (top-left, bottom-right) in 1008x1008 pixel space + mask_inputs: [B, 1, H, W] coarse mask logits to refine + Returns: + [B, 1, image_size, image_size] high-res mask logits + """ + bb = self.detector.backbone["vision_backbone"] + if bb.multiplex: + _, _, tracker_features, tracker_positions = bb(images, tracker_mode="interactive") + else: + _, _, tracker_features, tracker_positions = bb(images, need_tracker=True) + if self.detector.scalp > 0: + tracker_features = tracker_features[:-self.detector.scalp] + tracker_positions = tracker_positions[:-self.detector.scalp] + + high_res = list(tracker_features[:-1]) + backbone_feat = tracker_features[-1] + B, C, H, W = backbone_feat.shape + # Add no-memory embedding (init frame path) + no_mem = getattr(self.tracker, 'interactivity_no_mem_embed', None) + if no_mem is None: + no_mem = getattr(self.tracker, 'no_mem_embed', None) + if no_mem is not None: + feat_flat = backbone_feat.flatten(2).permute(0, 2, 1) + feat_flat = feat_flat + cast_to_input(no_mem, feat_flat) + backbone_feat = feat_flat.view(B, H, W, C).permute(0, 3, 1, 2) + + num_pts = 0 if point_inputs is None else point_inputs["point_labels"].size(1) + _, high_res_masks, _, _ = self.tracker._forward_sam_heads( + backbone_features=backbone_feat, + point_inputs=point_inputs, + mask_inputs=mask_inputs, + box_inputs=box_inputs, + high_res_features=high_res, + multimask_output=(0 < num_pts <= 1), + ) + return high_res_masks + + def forward_video(self, images, initial_masks, pbar=None, text_prompts=None, + new_det_thresh=0.5, max_objects=0, detect_interval=1, + target_device=None, target_dtype=None): + """Track video with optional per-frame text-prompted detection.""" + bb = self.detector.backbone["vision_backbone"] + + def backbone_fn(frame, frame_idx=None): + trunk_out = bb.trunk(frame) + 
if bb.multiplex: + _, _, tf, tp = bb(frame, tracker_mode="propagation", cached_trunk=trunk_out, tracker_only=True) + else: + _, _, tf, tp = bb(frame, need_tracker=True, cached_trunk=trunk_out, tracker_only=True) + return tf, tp, trunk_out + + detect_fn = None + if text_prompts: + resizer = self.detector.backbone["language_backbone"]["resizer"] + resized = [(resizer(emb), m.bool() if m is not None else None) for emb, m in text_prompts] + def detect_fn(trunk_out): + all_scores, all_masks = [], [] + for emb, mask in resized: + det = self.detector.forward_from_trunk(trunk_out, emb, mask) + all_scores.append(det["scores"]) + all_masks.append(det["masks"]) + return {"scores": torch.cat(all_scores, dim=1), "masks": torch.cat(all_masks, dim=1)} + + if hasattr(self.tracker, 'track_video_with_detection'): + return self.tracker.track_video_with_detection( + backbone_fn, images, initial_masks, detect_fn, + new_det_thresh=new_det_thresh, max_objects=max_objects, + detect_interval=detect_interval, backbone_obj=bb, pbar=pbar, + target_device=target_device, target_dtype=target_dtype) + # SAM3 (non-multiplex) — no detection support, requires initial masks + if initial_masks is None: + raise ValueError("SAM3 (non-multiplex) requires initial_mask for video tracking") + return self.tracker.track_video(backbone_fn, images, initial_masks, pbar=pbar, backbone_obj=bb, + target_device=target_device, target_dtype=target_dtype) diff --git a/comfy/ldm/sam3/sam.py b/comfy/ldm/sam3/sam.py new file mode 100644 index 000000000..75cb457cf --- /dev/null +++ b/comfy/ldm/sam3/sam.py @@ -0,0 +1,425 @@ +# SAM3 shared components: primitives, ViTDet backbone, FPN neck, position encodings. 
class MLP(nn.Module):
    """Stack of ``num_layers`` linear layers with ReLU between consecutive layers.

    Args:
        input_dim: width of the input features.
        hidden_dim: width of every intermediate layer.
        output_dim: width of the final layer's output.
        num_layers: total number of linear layers.
        sigmoid_output: when true, a sigmoid is applied to the final output.
        device, dtype: forwarded to every ``operations.Linear``.
        operations: namespace providing the ``Linear`` layer class.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, sigmoid_output=False, device=None, dtype=None, operations=None):
        super().__init__()
        widths = [input_dim, *([hidden_dim] * (num_layers - 1)), output_dim]
        # Pair each layer's input width with the following width; slicing the
        # inputs to ``num_layers`` keeps the layer count exact.
        layer_list = []
        for fan_in, fan_out in zip(widths[:num_layers], widths[1:]):
            layer_list.append(operations.Linear(fan_in, fan_out, device=device, dtype=dtype))
        self.layers = nn.ModuleList(layer_list)
        self.sigmoid_output = sigmoid_output

    def forward(self, x):
        final_idx = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            # Every layer except the last is followed by a ReLU.
            if idx < final_idx:
                x = F.relu(x)
        if self.sigmoid_output:
            return torch.sigmoid(x)
        return x
self.skip_first_layer_pe = skip_first_layer_pe + self.self_attn = SAMAttention(embedding_dim, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_token_to_image = SAMAttention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate, device=device, dtype=dtype, operations=operations) + self.cross_attn_image_to_token = SAMAttention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate, device=device, dtype=dtype, operations=operations) + self.mlp = nn.Sequential(operations.Linear(embedding_dim, mlp_dim, device=device, dtype=dtype), nn.ReLU(), operations.Linear(mlp_dim, embedding_dim, device=device, dtype=dtype)) + self.norm1 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm4 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + + def forward(self, queries, keys, query_pe, key_pe): + if self.skip_first_layer_pe: + queries = self.norm1(self.self_attn(queries, queries, queries)) + else: + q = queries + query_pe + queries = self.norm1(queries + self.self_attn(q, q, queries)) + q, k = queries + query_pe, keys + key_pe + queries = self.norm2(queries + self.cross_attn_token_to_image(q, k, keys)) + queries = self.norm3(queries + self.mlp(queries)) + q, k = queries + query_pe, keys + key_pe + keys = self.norm4(keys + self.cross_attn_image_to_token(k, q, queries)) + return queries, keys + + +class TwoWayTransformer(nn.Module): + def __init__(self, depth=2, embedding_dim=256, num_heads=8, mlp_dim=2048, attention_downsample_rate=2, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + TwoWayAttentionBlock(embedding_dim, num_heads, mlp_dim, attention_downsample_rate, + skip_first_layer_pe=(i == 0), device=device, dtype=dtype, operations=operations) + for i in range(depth) + ]) + 
class PositionEmbeddingRandom(nn.Module):
    """Fourier feature positional encoding with random gaussian projection.

    A fixed ``[2, num_pos_feats]`` gaussian matrix (stored as the buffer
    ``positional_encoding_gaussian_matrix``) projects 2D coordinates; the
    sine and cosine of the projection are concatenated, giving
    ``2 * num_pos_feats`` output channels.
    """

    def __init__(self, num_pos_feats=64, scale=None):
        super().__init__()
        # A falsy scale (None or 0) falls back to 1.0.
        self.register_buffer("positional_encoding_gaussian_matrix", (scale or 1.0) * torch.randn(2, num_pos_feats))

    def _encode(self, normalized_coords):
        """Map normalized [0,1] coordinates to fourier features via random projection.

        The projection is computed in fp32. Floating-point inputs get the result
        back in their own dtype; integer inputs get fp32, because casting
        sin/cos values to an integer dtype would truncate them.
        """
        if normalized_coords.is_floating_point():
            out_dtype = normalized_coords.dtype
        else:
            out_dtype = torch.float32
        proj_matrix = self.positional_encoding_gaussian_matrix.to(device=normalized_coords.device, dtype=torch.float32)
        # Shift [0, 1] to [-1, 1], then scale by 2*pi before projecting.
        projected = 2 * math.pi * (2 * normalized_coords.float() - 1) @ proj_matrix
        return torch.cat([projected.sin(), projected.cos()], dim=-1).to(out_dtype)

    def forward(self, size, device=None):
        """Return the dense encoding ``[1, 2 * num_pos_feats, h, w]`` for a grid of ``size = (h, w)``."""
        h, w = size
        dev = device if device is not None else self.positional_encoding_gaussian_matrix.device
        ones = torch.ones((h, w), device=dev, dtype=torch.float32)
        # cumsum - 0.5 yields cell-centre coordinates, normalized by the grid extent.
        norm_xy = torch.stack([(ones.cumsum(1) - 0.5) / w, (ones.cumsum(0) - 0.5) / h], dim=-1)
        return self._encode(norm_xy).permute(2, 0, 1).unsqueeze(0)

    def forward_with_coords(self, pixel_coords, image_size):
        """Encode explicit pixel coordinates.

        Args:
            pixel_coords: tensor whose last dimension holds ``(x, y)`` pixel
                coordinates. Integer tensors are accepted and promoted to fp32.
            image_size: ``(height, width)`` used to normalize the coordinates.

        Returns:
            Tensor with the leading dimensions of ``pixel_coords`` and a last
            dimension of ``2 * num_pos_feats``. The input is never modified.
        """
        if pixel_coords.is_floating_point():
            norm = pixel_coords.clone()
        else:
            # In-place true division is not defined for integer tensors, so
            # promote first; ``.to`` with a new dtype already returns a copy.
            norm = pixel_coords.to(torch.float32)
        norm[..., 0] /= image_size[1]
        norm[..., 1] /= image_size[0]
        return self._encode(norm)
class _ViTMLP(nn.Module):
    """Two-layer feed-forward block: Linear -> GELU -> Linear.

    The hidden width is ``int(dim * mlp_ratio)``; the output width equals ``dim``.
    """

    def __init__(self, dim, mlp_ratio=4.0, device=None, dtype=None, operations=None):
        super().__init__()
        inner_dim = int(dim * mlp_ratio)
        factory = {"device": device, "dtype": dtype}
        self.fc1 = operations.Linear(dim, inner_dim, **factory)
        self.act = nn.GELU()
        self.fc2 = operations.Linear(inner_dim, dim, **factory)

    def forward(self, x):
        expanded = self.fc1(x)
        activated = self.act(expanded)
        return self.fc2(activated)
class PatchEmbed(nn.Module):
    """Patchify an image with a single bias-free convolution.

    The convolution uses ``patch_size`` as both kernel size and stride, so each
    output location corresponds to one non-overlapping patch.
    """

    def __init__(self, patch_size=14, in_chans=3, embed_dim=1024, device=None, dtype=None, operations=None):
        super().__init__()
        conv_kwargs = {
            "kernel_size": patch_size,
            "stride": patch_size,
            "bias": False,
            "device": device,
            "dtype": dtype,
        }
        self.proj = operations.Conv2d(in_chans, embed_dim, **conv_kwargs)

    def forward(self, x):
        patches = self.proj(x)
        return patches
pretrain_grid = pretrain_img_size // patch_size + + self.blocks = nn.ModuleList() + for i in range(depth): + is_global = i in self.global_att_blocks + self.blocks.append(Block( + embed_dim, num_heads, mlp_ratio, qkv_bias, + window_size=0 if is_global else window_size, + use_rope=use_rope, + device=device, dtype=dtype, operations=operations, + )) + + if use_rope: + rope_scale = pretrain_grid / grid_size + self.register_buffer("freqs_cis", rope_2d(grid_size, grid_size, embed_dim // num_heads, scale_pos=rope_scale), persistent=False) + self.register_buffer("freqs_cis_window", rope_2d(window_size, window_size, embed_dim // num_heads), persistent=False) + else: + self.freqs_cis = None + self.freqs_cis_window = None + + def _get_pos_embed(self, num_tokens): + pos = self.pos_embed + if pos.shape[1] == num_tokens: + return pos + cls_pos = pos[:, :1] + spatial_pos = pos[:, 1:] + old_size = int(math.sqrt(spatial_pos.shape[1])) + new_size = int(math.sqrt(num_tokens - 1)) if num_tokens > 1 else old_size + spatial_2d = spatial_pos.reshape(1, old_size, old_size, -1).permute(0, 3, 1, 2) + tiles_h = new_size // old_size + 1 + tiles_w = new_size // old_size + 1 + tiled = spatial_2d.tile([1, 1, tiles_h, tiles_w])[:, :, :new_size, :new_size] + tiled = tiled.permute(0, 2, 3, 1).reshape(1, new_size * new_size, -1) + return torch.cat([cls_pos, tiled], dim=1) + + def forward(self, x): + x = self.patch_embed(x) + B, C, Hp, Wp = x.shape + x = x.permute(0, 2, 3, 1).reshape(B, Hp * Wp, C) + + pos = cast_to_input(self._get_pos_embed(Hp * Wp + 1), x) + x = x + pos[:, 1:Hp * Wp + 1] + + x = x.view(B, Hp, Wp, C) + x = self.ln_pre(x) + + freqs_cis_global = self.freqs_cis + freqs_cis_win = self.freqs_cis_window + if freqs_cis_global is not None: + freqs_cis_global = cast_to_input(freqs_cis_global, x) + if freqs_cis_win is not None: + freqs_cis_win = cast_to_input(freqs_cis_win, x) + + for block in self.blocks: + fc = freqs_cis_win if block.window_size > 0 else freqs_cis_global + x = block(x, 
class FPNScaleConv(nn.Module):
    """One FPN level: rescale a feature map, then project it to ``out_dim`` channels.

    Supported ``scale`` values and the rescaling they select:
        4.0 - two stride-2 transposed convolutions with a GELU in between
        2.0 - one stride-2 transposed convolution
        1.0 - no rescaling
        0.5 - 2x2 max pooling

    The rescaled map then passes through a 1x1 convolution and a 3x3
    convolution (padding 1).

    Raises:
        ValueError: if ``scale`` is not one of the supported values.
    """

    def __init__(self, in_dim, out_dim, scale, device=None, dtype=None, operations=None):
        super().__init__()
        if scale == 4.0:
            self.dconv_2x2_0 = operations.ConvTranspose2d(in_dim, in_dim // 2, kernel_size=2, stride=2, device=device, dtype=dtype)
            self.dconv_2x2_1 = operations.ConvTranspose2d(in_dim // 2, in_dim // 4, kernel_size=2, stride=2, device=device, dtype=dtype)
            proj_in = in_dim // 4
        elif scale == 2.0:
            self.dconv_2x2 = operations.ConvTranspose2d(in_dim, in_dim // 2, kernel_size=2, stride=2, device=device, dtype=dtype)
            proj_in = in_dim // 2
        elif scale == 1.0:
            proj_in = in_dim
        elif scale == 0.5:
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            proj_in = in_dim
        else:
            # Without this branch ``proj_in`` would be unbound below and the
            # failure would surface as an opaque UnboundLocalError.
            raise ValueError(f"Unsupported FPN scale {scale!r}; expected one of 4.0, 2.0, 1.0, 0.5")
        self.scale = scale
        self.conv_1x1 = operations.Conv2d(proj_in, out_dim, kernel_size=1, device=device, dtype=dtype)
        self.conv_3x3 = operations.Conv2d(out_dim, out_dim, kernel_size=3, padding=1, device=device, dtype=dtype)

    def forward(self, x):
        """Rescale ``x`` according to ``self.scale`` and project it to ``out_dim`` channels."""
        if self.scale == 4.0:
            x = F.gelu(self.dconv_2x2_0(x))
            x = self.dconv_2x2_1(x)
        elif self.scale == 2.0:
            x = self.dconv_2x2(x)
        elif self.scale == 0.5:
            x = self.pool(x)
        x = self.conv_1x1(x)
        x = self.conv_3x3(x)
        return x
class SAM3VisionBackbone(nn.Module):
    """ViTDet trunk followed by FPN necks for the detector and the tracker.

    With ``multiplex`` the tracker has two necks (``propagation_convs`` and
    ``interactive_convs``) over scales 4/2/1; otherwise it has a single
    ``sam2_convs`` neck over scales 4/2/1/0.5. ``convs`` is the detector neck.
    """

    def __init__(self, embed_dim=1024, d_model=256, multiplex=False, device=None, dtype=None, operations=None, **kwargs):
        super().__init__()
        self.trunk = ViTDet(embed_dim=embed_dim, device=device, dtype=dtype, operations=operations, **kwargs)
        self.position_encoding = PositionEmbeddingSine(num_pos_feats=d_model, normalize=True)
        self.multiplex = multiplex

        def build_neck(scales):
            return nn.ModuleList([
                FPNScaleConv(embed_dim, d_model, scale, device=device, dtype=dtype, operations=operations)
                for scale in scales
            ])

        if multiplex:
            neck_scales = [4.0, 2.0, 1.0]
            self.convs = build_neck(neck_scales)
            self.propagation_convs = build_neck(neck_scales)
            self.interactive_convs = build_neck(neck_scales)
        else:
            neck_scales = [4.0, 2.0, 1.0, 0.5]
            self.convs = build_neck(neck_scales)
            self.sam2_convs = build_neck(neck_scales)

    def _run_neck(self, neck, trunk_out):
        """Apply every conv of ``neck`` to ``trunk_out`` and attach position encodings."""
        feats = [conv(trunk_out) for conv in neck]
        pos = [cast_to_input(self.position_encoding(f), f) for f in feats]
        return feats, pos

    def forward(self, images, need_tracker=False, tracker_mode=None, cached_trunk=None, tracker_only=False):
        """Return ``(features, positions, tracker_features, tracker_positions)``.

        ``cached_trunk`` replaces the trunk forward pass when given. With
        ``tracker_only`` the detector outputs are ``None``. Otherwise the
        tracker outputs are ``None`` unless a tracker neck is selected by
        ``tracker_mode`` (multiplex) or ``need_tracker`` (non-multiplex).
        """
        trunk_out = self.trunk(images) if cached_trunk is None else cached_trunk

        if tracker_only:
            # Skip detector FPN when only tracker features are needed (video tracking)
            if not self.multiplex:
                neck = self.sam2_convs
            elif tracker_mode == "propagation":
                neck = self.propagation_convs
            else:
                neck = self.interactive_convs
            tracker_features, tracker_positions = self._run_neck(neck, trunk_out)
            return None, None, tracker_features, tracker_positions

        features, positions = self._run_neck(self.convs, trunk_out)

        neck = None
        if self.multiplex:
            if tracker_mode == "propagation":
                neck = self.propagation_convs
            elif tracker_mode == "interactive":
                neck = self.interactive_convs
        elif need_tracker:
            neck = self.sam2_convs

        if neck is None:
            return features, positions, None, None

        tracker_features, tracker_positions = self._run_neck(neck, trunk_out)
        return features, positions, tracker_features, tracker_positions
class MultiplexState:
    """Tracks object-to-slot assignments for multiplex tracking.

    Objects are packed into buckets of ``multiplex_count`` slots. Two 0/1
    matrices are kept: ``mux_matrix`` ``[total_slots, num_objects]`` scatters
    per-object rows into slots, and ``demux_matrix``
    ``[num_objects, total_slots]`` gathers them back.
    """

    def __init__(self, num_objects, multiplex_count, device, dtype):
        self.multiplex_count = multiplex_count
        self.device = device
        self.dtype = dtype
        self._build(num_objects)

    def _build(self, num_objects):
        """(Re)create the bucket count and the mux/demux matrices for ``num_objects``."""
        width = self.multiplex_count
        full_buckets, leftover = divmod(num_objects, width)
        self.num_buckets = full_buckets + (1 if leftover else 0)
        self.total_valid_entries = num_objects
        slot_count = self.num_buckets * width
        factory = {"device": self.device, "dtype": self.dtype}
        mux = torch.zeros(slot_count, num_objects, **factory)
        demux = torch.zeros(num_objects, slot_count, **factory)
        obj_ids = torch.arange(num_objects, device=self.device)
        # Slot index = bucket * width + lane within the bucket.
        bucket_ids = torch.div(obj_ids, width, rounding_mode="floor")
        lane_ids = obj_ids % width
        slot_ids = bucket_ids * width + lane_ids
        mux[slot_ids, obj_ids] = 1.0
        demux[obj_ids, slot_ids] = 1.0
        self.mux_matrix = mux
        self.demux_matrix = demux

    def mux(self, x):
        """[N_obj, ...] -> [num_buckets, multiplex_count, ...]"""
        weights = self.mux_matrix.to(device=x.device, dtype=x.dtype)
        per_object = x.reshape(self.total_valid_entries, -1)
        target_shape = (self.num_buckets, self.multiplex_count) + x.shape[1:]
        return (weights @ per_object).view(target_shape)

    def demux(self, x):
        """[num_buckets, multiplex_count, ...] -> [N_obj, ...]"""
        weights = self.demux_matrix.to(device=x.device, dtype=x.dtype)
        per_slot = x.reshape(self.num_buckets * self.multiplex_count, -1)
        target_shape = (self.total_valid_entries,) + x.shape[2:]
        return (weights @ per_slot).view(target_shape)

    def get_valid_object_mask(self):
        """[num_buckets, multiplex_count] bool tensor, True for valid slots."""
        slot_in_use = self.mux_matrix.sum(dim=1) > 0
        return slot_in_use.reshape(self.num_buckets, self.multiplex_count)

    def add_objects(self, n_new):
        """Grow multiplex state for n_new additional objects."""
        self._build(self.total_valid_entries + n_new)
def _get_connected_components(mask_bin):
    """Label 8-connected components of each mask and report per-pixel component area.

    Args:
        mask_bin: [B, 1, H, W] uint8 tensor; non-zero pixels are foreground.

    Returns:
        (labels, areas), both [B, 1, H, W] on ``mask_bin.device``. ``labels``
        holds the component id of each pixel. ``areas`` (int32) holds, for each
        foreground pixel, the pixel count of its component.

    OpenCV is used when available, SciPy otherwise. Both paths use
    8-connectivity so results do not depend on which library is installed.
    Values at background pixels differ between the two paths (OpenCV reports
    the background area, SciPy reports 0).
    """
    if mask_bin.shape[0] == 0:
        # torch.stack cannot handle an empty list; return correctly shaped empties.
        empty = torch.zeros_like(mask_bin, dtype=torch.int32)
        return empty, empty.clone()

    labels_list, areas_list = [], []
    for i in range(mask_bin.shape[0]):
        m = mask_bin[i, 0].cpu().numpy()
        if _HAS_CV2:
            _, labeled, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8)
            areas = stats[labeled, cv2.CC_STAT_AREA].astype('int32')
        else:
            # scipy defaults to 4-connectivity; a full 3x3 structure matches
            # the connectivity=8 used by the OpenCV path.
            labeled, _ = ndimage.label(m, structure=np.ones((3, 3), dtype=bool))
            # One histogram pass instead of one full-image scan per component.
            counts = np.bincount(labeled.ravel(), minlength=1)
            counts[0] = 0  # label 0 is background
            areas = counts[labeled].astype(np.int32)
        labels_list.append(torch.from_numpy(labeled).to(mask_bin.device))
        areas_list.append(torch.from_numpy(areas).to(device=mask_bin.device, dtype=torch.int32))
    return torch.stack(labels_list).unsqueeze(1), torch.stack(areas_list).unsqueeze(1)
def get_1d_sine_pe(pos_inds, dim, temperature=10000):
    """1D sinusoidal positional encoding for temporal positions.

    Args:
        pos_inds: tensor of positions; a trailing feature axis is appended.
        dim: output feature size (``dim // 2`` sines followed by ``dim // 2`` cosines).
        temperature: base of the geometric frequency progression.

    Returns:
        Tensor of shape ``pos_inds.shape + (2 * (dim // 2),)``.
    """
    half = dim // 2
    channel = torch.arange(half, dtype=torch.float32, device=pos_inds.device)
    # Consecutive channel pairs share one frequency.
    pair_index = torch.div(channel, 2, rounding_mode="floor")
    wavelengths = temperature ** (2 * pair_index / half)
    angles = pos_inds.unsqueeze(-1) / wavelengths
    return torch.cat([angles.sin(), angles.cos()], dim=-1)
def pack_masks(masks):
    """Bit-pack binary masks along the last axis.

    ``masks`` is [*, H, W]; values > 0 count as set. Returns uint8 [*, H, W//8]
    where bit ``k`` of each byte is column ``8 * byte_index + k``.
    W must be divisible by 8.
    """
    lead_shape = masks.shape[:-1]
    bit_weights = torch.tensor([1, 2, 4, 8, 16, 32, 64, 128], dtype=torch.int64, device=masks.device)
    bit_groups = (masks > 0).view(*lead_shape, -1, 8)
    byte_values = (bit_groups * bit_weights).sum(-1)
    return byte_values.to(torch.uint8)


def unpack_masks(packed):
    """Inverse of ``pack_masks``: uint8 [*, H, W//8] -> bool [*, H, W].

    Each byte expands to 8 consecutive booleans, least-significant bit first.
    """
    lead_shape = packed.shape[:-1]
    bit_values = (1 << torch.arange(8, device=packed.device)).to(torch.uint8)
    per_bit = (packed.unsqueeze(-1) & bit_values) != 0
    return per_bit.view(*lead_shape, -1)
def collect_memory_tokens(output_dict, frame_idx, num_maskmem, maskmem_tpos_enc, device,
                          collect_image_feats=False, tpos_v2=False, num_buckets=None):
    """Collect spatial memory, position encodings, and optionally image features from past frames.

    Conditioning frames (``output_dict["cond_frame_outputs"]``) are visited
    first, followed by up to ``num_maskmem - 1`` earlier non-conditioning
    frames, oldest first. Each contributes flattened ``maskmem_features`` and
    the last ``maskmem_pos_enc`` level plus a temporal encoding taken from
    ``maskmem_tpos_enc``. When ``num_buckets`` is given, both are padded along
    dim 0 to that many buckets.

    Returns:
        (memory, memory_pos, image_feats, image_pos, cond_outputs). The first
        four are lists; the image lists are filled only when
        ``collect_image_feats`` is true and the frame has ``image_features``.
    """
    memory, memory_pos = [], []
    image_feats, image_pos = [], []
    newest_slot = num_maskmem - 1

    def _add_frame(frame_out, tpos_idx):
        feats = frame_out["maskmem_features"].to(device)
        if num_buckets is not None:
            feats = _pad_to_buckets(feats, num_buckets)
        memory.append(feats.flatten(2).permute(0, 2, 1))

        pos_enc = frame_out["maskmem_pos_enc"][-1].to(device).flatten(2).permute(0, 2, 1)
        if num_buckets is not None:
            pos_enc = _pad_to_buckets(pos_enc, num_buckets)
        temporal = cast_to_input(maskmem_tpos_enc[tpos_idx], pos_enc)
        memory_pos.append(pos_enc + temporal)

        if collect_image_feats and "image_features" in frame_out:
            image_feats.append(frame_out["image_features"].to(device))
            image_pos.append(frame_out["image_pos_enc"].to(device) + temporal)

    cond_outputs = output_dict["cond_frame_outputs"]
    for cond_frame, frame_out in cond_outputs.items():
        # Default temporal slot is the last one; with tpos_v2 a conditioning
        # frame strictly inside the memory window uses a slot based on its age.
        tpos_idx = newest_slot
        if tpos_v2:
            age = frame_idx - cond_frame
            if 0 < age < num_maskmem:
                tpos_idx = num_maskmem - age - 1
        _add_frame(frame_out, tpos_idx)

    # lag counts frames back from frame_idx; the oldest frame is visited first.
    for lag in range(num_maskmem - 1, 0, -1):
        frame_out = output_dict["non_cond_frame_outputs"].get(frame_idx - lag)
        if frame_out is None or frame_out.get("maskmem_features") is None:
            continue
        _add_frame(frame_out, lag - 1)

    return memory, memory_pos, image_feats, image_pos, cond_outputs
def compute_tpos_enc(rel_pos_list, device, d_model, proj_layer, dtype=None, max_abs_pos=None):
    """Temporal position encoding for object pointers.

    Relative positions are divided by ``max_abs_pos - 1`` (a falsy
    ``max_abs_pos`` is treated as 2; the divisor is never below 1), encoded
    with ``get_1d_sine_pe``, optionally cast to ``dtype``, and passed through
    ``proj_layer``.
    """
    span = (max_abs_pos if max_abs_pos else 2) - 1
    divisor = span if span > 1 else 1
    rel_pos = torch.tensor(rel_pos_list, dtype=torch.float32, device=device)
    encoded = get_1d_sine_pe(rel_pos / divisor, dim=d_model)
    if dtype is not None:
        encoded = encoded.to(dtype)
    return proj_layer(encoded)
backbone_features) + + low_res_multimasks, ious, sam_output_tokens, object_score_logits = mask_decoder( + image_embeddings=backbone_features, image_pe=image_pe, + sparse_prompt_embeddings=sparse, dense_prompt_embeddings=dense, + high_res_features=high_res_features, multimask_output=multimask_output, return_all=True, + ) + + is_obj_appearing = object_score_logits > 0 + low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, + torch.tensor(NO_OBJ_SCORE, device=device, dtype=low_res_multimasks.dtype)) + high_res_multimasks = F.interpolate(low_res_multimasks, size=(image_size, image_size), mode="bilinear", align_corners=False) + + sam_output_token = sam_output_tokens[:, 0] + if multimask_output: + best_iou_inds = torch.argmax(ious, dim=-1) + batch_inds = torch.arange(B, device=device) + low_res_masks = low_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1) + high_res_masks = high_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1) + if sam_output_tokens.size(1) > 1: + sam_output_token = sam_output_tokens[batch_inds, best_iou_inds] + else: + low_res_masks, high_res_masks = low_res_multimasks, high_res_multimasks + + obj_ptr = obj_ptr_proj(sam_output_token) + obj_ptr = no_obj_fn(obj_ptr, is_obj_appearing) + + return low_res_masks, high_res_masks, obj_ptr, object_score_logits + + +def use_mask_as_output(backbone_features, high_res_features, mask_inputs, mask_downsample, + prompt_encoder, mask_decoder, obj_ptr_proj, no_obj_fn, image_size, backbone_stride): + """Shared mask-as-output for both SAM3 and SAM3.1 trackers.""" + out_scale, out_bias = 20.0, -10.0 + mask_inputs_float = cast_to_input(mask_inputs, backbone_features) + high_res_masks = mask_inputs_float * out_scale + out_bias + low_res_masks = F.interpolate(high_res_masks, size=(image_size // backbone_stride * 4,) * 2, + mode="bilinear", align_corners=False, antialias=True) + _, _, obj_ptr, _ = forward_sam_heads( + backbone_features, prompt_encoder, mask_decoder, obj_ptr_proj, 
no_obj_fn, + image_size, mask_inputs=mask_downsample(mask_inputs_float), high_res_features=high_res_features, + ) + is_obj_appearing = torch.any(mask_inputs.flatten(1) > 0.0, dim=1)[..., None] + alpha = is_obj_appearing.to(obj_ptr.dtype) + object_score_logits = out_scale * alpha + out_bias + return low_res_masks, high_res_masks, obj_ptr, object_score_logits + + +# Split attention with configurable input dims (for asymmetric cross-attention) +class SplitAttn(nn.Module): + def __init__(self, embed_dim, num_heads=1, kv_dim=None, internal_dim=None, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + kv_dim = kv_dim or embed_dim + internal_dim = internal_dim or embed_dim + self.q_proj = operations.Linear(embed_dim, internal_dim, device=device, dtype=dtype) + self.k_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.v_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.out_proj = operations.Linear(internal_dim, embed_dim, device=device, dtype=dtype) + + def forward(self, q, k=None, v=None, rope=None, num_k_exclude_rope=0): + if k is None: + k = q + if v is None: + v = k + q = self.q_proj(q) + k = self.k_proj(k) + v = self.v_proj(v) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) + out = optimized_attention(q, k, v, self.num_heads, low_precision_attention=False) + return self.out_proj(out) + + +class MemoryAttnLayer(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + self.self_attn = SplitAttn(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_image = SplitAttn(d_model, num_heads, kv_dim=kv_dim, device=device, dtype=dtype, operations=operations) + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = 
operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, x, memory, memory_pos=None, rope=None, num_k_exclude_rope=0): + x = x + self.self_attn(self.norm1(x), rope=rope) + mem_k = memory + memory_pos if memory_pos is not None else memory + x = x + self.cross_attn_image(self.norm2(x), mem_k, memory, rope=rope, num_k_exclude_rope=num_k_exclude_rope) + normed = self.norm3(x) + x = x + self.linear2(F.relu(self.linear1(normed))) + return x + + +class MemoryAttnEncoder(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, num_layers=4, image_size=1008, patch_size=14, + device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + MemoryAttnLayer(d_model, num_heads, kv_dim, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + hw = image_size // patch_size + self.register_buffer("_rope", rope_2d(hw, hw, d_model // num_heads), persistent=False) + + def forward(self, x, memory, src_pos=None, memory_pos=None, num_k_exclude_rope=0): + if src_pos is not None: + x = x + 0.1 * src_pos + + rope = self._rope.to(device=x.device) + for layer in self.layers: + x = layer(x, memory, memory_pos=memory_pos, rope=rope, num_k_exclude_rope=num_k_exclude_rope) + return self.norm(x) + + +class MemoryTransformer(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, num_layers=4, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = MemoryAttnEncoder(d_model, num_heads, kv_dim, dim_ff, num_layers, device=device, dtype=dtype, operations=operations) + + +def _upscale_masks(output_upscaling, conv_s0, conv_s1, src_out, 
high_res_features): + """Shared upscaling for SAM mask decoders: deconv + high-res feature integration.""" + dc1, ln1, act1, dc2, act2 = output_upscaling + if high_res_features is not None: + upscaled = act1(ln1(dc1(src_out) + conv_s1(high_res_features[1]))) + upscaled = act2(dc2(upscaled) + conv_s0(high_res_features[0])) + else: + upscaled = act2(dc2(act1(ln1(dc1(src_out))))) + return upscaled + + +class SAMMaskDecoder(nn.Module): + def __init__(self, d_model=256, num_multimask_outputs=3, device=None, dtype=None, operations=None): + super().__init__() + self.num_mask_tokens = num_multimask_outputs + 1 + + self.transformer = SAMTwoWayTransformer(depth=2, embedding_dim=d_model, num_heads=8, mlp_dim=2048, device=device, dtype=dtype, operations=operations) + + self.iou_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.mask_tokens = operations.Embedding(self.num_mask_tokens, d_model, device=device, dtype=dtype) + self.obj_score_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + + # Output upscaling: d_model -> d_model//4 -> d_model//8 at 4x resolution + LN2d = LayerNorm2d_op(operations) + self.output_upscaling = nn.Sequential( + operations.ConvTranspose2d(d_model, d_model // 4, kernel_size=2, stride=2, device=device, dtype=dtype), LN2d(d_model // 4, device=device, dtype=dtype), nn.GELU(), + operations.ConvTranspose2d(d_model // 4, d_model // 8, kernel_size=2, stride=2, device=device, dtype=dtype), nn.GELU(), + ) + + # High-res feature integration + self.conv_s0 = operations.Conv2d(d_model, d_model // 8, kernel_size=1, device=device, dtype=dtype) + self.conv_s1 = operations.Conv2d(d_model, d_model // 4, kernel_size=1, device=device, dtype=dtype) + + # Per-mask hypernetwork MLPs + self.output_hypernetworks_mlps = nn.ModuleList([ + MLP(d_model, d_model, d_model // 8, 3, device=device, dtype=dtype, operations=operations) + for _ in range(self.num_mask_tokens) + ]) + + self.iou_prediction_head = MLP(d_model, d_model, 
self.num_mask_tokens, 3, device=device, dtype=dtype, operations=operations) + self.pred_obj_score_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + + def forward(self, image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, + high_res_features=None, multimask_output=False, return_all=False): + B = sparse_prompt_embeddings.shape[0] + ref = sparse_prompt_embeddings + # Token order: [obj_score(1), iou(1), mask(num_mask_tokens)] + tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref), + cast_to_input(self.mask_tokens.weight, ref)], dim=0) + tokens = torch.cat([tokens.unsqueeze(0).expand(B, -1, -1), sparse_prompt_embeddings], dim=1) + + src = image_embeddings + if src.shape[0] != B: + src = src.expand(B, -1, -1, -1) + src = src + dense_prompt_embeddings + pos_src = image_pe.expand(B, -1, -1, -1) + + b, c, h, w = src.shape + src_flat = src.flatten(2).permute(0, 2, 1) + pos_flat = pos_src.flatten(2).permute(0, 2, 1) + + hs, src_out = self.transformer(src_flat, pos_flat, tokens) + + obj_score_token_out = hs[:, 0, :] + iou_token_out = hs[:, 1, :] + mask_tokens_out = hs[:, 2:2 + self.num_mask_tokens, :] + + src_out = src_out.permute(0, 2, 1).view(b, c, h, w) + upscaled = _upscale_masks(self.output_upscaling, self.conv_s0, self.conv_s1, src_out, high_res_features) + + hyper_in = torch.stack([ + mlp(mask_tokens_out[:, i, :]) for i, mlp in enumerate(self.output_hypernetworks_mlps) + ], dim=1) + + masks = (hyper_in @ upscaled.flatten(2)).view(B, self.num_mask_tokens, upscaled.shape[2], upscaled.shape[3]) + iou_pred = self.iou_prediction_head(iou_token_out) + object_score_logits = self.pred_obj_score_head(obj_score_token_out) + + if multimask_output: + out_masks = masks[:, 1:] + out_iou = iou_pred[:, 1:] + out_tokens = mask_tokens_out[:, 1:] + else: + out_masks = masks[:, 0:1] + out_iou = iou_pred[:, 0:1] + out_tokens = mask_tokens_out[:, 0:1] + + if return_all: 
+ return out_masks, out_iou, out_tokens, object_score_logits + return out_masks, out_iou + + +class SAMPromptEncoder(nn.Module): + def __init__(self, d_model=256, image_embedding_size=(72, 72), input_image_size=(1008, 1008), device=None, dtype=None, operations=None): + super().__init__() + self.embed_dim = d_model + self.image_embedding_size = image_embedding_size + self.input_image_size = input_image_size + + self.pe_layer = PositionEmbeddingRandom(d_model // 2) + self.point_embeddings = nn.ModuleList([ + operations.Embedding(1, d_model, device=device, dtype=dtype) for _ in range(4) + ]) + self.not_a_point_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + + LN2d = LayerNorm2d_op(operations) + self.mask_downscaling = nn.Sequential( + operations.Conv2d(1, 4, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(4, device=device, dtype=dtype), nn.GELU(), + operations.Conv2d(4, 16, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(16, device=device, dtype=dtype), nn.GELU(), + operations.Conv2d(16, d_model, kernel_size=1, device=device, dtype=dtype), + ) + self.no_mask_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + + def get_dense_pe(self): + return self.pe_layer(self.image_embedding_size) + + def forward(self, points=None, boxes=None, masks=None): + ref = points[0] if points is not None else boxes if boxes is not None else masks + B = 1 + sparse = torch.empty((B, 0, self.embed_dim), device=ref.device, dtype=ref.dtype) + + if points is not None: + coords, labels = points + B = coords.shape[0] + # Pad with an extra point (label=-1) when no boxes are provided (matching reference) + if boxes is None: + coords = torch.cat([coords, torch.zeros(B, 1, 2, device=coords.device, dtype=coords.dtype)], dim=1) + labels = torch.cat([labels, -torch.ones(B, 1, device=labels.device, dtype=labels.dtype)], dim=1) + pe = self.pe_layer.forward_with_coords(coords + 0.5, self.input_image_size) + for i in range(4): + pe[labels == 
i] += cast_to_input(self.point_embeddings[i].weight, ref) + invalid = (labels == -1) + pe[invalid] = 0.0 + pe[invalid] += cast_to_input(self.not_a_point_embed.weight, ref) + sparse = torch.cat([sparse.expand(B, -1, -1), pe], dim=1) + + if boxes is not None: + B = boxes.shape[0] + corners = self.pe_layer.forward_with_coords((boxes.reshape(-1, 2, 2) + 0.5), self.input_image_size) + corners[:, 0] += cast_to_input(self.point_embeddings[2].weight, ref) + corners[:, 1] += cast_to_input(self.point_embeddings[3].weight, ref) + sparse = torch.cat([sparse.expand(B, -1, -1), corners], dim=1) + + if masks is not None: + dense = self.mask_downscaling(masks) + else: + dense = cast_to_input(self.no_mask_embed.weight, ref).reshape(1, -1, 1, 1).expand( + B, -1, self.image_embedding_size[0], self.image_embedding_size[1]) + + return sparse, dense + + +class CXBlock(nn.Module): + def __init__(self, dim=256, kernel_size=7, device=None, dtype=None, operations=None): + super().__init__() + self.dwconv = operations.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, device=device, dtype=dtype) + self.pwconv1 = operations.Linear(dim, 4 * dim, device=device, dtype=dtype) + self.pwconv2 = operations.Linear(4 * dim, dim, device=device, dtype=dtype) + self.gamma = nn.Parameter(torch.ones(dim, device=device, dtype=dtype)) + + def forward(self, x): + residual = x + x = self.dwconv(x).permute(0, 2, 3, 1) + x = self.pwconv2(F.gelu(self.pwconv1(self.norm(x)))) + x.mul_(cast_to_input(self.gamma, x)) + return residual + x.permute(0, 3, 1, 2) + + +class MaskDownSampler(nn.Module): + def __init__(self, out_dim=256, in_chans=1, channels=None, interpol_size=(1152, 1152), device=None, dtype=None, operations=None): + super().__init__() + self.interpol_size = list(interpol_size) if interpol_size else None + if channels is None: + channels = [4, 16, 64, out_dim] # SAM3 default + LN2d = LayerNorm2d_op(operations) + 
layers = [] + prev = in_chans + for ch in channels: + layers += [operations.Conv2d(prev, ch, kernel_size=3, stride=2, padding=1, device=device, dtype=dtype), + LN2d(ch, device=device, dtype=dtype), nn.GELU()] + prev = ch + layers.append(operations.Conv2d(prev, out_dim, kernel_size=1, device=device, dtype=dtype)) + self.encoder = nn.Sequential(*layers) + + def forward(self, x): + if self.interpol_size is not None and list(x.shape[-2:]) != self.interpol_size: + x = F.interpolate(x, size=self.interpol_size, mode="bilinear", align_corners=False, antialias=True) + return self.encoder(x) + + +class Fuser(nn.Module): + def __init__(self, dim=256, num_layers=2, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.Sequential(*[CXBlock(dim, device=device, dtype=dtype, operations=operations) for _ in range(num_layers)]) + + def forward(self, x): + return self.layers(x) + + +# --- SAM3.1 Multiplex components --- + +class DecoupledMemoryAttnLayer(nn.Module): + """Decoupled cross-attention layer for SAM3.1: fuses image and memory projections.""" + + def __init__(self, d_model=256, num_heads=1, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + # Self-attention projections (flat, not nested) + self.self_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_out_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + # Cross-attention projections + self.cross_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_out_proj = 
operations.Linear(d_model, d_model, device=device, dtype=dtype) + # Image cross-attention (q/k only, fused with cross_attn) + self.image_cross_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.image_cross_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + # FFN + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, image, x, memory_image, memory, memory_image_pos=None, + rope=None, num_k_exclude_rope=0): + # Self-attention with RoPE + normed = self.norm1(x) + q = self.self_attn_q_proj(normed) + k = self.self_attn_k_proj(normed) + v = self.self_attn_v_proj(normed) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, 0) + x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)) + + # Decoupled cross-attention: fuse image and memory projections + normed = self.norm2(x) + q = self.image_cross_attn_q_proj(image) + self.cross_attn_q_proj(normed) + k = self.image_cross_attn_k_proj(memory_image) + self.cross_attn_k_proj(memory) + if memory_image_pos is not None: + k = k + memory_image_pos + v = self.cross_attn_v_proj(memory) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) + x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)) + + # FFN + x = x + self.linear2(F.gelu(self.linear1(self.norm3(x)))) + return image, x + + +class DecoupledMemoryEncoder(nn.Module): + """Memory attention encoder for SAM3.1 with decoupled cross-attention.""" + + def __init__(self, d_model=256, num_heads=1, 
dim_ff=2048, num_layers=4, image_size=1008, patch_size=14, + device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + DecoupledMemoryAttnLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + hw = image_size // patch_size + self.register_buffer("_rope", rope_2d(hw, hw, d_model // num_heads), persistent=False) + + def forward(self, x, memory, memory_pos=None, src_pos=None, num_k_exclude_rope=0, + memory_image=None, memory_image_pos=None): + image = x # constant residual for decoupled cross-attention + output = x + if src_pos is not None: + output = output + 0.1 * src_pos + + B, _, C = x.shape + rope = self._rope.to(device=x.device) + + # memory_image: raw backbone features from past frames for decoupled cross-attention + if memory_image is None: + # Fallback: use spatial portion of memory (without obj pointers) + num_spatial = memory.shape[1] - num_k_exclude_rope + memory_image = memory[:, :num_spatial] + memory_image_pos = memory_pos[:, :num_spatial] if memory_pos is not None else None + # Pad memory_image to match memory length (zeros for obj pointer tokens) + if memory_image.shape[1] < memory.shape[1]: + pad_len = memory.shape[1] - memory_image.shape[1] + pad = torch.zeros(B, pad_len, C, device=memory.device, dtype=memory.dtype) + memory_image = torch.cat([memory_image, pad], dim=1) + if memory_image_pos is not None: + ptr_pos = memory_pos[:, -pad_len:] if memory_pos is not None else torch.zeros_like(pad) + memory_image_pos = torch.cat([memory_image_pos, ptr_pos], dim=1) + + for layer in self.layers: + image, output = layer(image, output, memory_image, memory, + memory_image_pos=memory_image_pos, rope=rope, + num_k_exclude_rope=num_k_exclude_rope) + + return self.norm(output) + + +class DecoupledMemoryTransformer(nn.Module): + def __init__(self, d_model=256, num_heads=1, dim_ff=2048, 
num_layers=4, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = DecoupledMemoryEncoder(d_model, num_heads, dim_ff, num_layers, + device=device, dtype=dtype, operations=operations) + + +class MemoryBackbone(nn.Module): + """Memory encoder: downsamples mask, fuses with pixel features, optionally compresses.""" + + def __init__(self, d_model=256, out_dim=None, in_chans=1, channels=None, device=None, dtype=None, operations=None): + super().__init__() + self.mask_downsampler = MaskDownSampler(d_model, in_chans=in_chans, channels=channels, device=device, dtype=dtype, operations=operations) + self.pix_feat_proj = operations.Conv2d(d_model, d_model, kernel_size=1, device=device, dtype=dtype) + self.fuser = Fuser(d_model, num_layers=2, device=device, dtype=dtype, operations=operations) + self.has_out_proj = out_dim is not None and out_dim != d_model + if self.has_out_proj: + self.out_proj = operations.Conv2d(d_model, out_dim, kernel_size=1, device=device, dtype=dtype) + feat_dim = out_dim + else: + feat_dim = d_model + self.position_encoding = PositionEmbeddingSine(num_pos_feats=feat_dim, normalize=True) + + def forward(self, image_features, mask_for_mem, skip_mask_sigmoid=False): + if not skip_mask_sigmoid: + mask_for_mem = mask_for_mem.sigmoid() + mask_features = self.mask_downsampler(cast_to_input(mask_for_mem, image_features)) + if mask_features.shape[-2:] != image_features.shape[-2:]: + mask_features = F.interpolate(mask_features, size=image_features.shape[-2:], mode="bilinear", align_corners=False) + features = self.pix_feat_proj(image_features) + mask_features + features = self.fuser(features) + if self.has_out_proj: + features = self.out_proj(features) + pos = cast_to_input(self.position_encoding(features), features) + return {"vision_features": features, "vision_pos_enc": [pos]} + + +class MultiplexMaskDecoder(nn.Module): + """SAM mask decoder for SAM3.1 multiplex: predicts masks for num_multiplex objects simultaneously. 
+ + Uses multimask_outputs_only=True: num_mask_output_per_object = num_multimask_outputs (no +1). + Hypernetwork MLPs are shared across multiplex objects. + Token order: [obj_score_token(M), iou_token(M), mask_tokens(M*T)]. + """ + + def __init__(self, d_model=256, num_multiplex=16, num_multimask_outputs=3, device=None, dtype=None, operations=None): + super().__init__() + self.num_multiplex = num_multiplex + self.num_mask_output_per_object = num_multimask_outputs # 3 (multimask_outputs_only) + total_mask_tokens = num_multiplex * self.num_mask_output_per_object # 48 + + self.transformer = SAMTwoWayTransformer(depth=2, embedding_dim=d_model, num_heads=8, mlp_dim=2048, device=device, dtype=dtype, operations=operations) + + self.obj_score_token = operations.Embedding(num_multiplex, d_model, device=device, dtype=dtype) + self.iou_token = operations.Embedding(num_multiplex, d_model, device=device, dtype=dtype) + self.mask_tokens = operations.Embedding(total_mask_tokens, d_model, device=device, dtype=dtype) + + LN2d = LayerNorm2d_op(operations) + self.output_upscaling = nn.Sequential( + operations.ConvTranspose2d(d_model, d_model // 4, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(d_model // 4, device=device, dtype=dtype), nn.GELU(), + operations.ConvTranspose2d(d_model // 4, d_model // 8, kernel_size=2, stride=2, device=device, dtype=dtype), nn.GELU(), + ) + self.conv_s0 = operations.Conv2d(d_model, d_model // 8, kernel_size=1, device=device, dtype=dtype) + self.conv_s1 = operations.Conv2d(d_model, d_model // 4, kernel_size=1, device=device, dtype=dtype) + + # Shared across all multiplex objects (one per mask output) + self.output_hypernetworks_mlps = nn.ModuleList([ + MLP(d_model, d_model, d_model // 8, 3, device=device, dtype=dtype, operations=operations) + for _ in range(self.num_mask_output_per_object) + ]) + self.iou_prediction_head = MLP(d_model, d_model, self.num_mask_output_per_object, 3, device=device, dtype=dtype, operations=operations) + 
self.pred_obj_score_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + + def forward(self, image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, + high_res_features=None, multimask_output=False, return_all=False, extra_per_object_embeddings=None): + B = sparse_prompt_embeddings.shape[0] + M = self.num_multiplex + T = self.num_mask_output_per_object + + # Token order: [obj_score(M), iou(M), mask(M*T)] + ref = sparse_prompt_embeddings + mask_tokens = cast_to_input(self.mask_tokens.weight, ref) + if extra_per_object_embeddings is not None: + mask_tokens = mask_tokens.view(1, M, T, -1).expand(B, -1, -1, -1) + extra_per_object_embeddings.unsqueeze(2) + mask_tokens = mask_tokens.flatten(1, 2) # [B, M*T, C] + other_tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref)], dim=0).unsqueeze(0).expand(B, -1, -1) + tokens = torch.cat([other_tokens, mask_tokens, sparse_prompt_embeddings], dim=1) + else: + tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref), mask_tokens], dim=0) + tokens = torch.cat([tokens.unsqueeze(0).expand(B, -1, -1), sparse_prompt_embeddings], dim=1) + + src = image_embeddings + if src.shape[0] != B: + src = src.expand(B, -1, -1, -1) + src = src + dense_prompt_embeddings + pos_src = image_pe.expand(B, -1, -1, -1) + + b, c, h, w = src.shape + hs, src_out = self.transformer(src.flatten(2).permute(0, 2, 1), pos_src.flatten(2).permute(0, 2, 1), tokens) + + # Parse output tokens + obj_score_token_out = hs[:, :M] + iou_token_out = hs[:, M:2 * M] + mask_tokens_out = hs[:, 2 * M:2 * M + M * T] + + src_out = src_out.permute(0, 2, 1).view(b, c, h, w) + upscaled = _upscale_masks(self.output_upscaling, self.conv_s0, self.conv_s1, src_out, high_res_features) + + # Reshape mask tokens to [B, M, T, C] and apply shared hypernetwork MLPs per mask output index + mask_tokens_2d = 
mask_tokens_out.view(B, M, T, -1) + hyper_in = torch.stack([ + self.output_hypernetworks_mlps[i](mask_tokens_2d[:, :, i, :]) # [B, M, C//8] + for i in range(T) + ], dim=2) # [B, M, T, C//8] + + # Generate masks: [B, M*T, H*W] -> [B, M, T, H, W] + masks = torch.bmm(hyper_in.flatten(1, 2), upscaled.flatten(2)).view(b, M, T, upscaled.shape[2], upscaled.shape[3]) + + # IoU and object scores + iou_pred = self.iou_prediction_head(iou_token_out).view(b, M, T) + object_score_logits = self.pred_obj_score_head(obj_score_token_out) # [B, M, 1] + + # multimask_outputs_only: always output all T masks (no singlemask token) + sam_tokens_out = mask_tokens_2d[:, :, 0:1] # [B, M, 1, C] + + if return_all: + return masks, iou_pred, sam_tokens_out, object_score_logits + return masks, iou_pred + + +class SAM3Tracker(nn.Module): + def __init__(self, d_model=256, mem_dim=64, num_maskmem=7, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + + # Memory attention transformer + self.transformer = MemoryTransformer(d_model, num_heads=1, kv_dim=mem_dim, dim_ff=2048, num_layers=4, + device=device, dtype=dtype, operations=operations) + # SAM components + self.sam_mask_decoder = SAMMaskDecoder(d_model, device=device, dtype=dtype, operations=operations) + self.sam_prompt_encoder = SAMPromptEncoder(d_model, device=device, dtype=dtype, operations=operations) + + # Memory backbone + self.maskmem_backbone = MemoryBackbone(d_model, out_dim=mem_dim, device=device, dtype=dtype, operations=operations) + + # Standalone parameters + self.maskmem_tpos_enc = nn.Parameter(torch.zeros(num_maskmem, 1, 1, mem_dim, device=device, dtype=dtype)) + self.no_mem_embed = nn.Parameter(torch.zeros(1, 1, d_model, device=device, dtype=dtype)) + self.register_buffer("no_mem_pos_enc", torch.zeros(1, 1, d_model, device=device, dtype=dtype)) # checkpoint key, unused in forward + self.no_obj_embed_spatial = nn.Parameter(torch.zeros(1, mem_dim, device=device, dtype=dtype)) + self.no_obj_ptr = 
nn.Parameter(torch.zeros(1, d_model, device=device, dtype=dtype)) + + # Object pointer projection + self.obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + self.obj_ptr_tpos_proj = operations.Linear(d_model, mem_dim, device=device, dtype=dtype) + + # Mask downsample: Conv2d stride 4 to reduce GT mask to SAM logit scale + self.mask_downsample = operations.Conv2d(1, 1, kernel_size=4, stride=4, device=device, dtype=dtype) + + # Config + self.d_model = d_model + self.mem_dim = mem_dim + self.num_maskmem = num_maskmem + self.image_size = 1008 + self.backbone_stride = 14 + self.max_obj_ptrs_in_encoder = 16 + self.sigmoid_scale_for_mem_enc = 20.0 + self.sigmoid_bias_for_mem_enc = -10.0 + + def _no_obj_blend(self, obj_ptr, is_obj): + alpha = is_obj.to(obj_ptr.dtype) + return torch.lerp(cast_to_input(self.no_obj_ptr, obj_ptr), obj_ptr, alpha) + + def _forward_sam_heads(self, backbone_features, point_inputs=None, mask_inputs=None, box_inputs=None, + high_res_features=None, multimask_output=False): + return forward_sam_heads(backbone_features, self.sam_prompt_encoder, self.sam_mask_decoder, + self.obj_ptr_proj, self._no_obj_blend, self.image_size, + point_inputs, mask_inputs, box_inputs, high_res_features, multimask_output) + + def _use_mask_as_output(self, backbone_features, high_res_features, mask_inputs): + return use_mask_as_output(backbone_features, high_res_features, mask_inputs, + self.mask_downsample, self.sam_prompt_encoder, self.sam_mask_decoder, + self.obj_ptr_proj, self._no_obj_blend, self.image_size, self.backbone_stride) + + def _prepare_memory_conditioned_features(self, frame_idx, is_init_cond_frame, current_vision_feats, current_vision_pos_embeds, feat_sizes, output_dict, num_frames): + """Fuse current frame features with memory from previous frames.""" + B = current_vision_feats[-1].shape[0] + C = self.d_model + H, W = feat_sizes[-1] + device = current_vision_feats[-1].device + + if self.num_maskmem == 0: + 
return current_vision_feats[-1].permute(0, 2, 1).view(B, C, H, W) + + if is_init_cond_frame: + # First conditioning frame: no memory yet, add no_mem_embed + pix_feat = current_vision_feats[-1] + cast_to_input(self.no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + to_cat_memory, to_cat_memory_pos, _, _, cond_outputs = collect_memory_tokens( + output_dict, frame_idx, self.num_maskmem, self.maskmem_tpos_enc, device) + + max_obj_ptrs = min(num_frames, self.max_obj_ptrs_in_encoder) + pos_and_ptrs = [] + for t, out in cond_outputs.items(): + if t <= frame_idx: + pos_and_ptrs.append(((frame_idx - t), out["obj_ptr"].to(device))) + for t_diff in range(1, max_obj_ptrs): + t = frame_idx - t_diff + if t < 0: + break + out = output_dict["non_cond_frame_outputs"].get(t, None) + if out is not None: + pos_and_ptrs.append((t_diff, out["obj_ptr"].to(device))) + + num_obj_ptr_tokens = 0 + if len(pos_and_ptrs) > 0: + pos_list, ptrs_list = zip(*pos_and_ptrs) + obj_ptrs = torch.stack(ptrs_list, dim=1) # [B, N, C=256] + + # Temporal position encoding for pointers + obj_pos = compute_tpos_enc( + list(pos_list), device, self.d_model, self.obj_ptr_tpos_proj, + max_abs_pos=max_obj_ptrs, dtype=current_vision_feats[-1].dtype + ) # [N, mem_dim=64] + obj_pos = obj_pos.unsqueeze(0).expand(B, -1, -1) # [B, N, 64] + + # Split each 256-dim pointer into 4 x 64-dim tokens + if self.mem_dim < C: + N = obj_ptrs.shape[1] + obj_ptrs = obj_ptrs.view(B, N, C // self.mem_dim, self.mem_dim) # [B, N, 4, 64] + obj_ptrs = obj_ptrs.reshape(B, N * (C // self.mem_dim), self.mem_dim) # [B, N*4, 64] + obj_pos = obj_pos.unsqueeze(2).expand(-1, -1, C // self.mem_dim, -1) + obj_pos = obj_pos.reshape(B, N * (C // self.mem_dim), self.mem_dim) # [B, N*4, 64] + + to_cat_memory.append(obj_ptrs) + to_cat_memory_pos.append(obj_pos) + num_obj_ptr_tokens = obj_ptrs.shape[1] + + if len(to_cat_memory) == 0: + # No memory available yet, add no_mem_embed + pix_feat = current_vision_feats[-1] + 
def track_step(self, frame_idx, is_init_cond_frame, current_vision_feats, current_vision_pos_embeds, feat_sizes, mask_inputs, output_dict,
               num_frames, point_inputs=None):
    """Process a single frame and return its tracking outputs.

    If ``mask_inputs`` is provided, the mask is used directly as the
    prediction. Otherwise the top-level features are fused with memory and
    decoded by the SAM heads (optionally guided by ``point_inputs``). The
    resulting low-res masks are cleaned, upsampled to ``image_size`` and,
    when ``num_maskmem > 0``, encoded into new memory features.

    Returns:
        dict with keys ``pred_masks``, ``pred_masks_high_res``, ``obj_ptr``,
        ``object_score_logits``, ``maskmem_features`` and ``maskmem_pos_enc``
        (the last two are ``None`` when ``num_maskmem == 0``).
    """
    # Every level except the last goes to the SAM head as a spatial map:
    # flattened tokens -> [B, H, W, C] -> [B, C, H, W]
    high_res_features = None
    if len(current_vision_feats) > 1:
        high_res_features = []
        for level, feat in enumerate(current_vision_feats[:-1]):
            rows, cols = feat_sizes[level][0], feat_sizes[level][1]
            high_res_features.append(feat.view(feat.shape[0], rows, cols, -1).permute(0, 3, 1, 2))

    # Spatial size of the top-level (memory) feature
    H, W = feat_sizes[-1]

    if mask_inputs is None:
        # Track frame: fuse with memory first, then run the SAM decoder
        fused = self._prepare_memory_conditioned_features(
            frame_idx=frame_idx,
            is_init_cond_frame=is_init_cond_frame,
            current_vision_feats=current_vision_feats,
            current_vision_pos_embeds=current_vision_pos_embeds,
            feat_sizes=feat_sizes,
            output_dict=output_dict,
            num_frames=num_frames,
        )
        if point_inputs is None:
            num_pts = 0
        else:
            num_pts = point_inputs["point_labels"].size(1)
        # Multimask only for a single point prompt on an init frame
        multimask_output = is_init_cond_frame and 0 < num_pts <= 1
        sam_outputs = self._forward_sam_heads(
            backbone_features=fused,
            point_inputs=point_inputs,
            high_res_features=high_res_features,
            multimask_output=multimask_output,
        )
    else:
        # Conditioning frame: the supplied mask is the output
        sam_outputs = self._use_mask_as_output(
            to_spatial(current_vision_feats[-1], H, W), high_res_features, mask_inputs)

    # The decoder's own high-res masks are discarded; they are rebuilt below
    # from the cleaned low-res masks.
    low_res_masks, _, obj_ptr, object_score_logits = sam_outputs

    # Remove sprinkles / fill holes, then upsample the cleaned masks
    low_res_masks = fill_holes_in_mask_scores(low_res_masks, max_area=200)
    high_res_masks = F.interpolate(
        low_res_masks, size=(self.image_size, self.image_size), mode="bilinear", align_corners=False)

    current_out = {
        "pred_masks": low_res_masks,
        "pred_masks_high_res": high_res_masks,
        "obj_ptr": obj_ptr,
        "object_score_logits": object_score_logits,
    }

    # Encode this frame's prediction into memory (skipped without a memory bank)
    maskmem_features = maskmem_pos_enc = None
    if self.num_maskmem > 0:
        maskmem_features, maskmem_pos_enc = self._encode_new_memory(
            pix_feat=to_spatial(current_vision_feats[-1], H, W),
            pred_masks_high_res=high_res_masks,
            object_score_logits=object_score_logits,
            is_mask_from_pts=(point_inputs is not None),
        )
    current_out["maskmem_features"] = maskmem_features
    current_out["maskmem_pos_enc"] = maskmem_pos_enc

    return current_out
def track_video(self, backbone_fn, images, initial_masks, pbar=None,
                target_device=None, target_dtype=None, **kwargs):
    """Track one or more objects across video frames.

    Each object is tracked independently by ``_track_single_object`` and the
    per-object results are concatenated along dim 1.

    Args:
        backbone_fn: callable that returns (sam2_features, sam2_positions, trunk_out) for a frame
        images: [N, 3, H, W] CPU full-res video frames (resized per-frame to self.image_size)
        initial_masks: [N_obj, 1, H, W] binary masks for first frame (one per object)
        pbar: optional progress bar, forwarded to every per-object tracking pass
        target_device: optional device override forwarded to the per-object tracker
        target_dtype: optional dtype override forwarded to the per-object tracker
        **kwargs: accepted for call-site compatibility and ignored

    Returns:
        [N, N_obj, image_size, image_size] predicted mask logits per frame per object.
        With zero objects an empty tensor of shape [N, 0, image_size, image_size]
        is returned (allocated on the device of ``images``).
    """
    N_obj = initial_masks.shape[0]
    if N_obj == 0:
        # torch.cat rejects an empty list, so build the empty object axis explicitly
        out_dtype = target_dtype if target_dtype is not None else images.dtype
        return images.new_empty((images.shape[0], 0, self.image_size, self.image_size), dtype=out_dtype)

    per_object = [
        self._track_single_object(
            backbone_fn, images, initial_masks[obj_idx:obj_idx + 1], pbar=pbar,
            target_device=target_device, target_dtype=target_dtype)
        for obj_idx in range(N_obj)
    ]
    return torch.cat(per_object, dim=1)  # [N, N_obj, H, W]
operations=operations) + + # Propagation decoder (multiplex: 16 objects, multimask_outputs_only) + self.sam_mask_decoder = MultiplexMaskDecoder(d_model, num_multiplex, num_multimask_outputs=3, + device=device, dtype=dtype, operations=operations) + # Interactive decoder (single object, same as SAM3) + self.interactive_sam_mask_decoder = SAMMaskDecoder(d_model, num_multimask_outputs=3, + device=device, dtype=dtype, operations=operations) + self.interactive_sam_prompt_encoder = SAMPromptEncoder(d_model, device=device, dtype=dtype, operations=operations) + + # Memory backbone (mem_dim=256, no out_proj compression) + self.maskmem_backbone = MemoryBackbone(d_model, in_chans=num_multiplex * 2, channels=[16, 64, 256, 1024], + device=device, dtype=dtype, operations=operations) + + # Standalone parameters + self.maskmem_tpos_enc = nn.Parameter(torch.zeros(num_maskmem, 1, 1, mem_dim, device=device, dtype=dtype)) + self.no_obj_embed_spatial = nn.Parameter(torch.zeros(num_multiplex, mem_dim, device=device, dtype=dtype)) + self.interactivity_no_mem_embed = nn.Parameter(torch.zeros(1, 1, d_model, device=device, dtype=dtype)) + + # Object pointer projection + self.obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + self.obj_ptr_tpos_proj = operations.Linear(d_model, mem_dim, device=device, dtype=dtype) + self.no_obj_ptr_linear = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.interactive_obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + + # Interactive mask downsample + self.interactive_mask_downsample = operations.Conv2d(1, 1, kernel_size=4, stride=4, device=device, dtype=dtype) + + # Multiplex validity embeddings + self.output_valid_embed = nn.Parameter(torch.zeros(num_multiplex, d_model, device=device, dtype=dtype)) + self.output_invalid_embed = nn.Parameter(torch.zeros(num_multiplex, d_model, device=device, dtype=dtype)) + + # Position encoding for image 
(used by multiplex decoder) + self.image_pe_layer = PositionEmbeddingRandom(d_model // 2) + + def _no_obj_blend(self, obj_ptr, is_obj): + alpha = is_obj.to(obj_ptr.dtype) + return torch.lerp(self.no_obj_ptr_linear(obj_ptr), obj_ptr, alpha) + + def _forward_sam_heads(self, backbone_features, point_inputs=None, mask_inputs=None, box_inputs=None, + high_res_features=None, multimask_output=False): + return forward_sam_heads(backbone_features, self.interactive_sam_prompt_encoder, self.interactive_sam_mask_decoder, + self.interactive_obj_ptr_proj, self._no_obj_blend, self.image_size, + point_inputs, mask_inputs, box_inputs, high_res_features, multimask_output) + + def _use_mask_as_output(self, backbone_features, high_res_features, mask_inputs): + return use_mask_as_output(backbone_features, high_res_features, mask_inputs, + self.interactive_mask_downsample, self.interactive_sam_prompt_encoder, + self.interactive_sam_mask_decoder, self.interactive_obj_ptr_proj, + self._no_obj_blend, self.image_size, self.backbone_stride) + + def _prepare_memory_conditioned_features(self, frame_idx, is_init_cond_frame, current_vision_feats, + current_vision_pos_embeds, feat_sizes, output_dict, num_frames, + multiplex_state=None): + B = current_vision_feats[-1].shape[0] + C = self.d_model + H, W = feat_sizes[-1] + device = current_vision_feats[-1].device + num_buc = multiplex_state.num_buckets if multiplex_state is not None else None + + if self.num_maskmem == 0: + return current_vision_feats[-1].permute(0, 2, 1).view(B, C, H, W) + + if is_init_cond_frame: + pix_feat = current_vision_feats[-1] + cast_to_input(self.interactivity_no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + to_cat_memory, to_cat_memory_pos, to_cat_image_feat, to_cat_image_pos, cond_outputs = collect_memory_tokens( + output_dict, frame_idx, self.num_maskmem, self.maskmem_tpos_enc, device, + collect_image_feats=True, tpos_v2=True, num_buckets=num_buc) + + max_obj_ptrs = min(num_frames, 
self.max_obj_ptrs_in_encoder) + pos_and_ptrs = [] + for t, out in cond_outputs.items(): + if t <= frame_idx and "obj_ptr" in out: + ptr = out["obj_ptr"].to(device) + if num_buc is not None: + ptr = _pad_to_buckets(ptr, num_buc) + pos_and_ptrs.append(((frame_idx - t), ptr)) + for t_diff in range(1, max_obj_ptrs): + t = frame_idx - t_diff + if t < 0: + break + out = output_dict["non_cond_frame_outputs"].get(t, None) + if out is not None and "obj_ptr" in out: + ptr = out["obj_ptr"].to(device) + if num_buc is not None: + ptr = _pad_to_buckets(ptr, num_buc) + pos_and_ptrs.append((t_diff, ptr)) + + num_obj_ptr_tokens = 0 + if len(pos_and_ptrs) > 0: + pos_list, ptrs_list = zip(*pos_and_ptrs) + obj_ptrs = torch.stack(ptrs_list, dim=1) # [num_buckets, N, M, C] + B_ptr = obj_ptrs.shape[0] + N_ptrs = obj_ptrs.shape[1] + M = obj_ptrs.shape[2] + obj_ptrs = obj_ptrs.reshape(B_ptr, N_ptrs * M, -1) + obj_pos = compute_tpos_enc(list(pos_list), device, self.d_model, self.obj_ptr_tpos_proj, + max_abs_pos=max_obj_ptrs, dtype=current_vision_feats[-1].dtype) + obj_pos = obj_pos.unsqueeze(0).expand(B_ptr, -1, -1) + obj_pos = obj_pos.unsqueeze(2).expand(-1, -1, M, -1).reshape(B_ptr, N_ptrs * M, -1) + to_cat_memory.append(obj_ptrs) + to_cat_memory_pos.append(obj_pos) + num_obj_ptr_tokens = obj_ptrs.shape[1] + + if len(to_cat_memory) == 0: + pix_feat = current_vision_feats[-1] + cast_to_input(self.interactivity_no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + memory = torch.cat(to_cat_memory, dim=1) + memory_pos = torch.cat(to_cat_memory_pos, dim=1) + + # Expand vision features to num_buckets if memory has more buckets than B + mem_B = memory.shape[0] + x = current_vision_feats[-1] + x_pos = current_vision_pos_embeds[-1] + if x.shape[0] < mem_B: + x = x.expand(mem_B, -1, -1) + x_pos = x_pos.expand(mem_B, -1, -1) + + if len(to_cat_image_feat) > 0: + # Decoupled cross-attention: separate image features from memory + memory_image = 
cast_to_input(torch.cat(to_cat_image_feat, dim=1), x) + memory_image_pos = cast_to_input(torch.cat(to_cat_image_pos, dim=1), x) + if memory_image.shape[0] < mem_B: + memory_image = memory_image.expand(mem_B, -1, -1) + memory_image_pos = memory_image_pos.expand(mem_B, -1, -1) + pix_feat_with_mem = self.transformer.encoder( + x=x, + memory=cast_to_input(memory, x), + memory_pos=cast_to_input(memory_pos, x), + src_pos=cast_to_input(x_pos, x), + num_k_exclude_rope=num_obj_ptr_tokens, + memory_image=memory_image, + memory_image_pos=memory_image_pos, + ) + else: + pix_feat_with_mem = self.transformer.encoder( + x=x, + memory=memory, + memory_pos=memory_pos, + src_pos=x_pos, + num_k_exclude_rope=num_obj_ptr_tokens, + ) + return to_spatial(pix_feat_with_mem, H, W) + + def _encode_new_memory(self, pix_feat, pred_masks_high_res, object_score_logits, is_mask_from_pts=False, + multiplex_state=None, is_conditioning=False, cond_obj_mask=None): + if is_mask_from_pts: + mask_for_mem = (pred_masks_high_res > 0).to(pix_feat.dtype) + else: + mask_for_mem = torch.sigmoid(pred_masks_high_res) + mask_for_mem.mul_(self.sigmoid_scale_for_mem_enc).add_(self.sigmoid_bias_for_mem_enc) + + # Mux masks: [N_obj, 1, H, W] -> [num_buckets, M, H, W] + mux_masks = multiplex_state.mux(mask_for_mem[:, 0]) + + # Conditioning channel: 1.0 = clean mask (trust it), 0.0 = propagation (noisy) + N_obj = mask_for_mem.shape[0] + cond_values = torch.full((N_obj,), 0.0, device=mask_for_mem.device, dtype=mask_for_mem.dtype) + if is_conditioning: + cond_values[:] = 1.0 + elif cond_obj_mask is not None: + cond_values[cond_obj_mask] = 1.0 + cond_spatial = cond_values.view(-1, 1, 1, 1).expand_as(mask_for_mem[:, 0:1, :, :]).squeeze(1) + mux_cond = multiplex_state.mux(cond_spatial) # [num_buckets, M, H, W] + mux_input = torch.cat([mux_masks, mux_cond], dim=1) # [num_buckets, 2*M, H, W] + + maskmem_out = self.maskmem_backbone(pix_feat, mux_input, skip_mask_sigmoid=True) + maskmem_features = 
def _forward_propagation(self, backbone_features, high_res_features=None, multiplex_state=None):
    """Propagation path: run the multiplex SAM decoder without any prompts.

    Returns:
        Tuple ``(low_res_masks, high_res_masks, obj_ptr_muxed, score_obj)``.
        Masks and scores are per object (after demux); the object pointer is
        muxed back for storage.
    """
    B = backbone_features.shape[0]
    device = backbone_features.device
    feat_dtype = backbone_features.dtype

    # Per-slot suppression embedding, chosen by the valid-object mask
    valid_mask = cast_to_input(multiplex_state.get_valid_object_mask().unsqueeze(-1).float(), backbone_features)
    output_valid = cast_to_input(self.output_valid_embed, backbone_features).unsqueeze(0)
    output_invalid = cast_to_input(self.output_invalid_embed, backbone_features).unsqueeze(0)
    extra_embed = valid_mask * output_valid + (1 - valid_mask) * output_invalid

    image_pe = self.image_pe_layer((backbone_features.shape[-2], backbone_features.shape[-1]),
                                   device=backbone_features.device)
    image_pe = cast_to_input(image_pe, backbone_features)

    # No prompts: zero-length sparse embeddings and an all-zero dense embedding
    empty_sparse = torch.empty(B, 0, self.d_model, device=device, dtype=feat_dtype)
    zero_dense = torch.zeros(B, self.d_model, *backbone_features.shape[-2:], device=device, dtype=feat_dtype)

    masks, iou_pred, sam_tokens_out, object_score_logits = self.sam_mask_decoder(
        image_embeddings=backbone_features, image_pe=image_pe,
        sparse_prompt_embeddings=empty_sparse,
        dense_prompt_embeddings=zero_dense,
        high_res_features=high_res_features, multimask_output=True, return_all=True,
        extra_per_object_embeddings=extra_embed.expand(B, -1, -1),
    )
    # masks: [B=num_buckets, M, T, H, W]
    # Demux to per-object: [N_obj, T, H, W]
    masks_obj = multiplex_state.demux(masks)
    iou_obj = multiplex_state.demux(iou_pred)
    score_obj = multiplex_state.demux(object_score_logits)
    tokens_obj = multiplex_state.demux(sam_tokens_out)

    # For every object keep the candidate with the highest predicted IoU
    best_idx = torch.argmax(iou_obj, dim=-1)  # [N_obj]
    obj_range = torch.arange(masks_obj.shape[0], device=device)
    low_res_masks = masks_obj[obj_range, best_idx].unsqueeze(1)  # [N_obj, 1, H, W]

    # Objects with a non-positive score get their mask replaced by NO_OBJ_SCORE
    present = score_obj > 0
    no_obj_fill = torch.tensor(NO_OBJ_SCORE, device=device, dtype=low_res_masks.dtype)
    low_res_masks = torch.where(present[:, :, None, None], low_res_masks, no_obj_fill)
    high_res_masks = F.interpolate(low_res_masks.float(), size=(self.image_size, self.image_size),
                                   mode="bilinear", align_corners=False)

    # Object pointer from the first output token, blended with the no-object
    # projection for absent objects, then muxed to [num_buckets, M, C]
    obj_ptr = self.obj_ptr_proj(tokens_obj[:, 0])  # [N_obj, C]
    keep = present.float()
    no_obj = self.no_obj_ptr_linear(obj_ptr)
    obj_ptr = keep * obj_ptr + (1 - keep) * no_obj
    obj_ptr_muxed = multiplex_state.mux(obj_ptr)

    return low_res_masks, high_res_masks, obj_ptr_muxed, score_obj
+ # Add no_mem_embed for interactive path + pix_flat = pix_feat.flatten(2) + bf = pix_flat.permute(0, 2, 1) + cast_to_input(self.interactivity_no_mem_embed, pix_flat) + pix_feat = to_spatial(bf, H, W) + hi_res = interactive_high_res + else: + # Fallback: interactive backbone not available (e.g. called outside track_video). + # Propagation features work but may produce lower-quality conditioning. + pix_feat = to_spatial(current_vision_feats[-1], H, W) + hi_res = propagation_high_res + sam_outputs = self._use_mask_as_output(pix_feat, hi_res, mask_inputs) + elif point_inputs is not None: + # Interactive path: use interactive SAM decoder + pix_feat_with_mem = self._prepare_memory_conditioned_features( + frame_idx=frame_idx, is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, output_dict=output_dict, num_frames=num_frames, + multiplex_state=multiplex_state, + ) + hi_res = interactive_high_res if interactive_high_res is not None else propagation_high_res + num_pts = point_inputs["point_labels"].size(1) + multimask_output = is_init_cond_frame and 0 < num_pts <= 1 + sam_outputs = self._forward_sam_heads( + backbone_features=pix_feat_with_mem, point_inputs=point_inputs, + high_res_features=hi_res, multimask_output=multimask_output, + ) + else: + # Propagation path: use multiplex SAM decoder with propagation features + pix_feat_with_mem = self._prepare_memory_conditioned_features( + frame_idx=frame_idx, is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, output_dict=output_dict, num_frames=num_frames, + multiplex_state=multiplex_state, + ) + sam_outputs = self._forward_propagation(pix_feat_with_mem, propagation_high_res, + multiplex_state=multiplex_state) + + (low_res_masks, high_res_masks, obj_ptr, object_score_logits) = sam_outputs + + # Mux obj_ptr if it 
came from interactive path (shape [B, C]) vs propagation ([num_buckets, M, C]) + if multiplex_state is not None and obj_ptr.dim() == 2: + obj_ptr = multiplex_state.mux(obj_ptr) # [N_obj, C] -> [num_buckets, M, C] + + # Encode memory (can be deferred with run_mem_encoder=False) + if run_mem_encoder and self.num_maskmem > 0: + pix_feat = to_spatial(current_vision_feats[-1], H, W) + maskmem_features, maskmem_pos_enc = self._encode_new_memory( + pix_feat=pix_feat, pred_masks_high_res=high_res_masks, + object_score_logits=object_score_logits, + is_mask_from_pts=(point_inputs is not None), + multiplex_state=multiplex_state, + is_conditioning=(mask_inputs is not None), + ) + current_out["maskmem_features"] = maskmem_features + current_out["maskmem_pos_enc"] = maskmem_pos_enc + else: + current_out["maskmem_features"] = None + current_out["maskmem_pos_enc"] = None + + # Store propagation image features for decoupled memory attention + current_out["image_features"] = current_vision_feats[-1] # [B, HW, C] + current_out["image_pos_enc"] = current_vision_pos_embeds[-1] # [B, HW, C] + + current_out["pred_masks"] = low_res_masks + current_out["pred_masks_high_res"] = high_res_masks + current_out["obj_ptr"] = obj_ptr + current_out["object_score_logits"] = object_score_logits + + return current_out + + def _compute_backbone_frame(self, backbone_fn, frame, frame_idx=None): + vision_feats, vision_pos, feat_sizes, features, trunk_out = _compute_backbone(backbone_fn, frame, frame_idx) + return vision_feats, vision_pos, feat_sizes, list(features[:-1]), trunk_out + + @staticmethod + def _suppress_recently_occluded(low_res_masks, last_occluded, frame_idx, threshold=0.3): + """Suppress overlapping masks for objects that were most recently occluded. 
+ Prevents corrupted masks from occluded objects from contaminating other objects.""" + N_obj = low_res_masks.shape[0] + if N_obj <= 1: + return low_res_masks + binary = low_res_masks[:, 0] > 0 # [N_obj, H, W] + iou = _compute_mask_overlap(low_res_masks[:, 0], low_res_masks[:, 0]) + overlapping = torch.triu(iou >= threshold, diagonal=1) # [N, N] upper triangle + last_occ_i = last_occluded.unsqueeze(1) # [N, 1] + last_occ_j = last_occluded.unsqueeze(0) # [1, N] + # Suppress the more recently occluded object in each overlapping pair + suppress_i = overlapping & (last_occ_i > last_occ_j) & (last_occ_j > -1) + suppress_j = overlapping & (last_occ_j > last_occ_i) & (last_occ_i > -1) + to_suppress = suppress_i.any(dim=1) | suppress_j.any(dim=0) + # Update last_occluded for occluded/suppressed objects + is_empty = ~binary.any(dim=(-1, -2)) + newly_occluded = is_empty | to_suppress + last_occluded[newly_occluded] = frame_idx + # Suppress masks + low_res_masks[to_suppress] = -10.0 + return low_res_masks + + def _deferred_memory_encode(self, current_out, N_obj, vision_feats, feat_sizes, mux_state, device, + cond_obj_mask=None): + """Deferred memory encoding for propagation frames. 
cond_obj_mask: per-object bool for conditioning.""" + low_res_masks = current_out["pred_masks"] # [N_obj, 1, H_low, W_low] + + if N_obj > 1: + lr = low_res_masks.squeeze(1) # [N_obj, H, W] + max_obj = torch.argmax(lr, dim=0, keepdim=True) + batch_inds = torch.arange(N_obj, device=device)[:, None, None] + pixel_nol = torch.where(max_obj == batch_inds, lr, torch.clamp(lr, max=-10.0)) + area_before = (lr > 0).sum(dim=(-1, -2)).float().clamp(min=1) + area_after = (pixel_nol > 0).sum(dim=(-1, -2)).float() + shrink_ok = (area_after / area_before) >= 0.3 + low_res_masks = torch.where( + shrink_ok[:, None, None, None].expand_as(low_res_masks), + low_res_masks, torch.clamp(low_res_masks, max=-10.0)) + + interpol_size = self.maskmem_backbone.mask_downsampler.interpol_size + mem_masks = F.interpolate(low_res_masks, size=interpol_size, + mode="bilinear", align_corners=False) + + obj_scores = torch.where( + (mem_masks > 0).any(dim=(-1, -2)), 10.0, -10.0) + + pix_feat = to_spatial(vision_feats[-1], feat_sizes[-1][0], feat_sizes[-1][1]) + maskmem_features, maskmem_pos_enc = self._encode_new_memory( + pix_feat=pix_feat, pred_masks_high_res=mem_masks, + object_score_logits=obj_scores, + multiplex_state=mux_state, cond_obj_mask=cond_obj_mask) + current_out["maskmem_features"] = maskmem_features + current_out["maskmem_pos_enc"] = maskmem_pos_enc + + def _add_detected_objects(self, new_masks, mux_state, vision_feats, feat_sizes, current_out): + """Grow MultiplexState with new detections, merge masks, re-encode memory. 
Modifies current_out.""" + n_old = mux_state.total_valid_entries + mux_state.add_objects(new_masks.shape[0]) + N_obj = mux_state.total_valid_entries + # Stored memory with old bucket counts is padded at read time by _pad_to_buckets + for k in ("pred_masks", "pred_masks_high_res"): + det = F.interpolate(new_masks.unsqueeze(1), size=current_out[k].shape[-2:], + mode="bilinear", align_corners=False) + current_out[k] = torch.cat([current_out[k], det], dim=0) + if self.num_maskmem > 0: + # Mark new objects as conditioning (clean detection masks) so model trusts them + cond_mask = torch.zeros(N_obj, dtype=torch.bool, device=new_masks.device) + cond_mask[n_old:] = True + self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, + mux_state, new_masks.device, cond_obj_mask=cond_mask) + + def _condition_with_masks(self, masks, frame_idx, vision_feats, vision_pos, feat_sizes, + high_res_prop, output_dict, N, mux_state, backbone_obj, frame, + trunk_out, threshold=0.5): + """Condition tracker with masks on a frame.""" + mask_input = F.interpolate(masks if masks.dim() == 4 else masks.unsqueeze(1), + size=(self.image_size, self.image_size), mode="bilinear", align_corners=False) + mask_input = (mask_input > threshold).to(masks.dtype) + hi_res = lo_feat = None + if backbone_obj is not None and backbone_obj.multiplex: + _, _, itf, _ = backbone_obj(frame, tracker_mode="interactive", cached_trunk=trunk_out, tracker_only=True) + hi_res, lo_feat = itf[:-1], itf[-1] + current_out = self.track_step( + frame_idx=frame_idx, is_init_cond_frame=True, current_vision_feats=vision_feats, + current_vision_pos_embeds=vision_pos, feat_sizes=feat_sizes, mask_inputs=mask_input, + output_dict=output_dict, num_frames=N, interactive_high_res=hi_res, + interactive_backbone=lo_feat, propagation_high_res=high_res_prop, + multiplex_state=mux_state, run_mem_encoder=True) + output_dict["cond_frame_outputs"][frame_idx] = current_out + return current_out + + def _match_and_add_detections(self, 
det_masks, det_scores, current_out, mux_state, + vision_feats, feat_sizes, device, max_objects=0, + keep_alive=None): + """Match detections against tracked masks, add new objects, recondition degraded tracks. + Updates keep_alive counters: +1 for matched tracks, -1 for unmatched.""" + N_obj = mux_state.total_valid_entries + if det_masks.shape[0] == 0: + if keep_alive is not None: + for i in range(N_obj): + keep_alive[i] = max(-4, keep_alive.get(i, 0) - 1) + return [] + + # Match at low-res (like reference) + trk_masks = current_out["pred_masks"][:, 0] # [N_obj, H_low, W_low] + det_resized = F.interpolate(det_masks.unsqueeze(1), size=trk_masks.shape[-2:], + mode="bilinear", align_corners=False)[:, 0] + overlap = _compute_mask_overlap(det_resized, trk_masks) + + # Update keep_alive and find matched tracks + matched = set() + if overlap.shape[1] > 0: + matched = set((overlap >= 0.5).any(dim=0).nonzero(as_tuple=True)[0].tolist()) + if keep_alive is not None: + for i in range(N_obj): + if i in matched: + keep_alive[i] = min(8, keep_alive.get(i, 0) + 1) + else: + keep_alive[i] = max(-4, keep_alive.get(i, 0) - 1) + + # Recondition: high-confidence detections (>=0.8) with high overlap refresh tracked masks + reconditioned = False + if det_scores is not None and overlap.shape[1] > 0: + HIGH_CONF = 0.8 + for det_idx in range(overlap.shape[0]): + if det_scores[det_idx] < HIGH_CONF: + continue + best_trk = overlap[det_idx].argmax().item() + if overlap[det_idx, best_trk] >= 0.5: + # Replace tracked mask with fresh detection mask + current_out["pred_masks"][best_trk] = det_resized[det_idx].unsqueeze(0) + det_hr = F.interpolate(det_masks[det_idx:det_idx+1].unsqueeze(1), + size=current_out["pred_masks_high_res"].shape[-2:], + mode="bilinear", align_corners=False) + current_out["pred_masks_high_res"][best_trk] = det_hr[0] + reconditioned = True + + # Re-encode memory if any tracks were reconditioned + if reconditioned and self.num_maskmem > 0: + 
self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, mux_state, device) + + # Add new detections (not matching any track) + if max_objects > 0 and N_obj >= max_objects: + return [] + max_overlap = overlap.max(dim=1)[0] if overlap.shape[1] > 0 else torch.zeros(overlap.shape[0], device=device) + new_dets = max_overlap < 0.5 + if new_dets.any(): + if max_objects > 0: + slots = max_objects - N_obj + new_dets = new_dets & (torch.cumsum(new_dets.int(), 0) <= slots) + self._add_detected_objects(det_masks[new_dets], mux_state, + vision_feats, feat_sizes, current_out) + if keep_alive is not None: + for i in range(N_obj, mux_state.total_valid_entries): + keep_alive[i] = 1 + return det_scores[new_dets].tolist() if det_scores is not None else [0.0] * new_dets.sum().item() + return [] + + INTERNAL_MAX_OBJECTS = 64 # Hard ceiling on accumulated tracks; max_objects=0 or any value above this is clamped here. + + def track_video_with_detection(self, backbone_fn, images, initial_masks, detect_fn=None, + new_det_thresh=0.5, max_objects=0, detect_interval=1, + backbone_obj=None, pbar=None, target_device=None, target_dtype=None): + """Track with optional per-frame detection. 
Returns [N, max_N_obj, H, W] mask logits.""" + if max_objects <= 0 or max_objects > self.INTERNAL_MAX_OBJECTS: + max_objects = self.INTERNAL_MAX_OBJECTS + N = images.shape[0] + device = target_device if target_device is not None else images.device + dt = target_dtype if target_dtype is not None else images.dtype + size = self.image_size + output_dict = {"cond_frame_outputs": {}, "non_cond_frame_outputs": {}} + all_masks = [] + idev = comfy.model_management.intermediate_device() + mux_state = None + if initial_masks is not None: + mux_state = MultiplexState(initial_masks.shape[0], self.num_multiplex, device, dt) + obj_scores = [] # per-object detection score (1.0 for initial masks) + keep_alive = {} if detect_fn is not None else None + last_occluded = torch.empty(0, device=device, dtype=torch.long) # per-object last occluded frame + + # Prefetch next frame's backbone on a separate CUDA stream + prefetch = False + backbone_stream = None + if comfy.model_management.is_device_cuda(device): + try: + backbone_stream = torch.cuda.Stream(device=device) + prefetch = True + except RuntimeError: + pass + cur_bb = self._compute_backbone_frame(backbone_fn, _prep_frame(images, slice(0, 1), device, dt, size), frame_idx=0) + + for frame_idx in tqdm(range(N), desc="tracking"): + vision_feats, vision_pos, feat_sizes, high_res_prop, trunk_out = cur_bb + + # Start next frame's backbone on separate stream (overlaps with current frame's work) + if prefetch and frame_idx + 1 < N: + backbone_stream.wait_stream(torch.cuda.current_stream(device)) + with torch.cuda.stream(backbone_stream): + next_bb = self._compute_backbone_frame( + backbone_fn, _prep_frame(images, slice(frame_idx + 1, frame_idx + 2), device, dt, size), frame_idx=frame_idx + 1) + + # Per-frame detection with NMS (skip if no detect_fn, or interval/max not met) + det_masks = torch.empty(0, device=device) + det_scores = None + run_det = (detect_fn is not None + and frame_idx % max(detect_interval, 1) == 0 + and not (max_objects 
> 0 and mux_state is not None + and mux_state.total_valid_entries >= max_objects)) + if run_det: + det_out = detect_fn(trunk_out) + scores = det_out["scores"][0].sigmoid() + keep = scores > new_det_thresh + det_masks, det_scores = det_out["masks"][0][keep], scores[keep] + if det_masks.shape[0] > 1: + det_masks, det_scores = _nms_masks(det_masks, det_scores) + + if frame_idx == 0 and initial_masks is not None: + current_out = self._condition_with_masks( + initial_masks.to(device=device, dtype=dt), frame_idx, vision_feats, vision_pos, + feat_sizes, high_res_prop, output_dict, N, mux_state, backbone_obj, + _prep_frame(images, slice(frame_idx, frame_idx + 1), device, dt, size), trunk_out) + last_occluded = torch.full((mux_state.total_valid_entries,), -1, device=device, dtype=torch.long) + obj_scores = [1.0] * mux_state.total_valid_entries + if keep_alive is not None: + for i in range(mux_state.total_valid_entries): + keep_alive[i] = 8 + elif mux_state is None or mux_state.total_valid_entries == 0: + if det_masks.shape[0] > 0: + if max_objects > 0: + det_scores = det_scores[:max_objects] + det_masks = det_masks[:max_objects] + mux_state = MultiplexState(det_masks.shape[0], self.num_multiplex, device, dt) + current_out = self._condition_with_masks( + det_masks, frame_idx, vision_feats, vision_pos, feat_sizes, high_res_prop, + output_dict, N, mux_state, backbone_obj, + _prep_frame(images, slice(frame_idx, frame_idx + 1), device, dt, size), trunk_out, threshold=0.0) + last_occluded = torch.full((mux_state.total_valid_entries,), -1, device=device, dtype=torch.long) + obj_scores = det_scores[:mux_state.total_valid_entries].tolist() + if keep_alive is not None: + for i in range(mux_state.total_valid_entries): + keep_alive[i] = 1 + else: + all_masks.append(None) + if pbar is not None: + pbar.update(1) + # Skip to backbone advance at end of loop + if frame_idx + 1 < N: + if prefetch: + torch.cuda.current_stream(device).wait_stream(backbone_stream) + cur_bb = next_bb + else: + 
cur_bb = self._compute_backbone_frame(backbone_fn, _prep_frame(images, slice(frame_idx + 1, frame_idx + 2), device, dt, size), frame_idx=frame_idx + 1) + continue + else: + N_obj = mux_state.total_valid_entries + current_out = self.track_step( + frame_idx=frame_idx, is_init_cond_frame=False, current_vision_feats=vision_feats, + current_vision_pos_embeds=vision_pos, feat_sizes=feat_sizes, mask_inputs=None, + output_dict=output_dict, num_frames=N, propagation_high_res=high_res_prop, + multiplex_state=mux_state, run_mem_encoder=False) + current_out["pred_masks"] = fill_holes_in_mask_scores( + current_out["pred_masks"], max_area=16) + if last_occluded.shape[0] == N_obj and N_obj > 1: + self._suppress_recently_occluded( + current_out["pred_masks"], last_occluded, frame_idx) + if self.num_maskmem > 0: + self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, mux_state, device) + output_dict["non_cond_frame_outputs"][frame_idx] = current_out + lookback = max(self.num_maskmem, self.max_obj_ptrs_in_encoder) + for old_idx in list(output_dict["non_cond_frame_outputs"]): + if old_idx < frame_idx - lookback: + del output_dict["non_cond_frame_outputs"][old_idx] + n_before = mux_state.total_valid_entries + new_obj_scores = self._match_and_add_detections(det_masks, det_scores, current_out, mux_state, + vision_feats, feat_sizes, device, max_objects, + keep_alive if run_det else None) + n_added = mux_state.total_valid_entries - n_before + if n_added > 0: + last_occluded = torch.cat([last_occluded, + torch.full((n_added,), -1, device=device, dtype=torch.long)]) + obj_scores.extend(new_obj_scores) + + masks_out = current_out["pred_masks_high_res"][:, 0] + if keep_alive is not None: + for i in range(masks_out.shape[0]): + if keep_alive.get(i, 0) <= 0: + masks_out[i] = NO_OBJ_SCORE + N_obj_now = mux_state.total_valid_entries if mux_state is not None else 0 + if N_obj_now > 0: + all_masks.append(pack_masks(masks_out).to(idev)) + else: + all_masks.append(None) + if pbar 
is not None: + pbar.update(1) + + # Next frame's backbone + if frame_idx + 1 < N: + if prefetch: + torch.cuda.current_stream(device).wait_stream(backbone_stream) + cur_bb = next_bb + else: + cur_bb = self._compute_backbone_frame(backbone_fn, _prep_frame(images, slice(frame_idx + 1, frame_idx + 2), device, dt, size), frame_idx=frame_idx + 1) + + if not all_masks or all(m is None for m in all_masks): + return {"packed_masks": None, "n_frames": N, "scores": []} + + max_obj = max(m.shape[0] for m in all_masks if m is not None) + sample = next(m for m in all_masks if m is not None) + empty_packed = torch.zeros(max_obj, *sample.shape[1:], dtype=torch.uint8, device=sample.device) + for i, m in enumerate(all_masks): + if m is None: + all_masks[i] = empty_packed + elif m.shape[0] < max_obj: + pad = torch.zeros(max_obj - m.shape[0], *m.shape[1:], dtype=torch.uint8, device=m.device) + all_masks[i] = torch.cat([m, pad], dim=0) + return {"packed_masks": torch.stack(all_masks, dim=0), "n_frames": N, "scores": obj_scores} diff --git a/comfy/ldm/supir/__init__.py b/comfy/ldm/supir/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/comfy/ldm/supir/supir_modules.py b/comfy/ldm/supir/supir_modules.py new file mode 100644 index 000000000..7389b01d2 --- /dev/null +++ b/comfy/ldm/supir/supir_modules.py @@ -0,0 +1,226 @@ +import torch +import torch.nn as nn + +from comfy.ldm.modules.diffusionmodules.util import timestep_embedding +from comfy.ldm.modules.diffusionmodules.openaimodel import Downsample, TimestepEmbedSequential, ResBlock, SpatialTransformer +from comfy.ldm.modules.attention import optimized_attention + + +class ZeroSFT(nn.Module): + def __init__(self, label_nc, norm_nc, concat_channels=0, dtype=None, device=None, operations=None): + super().__init__() + + ks = 3 + pw = ks // 2 + + self.param_free_norm = operations.GroupNorm(32, norm_nc + concat_channels, dtype=dtype, device=device) + + nhidden = 128 + + self.mlp_shared = nn.Sequential( + 
operations.Conv2d(label_nc, nhidden, kernel_size=ks, padding=pw, dtype=dtype, device=device), + nn.SiLU() + ) + self.zero_mul = operations.Conv2d(nhidden, norm_nc + concat_channels, kernel_size=ks, padding=pw, dtype=dtype, device=device) + self.zero_add = operations.Conv2d(nhidden, norm_nc + concat_channels, kernel_size=ks, padding=pw, dtype=dtype, device=device) + + self.zero_conv = operations.Conv2d(label_nc, norm_nc, 1, 1, 0, dtype=dtype, device=device) + self.pre_concat = bool(concat_channels != 0) + + def forward(self, c, h, h_ori=None, control_scale=1): + if h_ori is not None and self.pre_concat: + h_raw = torch.cat([h_ori, h], dim=1) + else: + h_raw = h + + h = h + self.zero_conv(c) + if h_ori is not None and self.pre_concat: + h = torch.cat([h_ori, h], dim=1) + actv = self.mlp_shared(c) + gamma = self.zero_mul(actv) + beta = self.zero_add(actv) + h = self.param_free_norm(h) + h = torch.addcmul(h + beta, h, gamma) + if h_ori is not None and not self.pre_concat: + h = torch.cat([h_ori, h], dim=1) + return torch.lerp(h_raw, h, control_scale) + + +class _CrossAttnInner(nn.Module): + """Inner cross-attention module matching the state_dict layout of the original CrossAttention.""" + def __init__(self, query_dim, context_dim, heads, dim_head, dtype=None, device=None, operations=None): + super().__init__() + inner_dim = dim_head * heads + self.heads = heads + self.to_q = operations.Linear(query_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.to_k = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.to_v = operations.Linear(context_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.to_out = nn.Sequential( + operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), + ) + + def forward(self, x, context): + q = self.to_q(x) + k = self.to_k(context) + v = self.to_v(context) + return self.to_out(optimized_attention(q, k, v, self.heads)) + + +class ZeroCrossAttn(nn.Module): + def __init__(self, 
context_dim, query_dim, dtype=None, device=None, operations=None): + super().__init__() + heads = query_dim // 64 + dim_head = 64 + self.attn = _CrossAttnInner(query_dim, context_dim, heads, dim_head, dtype=dtype, device=device, operations=operations) + self.norm1 = operations.GroupNorm(32, query_dim, dtype=dtype, device=device) + self.norm2 = operations.GroupNorm(32, context_dim, dtype=dtype, device=device) + + def forward(self, context, x, control_scale=1): + b, c, h, w = x.shape + x_in = x + + x = self.attn( + self.norm1(x).flatten(2).transpose(1, 2), + self.norm2(context).flatten(2).transpose(1, 2), + ).transpose(1, 2).unflatten(2, (h, w)) + + return x_in + x * control_scale + + +class GLVControl(nn.Module): + """SUPIR's Guided Latent Vector control encoder. Truncated UNet (input + middle blocks only).""" + def __init__( + self, + in_channels=4, + model_channels=320, + num_res_blocks=2, + attention_resolutions=(4, 2), + channel_mult=(1, 2, 4), + num_head_channels=64, + transformer_depth=(1, 2, 10), + context_dim=2048, + adm_in_channels=2816, + use_linear_in_transformer=True, + use_checkpoint=False, + dtype=None, + device=None, + operations=None, + **kwargs, + ): + super().__init__() + self.model_channels = model_channels + time_embed_dim = model_channels * 4 + + self.time_embed = nn.Sequential( + operations.Linear(model_channels, time_embed_dim, dtype=dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, time_embed_dim, dtype=dtype, device=device), + ) + + self.label_emb = nn.Sequential( + nn.Sequential( + operations.Linear(adm_in_channels, time_embed_dim, dtype=dtype, device=device), + nn.SiLU(), + operations.Linear(time_embed_dim, time_embed_dim, dtype=dtype, device=device), + ) + ) + + self.input_blocks = nn.ModuleList([ + TimestepEmbedSequential( + operations.Conv2d(in_channels, model_channels, 3, padding=1, dtype=dtype, device=device) + ) + ]) + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for nr in 
range(num_res_blocks): + layers = [ + ResBlock(ch, time_embed_dim, 0, out_channels=mult * model_channels, + dtype=dtype, device=device, operations=operations) + ] + ch = mult * model_channels + if ds in attention_resolutions: + num_heads = ch // num_head_channels + layers.append( + SpatialTransformer(ch, num_heads, num_head_channels, + depth=transformer_depth[level], context_dim=context_dim, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + dtype=dtype, device=device, operations=operations) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + if level != len(channel_mult) - 1: + self.input_blocks.append( + TimestepEmbedSequential( + Downsample(ch, True, out_channels=ch, dtype=dtype, device=device, operations=operations) + ) + ) + ds *= 2 + + num_heads = ch // num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock(ch, time_embed_dim, 0, dtype=dtype, device=device, operations=operations), + SpatialTransformer(ch, num_heads, num_head_channels, + depth=transformer_depth[-1], context_dim=context_dim, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + dtype=dtype, device=device, operations=operations), + ResBlock(ch, time_embed_dim, 0, dtype=dtype, device=device, operations=operations), + ) + + self.input_hint_block = TimestepEmbedSequential( + operations.Conv2d(in_channels, model_channels, 3, padding=1, dtype=dtype, device=device) + ) + + def forward(self, x, timesteps, xt, context=None, y=None, **kwargs): + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) + emb = self.time_embed(t_emb) + self.label_emb(y) + + guided_hint = self.input_hint_block(x, emb, context) + + hs = [] + h = xt + for module in self.input_blocks: + if guided_hint is not None: + h = module(h, emb, context) + h += guided_hint + guided_hint = None + else: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + hs.append(h) + return hs + + +class 
SUPIR(nn.Module): + """ + SUPIR model containing GLVControl (control encoder) and project_modules (adapters). + State dict keys match the original SUPIR checkpoint layout: + control_model.* -> GLVControl + project_modules.* -> nn.ModuleList of ZeroSFT/ZeroCrossAttn + """ + def __init__(self, device=None, dtype=None, operations=None): + super().__init__() + + self.control_model = GLVControl(dtype=dtype, device=device, operations=operations) + + project_channel_scale = 2 + cond_output_channels = [320] * 4 + [640] * 3 + [1280] * 3 + project_channels = [int(c * project_channel_scale) for c in [160] * 4 + [320] * 3 + [640] * 3] + concat_channels = [320] * 2 + [640] * 3 + [1280] * 4 + [0] + cross_attn_insert_idx = [6, 3] + + self.project_modules = nn.ModuleList() + for i in range(len(cond_output_channels)): + self.project_modules.append(ZeroSFT( + project_channels[i], cond_output_channels[i], + concat_channels=concat_channels[i], + dtype=dtype, device=device, operations=operations, + )) + + for i in cross_attn_insert_idx: + self.project_modules.insert(i, ZeroCrossAttn( + cond_output_channels[i], concat_channels[i], + dtype=dtype, device=device, operations=operations, + )) diff --git a/comfy/ldm/supir/supir_patch.py b/comfy/ldm/supir/supir_patch.py new file mode 100644 index 000000000..b67ab4cd8 --- /dev/null +++ b/comfy/ldm/supir/supir_patch.py @@ -0,0 +1,103 @@ +import torch +from comfy.ldm.modules.diffusionmodules.openaimodel import Upsample + + +class SUPIRPatch: + """ + Holds GLVControl (control encoder) + project_modules (ZeroSFT/ZeroCrossAttn adapters). + Runs GLVControl lazily on first patch invocation per step, applies adapters through + middle_block_after_patch, output_block_merge_patch, and forward_timestep_embed_patch. 
+ """ + SIGMA_MAX = 14.6146 + + def __init__(self, model_patch, project_modules, hint_latent, strength_start, strength_end): + self.model_patch = model_patch # CoreModelPatcher wrapping GLVControl + self.project_modules = project_modules # nn.ModuleList of ZeroSFT/ZeroCrossAttn + self.hint_latent = hint_latent # encoded LQ image latent + self.strength_start = strength_start + self.strength_end = strength_end + self.cached_features = None + self.adapter_idx = 0 + self.control_idx = 0 + self.current_control_idx = 0 + self.active = True + + def _ensure_features(self, kwargs): + """Run GLVControl on first call per step, cache results.""" + if self.cached_features is not None: + return + x = kwargs["x"] + b = x.shape[0] + hint = self.hint_latent.to(device=x.device, dtype=x.dtype) + if hint.shape[0] != b: + hint = hint.expand(b, -1, -1, -1) if hint.shape[0] == 1 else hint.repeat((b + hint.shape[0] - 1) // hint.shape[0], 1, 1, 1)[:b] + self.cached_features = self.model_patch.model.control_model( + hint, kwargs["timesteps"], x, + kwargs["context"], kwargs["y"] + ) + self.adapter_idx = len(self.project_modules) - 1 + self.control_idx = len(self.cached_features) - 1 + + def _get_control_scale(self, kwargs): + if self.strength_start == self.strength_end: + return self.strength_end + sigma = kwargs["transformer_options"].get("sigmas") + if sigma is None: + return self.strength_end + s = sigma[0].item() if sigma.dim() > 0 else sigma.item() + t = min(s / self.SIGMA_MAX, 1.0) + return t * (self.strength_start - self.strength_end) + self.strength_end + + def middle_after(self, kwargs): + """middle_block_after_patch: run GLVControl lazily, apply last adapter after middle block.""" + self.cached_features = None # reset from previous step + self.current_scale = self._get_control_scale(kwargs) + self.active = self.current_scale > 0 + if not self.active: + return {"h": kwargs["h"]} + self._ensure_features(kwargs) + h = kwargs["h"] + h = self.project_modules[self.adapter_idx]( + 
self.cached_features[self.control_idx], h, control_scale=self.current_scale + ) + self.adapter_idx -= 1 + self.control_idx -= 1 + return {"h": h} + + def output_block(self, h, hsp, transformer_options): + """output_block_patch: ZeroSFT adapter fusion replaces cat([h, hsp]). Returns (h, None) to skip cat.""" + if not self.active: + return h, hsp + self.current_control_idx = self.control_idx + h = self.project_modules[self.adapter_idx]( + self.cached_features[self.control_idx], hsp, h, control_scale=self.current_scale + ) + self.adapter_idx -= 1 + self.control_idx -= 1 + return h, None + + def pre_upsample(self, layer, x, emb, context, transformer_options, output_shape, *args, **kw): + """forward_timestep_embed_patch for Upsample: extra cross-attn adapter before upsample.""" + block_type, _ = transformer_options["block"] + if block_type == "output" and self.active and self.cached_features is not None: + x = self.project_modules[self.adapter_idx]( + self.cached_features[self.current_control_idx], x, control_scale=self.current_scale + ) + self.adapter_idx -= 1 + return layer(x, output_shape=output_shape) + + def to(self, device_or_dtype): + if isinstance(device_or_dtype, torch.device): + self.cached_features = None + if self.hint_latent is not None: + self.hint_latent = self.hint_latent.to(device_or_dtype) + return self + + def models(self): + return [self.model_patch] + + def register(self, model_patcher): + """Register all patches on a cloned model patcher.""" + model_patcher.set_model_patch(self.middle_after, "middle_block_after_patch") + model_patcher.set_model_output_block_patch(self.output_block) + model_patcher.set_model_patch((Upsample, self.pre_upsample), "forward_timestep_embed_patch") diff --git a/comfy/ldm/wan/ar_model.py b/comfy/ldm/wan/ar_model.py new file mode 100644 index 000000000..d72f53602 --- /dev/null +++ b/comfy/ldm/wan/ar_model.py @@ -0,0 +1,276 @@ +""" +CausalWanModel: Wan 2.1 backbone with KV-cached causal self-attention for +autoregressive 
(frame-by-frame) video generation via Causal Forcing. + +Weight-compatible with the standard WanModel -- same layer names, same shapes. +The difference is purely in the forward pass: this model processes one temporal +block at a time and maintains a KV cache across blocks. + +Reference: https://github.com/thu-ml/Causal-Forcing +""" + +import torch +import torch.nn as nn + +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.flux.math import apply_rope1 +from comfy.ldm.wan.model import ( + sinusoidal_embedding_1d, + repeat_e, + WanModel, + WanAttentionBlock, +) +import comfy.ldm.common_dit +import comfy.model_management + + +class CausalWanSelfAttention(nn.Module): + """Self-attention with KV cache support for autoregressive inference.""" + + def __init__(self, dim, num_heads, window_size=(-1, -1), qk_norm=True, + eps=1e-6, operation_settings={}): + assert dim % num_heads == 0 + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.head_dim = dim // num_heads + self.qk_norm = qk_norm + self.eps = eps + + ops = operation_settings.get("operations") + device = operation_settings.get("device") + dtype = operation_settings.get("dtype") + + self.q = ops.Linear(dim, dim, device=device, dtype=dtype) + self.k = ops.Linear(dim, dim, device=device, dtype=dtype) + self.v = ops.Linear(dim, dim, device=device, dtype=dtype) + self.o = ops.Linear(dim, dim, device=device, dtype=dtype) + self.norm_q = ops.RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype) if qk_norm else nn.Identity() + self.norm_k = ops.RMSNorm(dim, eps=eps, elementwise_affine=True, device=device, dtype=dtype) if qk_norm else nn.Identity() + + def forward(self, x, freqs, kv_cache=None, transformer_options={}): + b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim + + q = apply_rope1(self.norm_q(self.q(x)).view(b, s, n, d), freqs) + k = apply_rope1(self.norm_k(self.k(x)).view(b, s, n, d), freqs) + v = self.v(x).view(b, s, n, d) + + if kv_cache is 
None: + x = optimized_attention( + q.view(b, s, n * d), + k.view(b, s, n * d), + v.view(b, s, n * d), + heads=self.num_heads, + transformer_options=transformer_options, + ) + else: + end = kv_cache["end"] + new_end = end + s + + # Roped K and plain V go into cache + kv_cache["k"][:, end:new_end] = k + kv_cache["v"][:, end:new_end] = v + kv_cache["end"] = new_end + + x = optimized_attention( + q.view(b, s, n * d), + kv_cache["k"][:, :new_end].view(b, new_end, n * d), + kv_cache["v"][:, :new_end].view(b, new_end, n * d), + heads=self.num_heads, + transformer_options=transformer_options, + ) + + x = self.o(x) + return x + + +class CausalWanAttentionBlock(WanAttentionBlock): + """Transformer block with KV-cached self-attention and cross-attention caching.""" + + def __init__(self, cross_attn_type, dim, ffn_dim, num_heads, + window_size=(-1, -1), qk_norm=True, cross_attn_norm=False, + eps=1e-6, operation_settings={}): + super().__init__(cross_attn_type, dim, ffn_dim, num_heads, + window_size, qk_norm, cross_attn_norm, eps, + operation_settings=operation_settings) + self.self_attn = CausalWanSelfAttention( + dim, num_heads, window_size, qk_norm, eps, + operation_settings=operation_settings) + + def forward(self, x, e, freqs, context, context_img_len=257, + kv_cache=None, crossattn_cache=None, transformer_options={}): + if e.ndim < 4: + e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1) + else: + e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device).unsqueeze(0) + e).unbind(2) + + # Self-attention with optional KV cache + x = x.contiguous() + y = self.self_attn( + torch.addcmul(repeat_e(e[0], x), self.norm1(x), 1 + repeat_e(e[1], x)), + freqs, kv_cache=kv_cache, transformer_options=transformer_options) + x = torch.addcmul(x, y, repeat_e(e[2], x)) + del y + + # Cross-attention with optional caching + if crossattn_cache is not None and crossattn_cache.get("is_init"): + q = 
self.cross_attn.norm_q(self.cross_attn.q(self.norm3(x))) + x_ca = optimized_attention( + q, crossattn_cache["k"], crossattn_cache["v"], + heads=self.num_heads, transformer_options=transformer_options) + x = x + self.cross_attn.o(x_ca) + else: + x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len, transformer_options=transformer_options) + if crossattn_cache is not None: + crossattn_cache["k"] = self.cross_attn.norm_k(self.cross_attn.k(context)) + crossattn_cache["v"] = self.cross_attn.v(context) + crossattn_cache["is_init"] = True + + # FFN + y = self.ffn(torch.addcmul(repeat_e(e[3], x), self.norm2(x), 1 + repeat_e(e[4], x))) + x = torch.addcmul(x, y, repeat_e(e[5], x)) + return x + + +class CausalWanModel(WanModel): + """ + Wan 2.1 diffusion backbone with causal KV-cache support. + + Same weight structure as WanModel -- loads identical state dicts. + Adds forward_block() for frame-by-frame autoregressive inference. + """ + + def __init__(self, + model_type='t2v', + patch_size=(1, 2, 2), + text_len=512, + in_dim=16, + dim=2048, + ffn_dim=8192, + freq_dim=256, + text_dim=4096, + out_dim=16, + num_heads=16, + num_layers=32, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=True, + eps=1e-6, + image_model=None, + device=None, + dtype=None, + operations=None): + super().__init__( + model_type=model_type, patch_size=patch_size, text_len=text_len, + in_dim=in_dim, dim=dim, ffn_dim=ffn_dim, freq_dim=freq_dim, + text_dim=text_dim, out_dim=out_dim, num_heads=num_heads, + num_layers=num_layers, window_size=window_size, qk_norm=qk_norm, + cross_attn_norm=cross_attn_norm, eps=eps, image_model=image_model, + wan_attn_block_class=CausalWanAttentionBlock, + device=device, dtype=dtype, operations=operations) + + def forward_block(self, x, timestep, context, start_frame, + kv_caches, crossattn_caches, clip_fea=None): + """ + Forward one temporal block for autoregressive inference. 
+ + Args: + x: [B, C, block_frames, H, W] input latent for the current block + timestep: [B, block_frames] per-frame timesteps + context: [B, L, text_dim] raw text embeddings (pre-text_embedding) + start_frame: temporal frame index for RoPE offset + kv_caches: list of per-layer KV cache dicts + crossattn_caches: list of per-layer cross-attention cache dicts + clip_fea: optional CLIP features for I2V + + Returns: + flow_pred: [B, C_out, block_frames, H, W] flow prediction + """ + x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size) + bs, c, t, h, w = x.shape + + x = self.patch_embedding(x.float()).to(x.dtype) + grid_sizes = x.shape[2:] + x = x.flatten(2).transpose(1, 2) + + # Per-frame time embedding + e = self.time_embedding( + sinusoidal_embedding_1d(self.freq_dim, timestep.flatten()).to(dtype=x.dtype)) + e = e.reshape(timestep.shape[0], -1, e.shape[-1]) + e0 = self.time_projection(e).unflatten(2, (6, self.dim)) + + # Text embedding (reuses crossattn_cache after first block) + context = self.text_embedding(context) + + context_img_len = None + if clip_fea is not None and self.img_emb is not None: + context_clip = self.img_emb(clip_fea) + context = torch.concat([context_clip, context], dim=1) + context_img_len = clip_fea.shape[-2] + + # RoPE for current block's temporal position + freqs = self.rope_encode(t, h, w, t_start=start_frame, device=x.device, dtype=x.dtype) + + # Transformer blocks + for i, block in enumerate(self.blocks): + x = block(x, e=e0, freqs=freqs, context=context, + context_img_len=context_img_len, + kv_cache=kv_caches[i], + crossattn_cache=crossattn_caches[i]) + + # Head + x = self.head(x, e) + + # Unpatchify + x = self.unpatchify(x, grid_sizes) + return x[:, :, :t, :h, :w] + + def init_kv_caches(self, batch_size, max_seq_len, device, dtype): + """Create fresh KV caches for all layers.""" + caches = [] + for _ in range(self.num_layers): + caches.append({ + "k": torch.zeros(batch_size, max_seq_len, self.num_heads, self.head_dim, 
device=device, dtype=dtype), + "v": torch.zeros(batch_size, max_seq_len, self.num_heads, self.head_dim, device=device, dtype=dtype), + "end": 0, + }) + return caches + + def init_crossattn_caches(self, batch_size, device, dtype): + """Create fresh cross-attention caches for all layers.""" + caches = [] + for _ in range(self.num_layers): + caches.append({"is_init": False}) + return caches + + def reset_kv_caches(self, kv_caches): + """Reset KV caches to empty (reuse allocated memory).""" + for cache in kv_caches: + cache["end"] = 0 + + def reset_crossattn_caches(self, crossattn_caches): + """Reset cross-attention caches.""" + for cache in crossattn_caches: + cache["is_init"] = False + + @property + def head_dim(self): + return self.dim // self.num_heads + + def forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, **kwargs): + ar_state = transformer_options.get("ar_state") + if ar_state is not None: + bs = x.shape[0] + block_frames = x.shape[2] + t_per_frame = timestep.unsqueeze(1).expand(bs, block_frames) + return self.forward_block( + x=x, timestep=t_per_frame, context=context, + start_frame=ar_state["start_frame"], + kv_caches=ar_state["kv_caches"], + crossattn_caches=ar_state["crossattn_caches"], + clip_fea=clip_fea, + ) + + return super().forward(x, timestep, context, clip_fea=clip_fea, + time_dim_concat=time_dim_concat, + transformer_options=transformer_options, **kwargs) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index b2287dba9..70dfe7b16 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -1135,7 +1135,7 @@ class AudioInjector_WAN(nn.Module): self.injector_adain_output_layers = nn.ModuleList( [operations.Linear(dim, dim, dtype=dtype, device=device) for _ in range(audio_injector_id)]) - def forward(self, x, block_id, audio_emb, audio_emb_global, seq_len): + def forward(self, x, block_id, audio_emb, audio_emb_global, seq_len, scale=1.0): audio_attn_id = 
self.injected_block_id.get(block_id, None) if audio_attn_id is None: return x @@ -1148,12 +1148,15 @@ class AudioInjector_WAN(nn.Module): attn_hidden_states = adain_hidden_states else: attn_hidden_states = self.injector_pre_norm_feat[audio_attn_id](input_hidden_states) - audio_emb = rearrange(audio_emb, "b t n c -> (b t) n c", t=num_frames) - attn_audio_emb = audio_emb + + if audio_emb.dim() == 3: # WanDancer case + attn_audio_emb = rearrange(audio_emb, "b t c -> (b t) 1 c", t=num_frames) + else: # S2V case + attn_audio_emb = rearrange(audio_emb, "b t n c -> (b t) n c", t=num_frames) + residual_out = self.injector[audio_attn_id](x=attn_hidden_states, context=attn_audio_emb) - residual_out = rearrange( - residual_out, "(b t) n c -> b (t n) c", t=num_frames) - x[:, :seq_len] = x[:, :seq_len] + residual_out + residual_out = rearrange(residual_out, "(b t) n c -> b (t n) c", t=num_frames) + x[:, :seq_len] = x[:, :seq_len] + residual_out * scale return x diff --git a/comfy/ldm/wan/model_wandancer.py b/comfy/ldm/wan/model_wandancer.py new file mode 100644 index 000000000..3caef6dc5 --- /dev/null +++ b/comfy/ldm/wan/model_wandancer.py @@ -0,0 +1,251 @@ +import torch +import torch.nn as nn +import comfy +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.flux.math import apply_rope1 +from comfy.ldm.flux.layers import EmbedND + +from .model import AudioInjector_WAN, WanModel, MLPProj, Head, sinusoidal_embedding_1d + + +class MusicSelfAttention(nn.Module): + def __init__(self, dim, num_heads, device=None, dtype=None, operations=None): + assert dim % num_heads == 0 + super().__init__() + self.embed_dim = dim + self.num_heads = num_heads + self.head_dim = dim // num_heads + + self.q_proj = operations.Linear(dim, dim, device=device, dtype=dtype) + self.k_proj = operations.Linear(dim, dim, device=device, dtype=dtype) + self.v_proj = operations.Linear(dim, dim, device=device, dtype=dtype) + self.out_proj = operations.Linear(dim, dim, device=device, 
dtype=dtype) + + def forward(self, x, freqs): + b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim + + q = self.q_proj(x).view(b, s, n, d) + q = apply_rope1(q, freqs) + + k = self.k_proj(x).view(b, s, n, d) + k = apply_rope1(k, freqs) + + x = optimized_attention( + q.view(b, s, n * d), + k.view(b, s, n * d), + self.v_proj(x).view(b, s, n * d), + heads=self.num_heads, + ) + + return self.out_proj(x) + + +class MusicEncoderLayer(nn.Module): + def __init__(self, dim: int, num_heads: int, ffn_dim: int, device=None, dtype=None, operations=None): + super().__init__() + self.self_attn = MusicSelfAttention(dim, num_heads, device=device, dtype=dtype, operations=operations) + + self.linear1 = operations.Linear(dim, ffn_dim, device=device, dtype=dtype) + self.linear2 = operations.Linear(ffn_dim, dim, device=device, dtype=dtype) + + self.norm1 = operations.LayerNorm(dim, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(dim, device=device, dtype=dtype) + + def forward(self, x: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: + x = x + self.self_attn(self.norm1(x), freqs=freqs) + x = x + self.linear2(torch.nn.functional.gelu(self.linear1(self.norm2(x)))) # ffn + return x + + +class WanDancerModel(WanModel): + def __init__(self, + model_type='wandancer', + patch_size=(1, 2, 2), + text_len=512, + in_dim=16, + dim=5120, + ffn_dim=8192, + freq_dim=256, + text_dim=4096, + out_dim=16, + num_heads=16, + num_layers=40, + window_size=(-1, -1), + qk_norm=True, + cross_attn_norm=True, + eps=1e-6, + in_dim_ref_conv=None, + image_model=None, + device=None, dtype=None, operations=None, + audio_inject_layers=[0, 4, 8, 12, 16, 20, 24, 27], + music_dim = 256, + music_heads = 4, + music_feature_dim = 35, + music_latent_dim = 256 + ): + + super().__init__(model_type='i2v', patch_size=patch_size, text_len=text_len, in_dim=in_dim, dim=dim, ffn_dim=ffn_dim, freq_dim=freq_dim, text_dim=text_dim, out_dim=out_dim, + num_heads=num_heads, num_layers=num_layers, 
window_size=window_size, qk_norm=qk_norm, cross_attn_norm=cross_attn_norm, eps=eps, image_model=image_model, in_dim_ref_conv=in_dim_ref_conv, + device=device, dtype=dtype, operations=operations) + + self.dtype = dtype + operation_settings = {"operations": operations, "device": device, "dtype": dtype} + + self.patch_embedding_global = operations.Conv3d(in_dim, dim, kernel_size=patch_size, stride=patch_size, device=operation_settings.get("device"), dtype=torch.float32) + self.img_emb_refimage = MLPProj(1280, dim, operation_settings=operation_settings) + self.head_global = Head(dim, out_dim, patch_size, eps, operation_settings=operation_settings) + + self.music_injector = AudioInjector_WAN( + dim=self.dim, + num_heads=self.num_heads, + inject_layer=audio_inject_layers, + root_net=self, + enable_adain=False, + dtype=dtype, device=device, operations=operations + ) + + self.music_projection = operations.Linear(music_feature_dim, music_latent_dim, device=device, dtype=dtype) + self.music_encoder = nn.ModuleList([MusicEncoderLayer(dim=music_dim, num_heads=music_heads, ffn_dim=1024, device=device, dtype=dtype, operations=operations) for _ in range(2)]) + music_head_dim = music_dim // music_heads + self.music_rope_embedder = EmbedND(dim=music_head_dim, theta=10000.0, axes_dim=[music_head_dim]) + + def forward_orig(self, x, t, context, clip_fea=None, clip_fea_ref=None, freqs=None, audio_embed=None, fps=30, audio_inject_scale=1.0, transformer_options={}, **kwargs): + # embeddings + if int(fps + 0.5) != 30: + x = self.patch_embedding_global(x.float()).to(x.dtype) + else: + x = self.patch_embedding(x.float()).to(x.dtype) + + grid_sizes = x.shape[2:] + latent_frames = grid_sizes[0] + transformer_options["grid_sizes"] = grid_sizes + x = x.flatten(2).transpose(1, 2) + seq_len = x.size(1) + + # time embeddings + e = self.time_embedding(sinusoidal_embedding_1d(self.freq_dim, t.flatten()).to(dtype=x[0].dtype)) + e = e.reshape(t.shape[0], -1, e.shape[-1]) + e0 = 
self.time_projection(e).unflatten(2, (6, self.dim)) + + full_ref = None + if self.ref_conv is not None: # model has the weight, but this wasn't used in the original pipeline + full_ref = kwargs.get("reference_latent", None) + if full_ref is not None: + full_ref = self.ref_conv(full_ref).flatten(2).transpose(1, 2) + x = torch.concat((full_ref, x), dim=1) + + # context + context = self.text_embedding(context) + + audio_emb = None + if audio_embed is not None: # encode music feature,[1, frame_num, 35] -> [1, F*8, dim] + music_feature = self.music_projection(audio_embed) + + music_seq_len = music_feature.shape[1] + music_ids = torch.arange(music_seq_len, device=music_feature.device, dtype=music_feature.dtype).reshape(1, -1, 1) # create 1D position IDs + music_freqs = self.music_rope_embedder(music_ids).movedim(1, 2) + + # apply encoder layers + for layer in self.music_encoder: + music_feature = layer(music_feature, music_freqs) + + # interpolate + audio_emb = torch.nn.functional.interpolate(music_feature.unsqueeze(1), size=(latent_frames * 8, self.dim), mode='bilinear').squeeze(1) + + context_img_len = 0 + if self.img_emb is not None and clip_fea is not None: + context_clip = self.img_emb(clip_fea) # bs x 257 x dim + context = torch.cat([context_clip, context], dim=1) + context_img_len += clip_fea.shape[-2] + if self.img_emb_refimage is not None and clip_fea_ref is not None: + context_clip_ref = self.img_emb_refimage(clip_fea_ref) + context = torch.cat([context_clip_ref, context], dim=1) + context_img_len += clip_fea_ref.shape[-2] + + patches_replace = transformer_options.get("patches_replace", {}) + blocks_replace = patches_replace.get("dit", {}) + transformer_options["total_blocks"] = len(self.blocks) + transformer_options["block_type"] = "double" + for i, block in enumerate(self.blocks): + transformer_options["block_index"] = i + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], context=args["txt"], 
e=args["vec"], freqs=args["pe"], context_img_len=context_img_len, transformer_options=args["transformer_options"]) + return out + out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap}) + x = out["img"] + else: + x = block(x, e=e0, freqs=freqs, context=context, context_img_len=context_img_len, transformer_options=transformer_options) + if audio_emb is not None: + x = self.music_injector(x, i, audio_emb, audio_emb_global=None, seq_len=seq_len, scale=audio_inject_scale) + + # head + if int(fps + 0.5) != 30: + x = self.head_global(x, e) + else: + x = self.head(x, e) + + if full_ref is not None: + x = x[:, full_ref.shape[1]:] + + # unpatchify + x = self.unpatchify(x, grid_sizes) + return x + + def _forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, clip_fea_ref=None, fps=30, audio_inject_scale=1.0, **kwargs): + bs, c, t, h, w = x.shape + x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size) + + t_len = t + if time_dim_concat is not None: + time_dim_concat = comfy.ldm.common_dit.pad_to_patch_size(time_dim_concat, self.patch_size) + x = torch.cat([x, time_dim_concat], dim=2) + t_len = x.shape[2] + + freqs = self.rope_encode(t_len, h, w, device=x.device, dtype=x.dtype, fps=fps, transformer_options=transformer_options) + return self.forward_orig(x, timestep, context, clip_fea=clip_fea, clip_fea_ref=clip_fea_ref, freqs=freqs, fps=fps, audio_inject_scale=audio_inject_scale, transformer_options=transformer_options, **kwargs)[:, :, :t, :h, :w] + + def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, fps=30, device=None, dtype=None, transformer_options={}): + patch_size = self.patch_size + t_len = ((t + (patch_size[0] // 2)) // patch_size[0]) + h_len = ((h + (patch_size[1] // 2)) // patch_size[1]) + w_len = ((w + (patch_size[2] // 2)) // patch_size[2]) + + if steps_t is None: + 
steps_t = t_len + if steps_h is None: + steps_h = h_len + if steps_w is None: + steps_w = w_len + + h_start = 0 + w_start = 0 + rope_options = transformer_options.get("rope_options", None) + if rope_options is not None: + t_len = (t_len - 1.0) * rope_options.get("scale_t", 1.0) + 1.0 + h_len = (h_len - 1.0) * rope_options.get("scale_y", 1.0) + 1.0 + w_len = (w_len - 1.0) * rope_options.get("scale_x", 1.0) + 1.0 + + t_start += rope_options.get("shift_t", 0.0) + h_start += rope_options.get("shift_y", 0.0) + w_start += rope_options.get("shift_x", 0.0) + + img_ids = torch.zeros((steps_t, steps_h, steps_w, 3), device=device, dtype=dtype) + + if int(fps + 0.5) != 30: + time_scale = 30.0 / fps # how many time units each frame represents relative to 30fps + positions_new = torch.arange(steps_t, device=device, dtype=dtype) * time_scale + t_start + total_frames_at_30fps = int(time_scale * steps_t + 0.5) + positions_new[-1] = t_start + (total_frames_at_30fps - 1) + + img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + positions_new.reshape(-1, 1, 1) + else: + img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(t_start, t_start + (t_len - 1), steps=steps_t, device=device, dtype=dtype).reshape(-1, 1, 1) + + img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(h_start, h_start + (h_len - 1), steps=steps_h, device=device, dtype=dtype).reshape(1, -1, 1) + img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(w_start, w_start + (w_len - 1), steps=steps_w, device=device, dtype=dtype).reshape(1, 1, -1) + img_ids = img_ids.reshape(1, -1, img_ids.shape[-1]) + + freqs = self.rope_embedder(img_ids).movedim(1, 2) + return freqs diff --git a/comfy/lora.py b/comfy/lora.py index 63ee85323..f11e26ec9 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -17,6 +17,7 @@ """ from __future__ import annotations +import comfy.memory_management import comfy.utils import comfy.model_management import comfy.model_base @@ -96,12 +97,14 @@ def load_lora(lora, to_load, log_missing=True): def 
model_lora_keys_clip(model, key_map={}): sdk = model.state_dict().keys() + prefix_set = set() for k in sdk: if k.endswith(".weight"): key_map["text_encoders.{}".format(k[:-len(".weight")])] = k #generic lora format without any weird key names tp = k.find(".transformer.") #also map without wrapper prefix for composite text encoder models if tp > 0 and not k.startswith("clip_"): key_map["text_encoders.{}".format(k[tp + 1:-len(".weight")])] = k + prefix_set.add(k.split('.')[0]) text_model_lora_key = "lora_te_text_model_encoder_layers_{}_{}" clip_l_present = False @@ -162,6 +165,13 @@ def model_lora_keys_clip(model, key_map={}): lora_key = "lora_te1_{}".format(l_key.replace(".", "_")) key_map[lora_key] = k + if len(prefix_set) == 1: + full_prefix = "{}.transformer.model.".format(next(iter(prefix_set))) # kohya anima and maybe other single TE models that use a single llama arch based te + for k in sdk: + if k.endswith(".weight"): + if k.startswith(full_prefix): + l_key = k[len(full_prefix):-len(".weight")] + key_map["lora_te_{}".format(l_key.replace(".", "_"))] = k k = "clip_g.transformer.text_projection.weight" if k in sdk: @@ -342,6 +352,12 @@ def model_lora_keys_unet(model, key_map={}): key_map["base_model.model.{}".format(key_lora)] = k # Official base model loras key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k # LyCORIS/LoKR format + if isinstance(model, comfy.model_base.ErnieImage): + for k in sdk: + if k.startswith("diffusion_model.") and k.endswith(".weight"): + key_lora = k[len("diffusion_model."):-len(".weight")] + key_map["transformer.{}".format(key_lora)] = k + return key_map @@ -467,3 +483,17 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori weight = old_weight return weight + +def prefetch_prepared_value(value, allocate_buffer, stream): + if isinstance(value, torch.Tensor): + dest = allocate_buffer(comfy.memory_management.vram_aligned_size(value)) + comfy.model_management.cast_to_gathered([value], dest, 
non_blocking=True, stream=stream) + return comfy.memory_management.interpret_gathered_like([value], dest)[0] + elif isinstance(value, weight_adapter.WeightAdapterBase): + return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, allocate_buffer, stream)) + elif isinstance(value, tuple): + return tuple(prefetch_prepared_value(item, allocate_buffer, stream) for item in value) + elif isinstance(value, list): + return [prefetch_prepared_value(item, allocate_buffer, stream) for item in value] + + return value diff --git a/comfy/model_base.py b/comfy/model_base.py index c2ae646aa..c22705655 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -42,6 +42,8 @@ import comfy.ldm.cosmos.predict2 import comfy.ldm.lumina.model import comfy.ldm.wan.model import comfy.ldm.wan.model_animate +import comfy.ldm.wan.ar_model +import comfy.ldm.wan.model_wandancer import comfy.ldm.hunyuan3d.model import comfy.ldm.hidream.model import comfy.ldm.chroma.model @@ -52,7 +54,12 @@ import comfy.ldm.qwen_image.model import comfy.ldm.kandinsky5.model import comfy.ldm.anima.model import comfy.ldm.ace.ace_step15 +import comfy.ldm.cogvideo.model import comfy.ldm.rt_detr.rtdetr_v4 +import comfy.ldm.ernie.model +import comfy.ldm.sam3.detector +import comfy.ldm.hidream_o1.model +from comfy.ldm.hidream_o1.conditioning import build_extra_conds import comfy.model_management import comfy.patcher_extension @@ -79,6 +86,7 @@ class ModelType(Enum): IMG_TO_IMG = 9 FLOW_COSMOS = 10 IMG_TO_IMG_FLOW = 11 + V_PREDICTION_DDPM = 12 def model_sampling(model_config, model_type): @@ -113,6 +121,8 @@ def model_sampling(model_config, model_type): s = comfy.model_sampling.ModelSamplingCosmosRFlow elif model_type == ModelType.IMG_TO_IMG_FLOW: c = comfy.model_sampling.IMG_TO_IMG_FLOW + elif model_type == ModelType.V_PREDICTION_DDPM: + c = comfy.model_sampling.V_PREDICTION_DDPM class ModelSampling(s, c): pass @@ -208,6 +218,11 @@ class BaseModel(torch.nn.Module): if "latent_shapes" in extra_conds: 
xc = utils.unpack_latents(xc, extra_conds.pop("latent_shapes")) + transformer_options = transformer_options.copy() + transformer_options["prefetch_dynamic_vbars"] = ( + self.current_patcher is not None and self.current_patcher.is_dynamic() + ) + model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds) if len(model_output) > 1 and not torch.is_tensor(model_output): model_output, _ = utils.pack_latents(model_output) @@ -577,8 +592,8 @@ class Stable_Zero123(BaseModel): def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None): super().__init__(model_config, model_type, device=device) self.cc_projection = comfy.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device) - self.cc_projection.weight.copy_(cc_projection_weight) - self.cc_projection.bias.copy_(cc_projection_bias) + self.cc_projection.weight = torch.nn.Parameter(cc_projection_weight.clone()) + self.cc_projection.bias = torch.nn.Parameter(cc_projection_bias.clone()) def extra_conds(self, **kwargs): out = {} @@ -1354,6 +1369,13 @@ class WAN21(BaseModel): return out +class WAN21_CausalAR(WAN21): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super(WAN21, self).__init__(model_config, model_type, device=device, + unet_model=comfy.ldm.wan.ar_model.CausalWanModel) + self.image_to_video = False + + class WAN21_Vace(WAN21): def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None): super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.VaceWanModel) @@ -1580,6 +1602,30 @@ class WAN21_SCAIL(WAN21): return out +class WAN22_WanDancer(WAN21): + def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=True, device=None): + super(WAN21, self).__init__(model_config, model_type, device=device, 
unet_model=comfy.ldm.wan.model_wandancer.WanDancerModel) + self.image_to_video = image_to_video + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + audio_embed = kwargs.get("audio_embed", None) + if audio_embed is not None: + out['audio_embed'] = comfy.conds.CONDRegular(audio_embed) + + clip_vision_output_ref = kwargs.get("clip_vision_output_ref", None) + if clip_vision_output_ref is not None: + out['clip_fea_ref'] = comfy.conds.CONDRegular(clip_vision_output_ref.penultimate_hidden_states) + + fps = kwargs.get("fps", None) + if fps is not None: + out['fps'] = comfy.conds.CONDRegular(torch.FloatTensor([fps])) + + audio_inject_scale = kwargs.get("audio_inject_scale", None) + if audio_inject_scale is not None: + out['audio_inject_scale'] = comfy.conds.CONDRegular(torch.FloatTensor([audio_inject_scale])) + return out + class Hunyuan3Dv2(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2) @@ -1630,6 +1676,39 @@ class HiDream(BaseModel): out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond)) return out +class HiDreamO1(BaseModel): + """HiDream-O1-Image: pixel-space DiT (no VAE). 
Refs from HiDreamO1ReferenceImages and tokens from the stub TE flow through + extra_conds; the heavy preprocessing lives in comfy.ldm.hidream_o1.conditioning.""" + PATCH_SIZE = 32 + + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hidream_o1.model.HiDreamO1Transformer) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + text_input_ids = kwargs.get("text_input_ids", None) + noise = kwargs.get("noise", None) + if text_input_ids is None or noise is None: + return out + + # handle area conds + area = kwargs.get("area", None) + if area is not None: + crop_h = min(noise.shape[-2] - area[2], area[0]) + crop_w = min(noise.shape[-1] - area[3], area[1]) + noise = torch.empty((noise.shape[0], 3, crop_h, crop_w), dtype=noise.dtype, device=noise.device) + + conds = build_extra_conds( + text_input_ids, noise, + ref_images=kwargs.get("reference_latents", None), + target_patch_size=self.PATCH_SIZE, + ) + for k, v in conds.items(): + # ar_len is a Python int (precomputed to avoid a GPU sync in forward). 
+ cls = comfy.conds.CONDConstant if k == "ar_len" else comfy.conds.CONDRegular + out[k] = cls(v) + return out + class Chroma(Flux): def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.chroma.model.Chroma): super().__init__(model_config, model_type, device=device, unet_model=unet_model) @@ -1962,3 +2041,74 @@ class Kandinsky5Image(Kandinsky5): class RT_DETR_v4(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.rt_detr.rtdetr_v4.RTv4) + +class ErnieImage(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.ernie.model.ErnieImageModel) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + return out + +class SAM3(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.sam3.detector.SAM3Model) + +class CogVideoX(BaseModel): + def __init__(self, model_config, model_type=ModelType.V_PREDICTION_DDPM, image_to_video=False, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.cogvideo.model.CogVideoXTransformer3DModel) + self.image_to_video = image_to_video + + def concat_cond(self, **kwargs): + noise = kwargs.get("noise", None) + # Detect extra channels needed (e.g. 
32 - 16 = 16 for ref latent) + extra_channels = self.diffusion_model.in_channels - noise.shape[1] + if extra_channels == 0: + return None + + image = kwargs.get("concat_latent_image", None) + device = kwargs["device"] + + if image is None: + shape = list(noise.shape) + shape[1] = extra_channels + return torch.zeros(shape, dtype=noise.dtype, layout=noise.layout, device=noise.device) + + latent_dim = self.latent_format.latent_channels + image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center") + + if noise.ndim == 5 and image.ndim == 5: + if image.shape[-3] < noise.shape[-3]: + image = torch.nn.functional.pad(image, (0, 0, 0, 0, 0, noise.shape[-3] - image.shape[-3]), "constant", 0) + elif image.shape[-3] > noise.shape[-3]: + image = image[:, :, :noise.shape[-3]] + + for i in range(0, image.shape[1], latent_dim): + image[:, i:i + latent_dim] = self.process_latent_in(image[:, i:i + latent_dim]) + image = utils.resize_to_batch_size(image, noise.shape[0]) + + if image.shape[1] > extra_channels: + image = image[:, :extra_channels] + elif image.shape[1] < extra_channels: + repeats = extra_channels // image.shape[1] + remainder = extra_channels % image.shape[1] + parts = [image] * repeats + if remainder > 0: + parts.append(image[:, :remainder]) + image = torch.cat(parts, dim=1) + + return image + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + # OFS embedding (CogVideoX 1.5 I2V), default 2.0 as used by SparkVSR + if self.diffusion_model.ofs_proj_dim is not None: + ofs = kwargs.get("ofs", None) + if ofs is None: + noise = kwargs.get("noise", None) + ofs = torch.full((noise.shape[0],), 2.0, device=noise.device, dtype=noise.dtype) + out['ofs'] = comfy.conds.CONDRegular(ofs) + return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 1c8ae2325..bc0b933bc 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -490,6 +490,54 @@ def detect_unet_config(state_dict, 
key_prefix, metadata=None): return dit_config + if '{}blocks.0.norm1.linear.weight'.format(key_prefix) in state_dict_keys: # CogVideoX + dit_config = {} + dit_config["image_model"] = "cogvideox" + + # Extract config from weight shapes + norm1_weight = state_dict['{}blocks.0.norm1.linear.weight'.format(key_prefix)] + time_embed_dim = norm1_weight.shape[1] + dim = norm1_weight.shape[0] // 6 + + dit_config["num_attention_heads"] = dim // 64 + dit_config["attention_head_dim"] = 64 + dit_config["time_embed_dim"] = time_embed_dim + dit_config["num_layers"] = count_blocks(state_dict_keys, '{}blocks.'.format(key_prefix) + '{}.') + + # Detect in_channels from patch_embed + patch_proj_key = '{}patch_embed.proj.weight'.format(key_prefix) + if patch_proj_key in state_dict_keys: + w = state_dict[patch_proj_key] + if w.ndim == 4: + # Conv2d: [out, in, kh, kw] — CogVideoX 1.0 + dit_config["in_channels"] = w.shape[1] + dit_config["patch_size"] = w.shape[2] + elif w.ndim == 2: + # Linear: [out, in_channels * patch_size * patch_size * patch_size_t] — CogVideoX 1.5 + dit_config["patch_size"] = 2 + dit_config["patch_size_t"] = 2 + dit_config["in_channels"] = w.shape[1] // (2 * 2 * 2) # 256 // 8 = 32 + + text_proj_key = '{}patch_embed.text_proj.weight'.format(key_prefix) + if text_proj_key in state_dict_keys: + dit_config["text_embed_dim"] = state_dict[text_proj_key].shape[1] + + # Detect OFS embedding + ofs_key = '{}ofs_embedding_linear_1.weight'.format(key_prefix) + if ofs_key in state_dict_keys: + dit_config["ofs_embed_dim"] = state_dict[ofs_key].shape[1] + + # Detect positional embedding type + pos_key = '{}patch_embed.pos_embedding'.format(key_prefix) + if pos_key in state_dict_keys: + dit_config["use_learned_positional_embeddings"] = True + dit_config["use_rotary_positional_embeddings"] = False + else: + dit_config["use_learned_positional_embeddings"] = False + dit_config["use_rotary_positional_embeddings"] = True + + return dit_config + if '{}head.modulation'.format(key_prefix) 
in state_dict_keys: # Wan 2.1 dit_config = {} dit_config["image_model"] = "wan2.1" @@ -524,6 +572,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["model_type"] = "animate" elif '{}patch_embedding_pose.weight'.format(key_prefix) in state_dict_keys: dit_config["model_type"] = "scail" + elif '{}patch_embedding_global.weight'.format(key_prefix) in state_dict_keys: + dit_config["model_type"] = "wandancer" else: if '{}img_emb.proj.0.bias'.format(key_prefix) in state_dict_keys: dit_config["model_type"] = "i2v" @@ -570,6 +620,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["guidance_cond_proj_dim"] = None#f"{key_prefix}t_embedder.cond_proj.weight" in state_dict_keys return dit_config + if '{}t_embedder1.mlp.0.weight'.format(key_prefix) in state_dict_keys and '{}x_embedder.proj1.weight'.format(key_prefix) in state_dict_keys: # HiDream-O1 + return {"image_model": "hidream_o1"} + if '{}caption_projection.0.linear.weight'.format(key_prefix) in state_dict_keys: # HiDream dit_config = {} dit_config["image_model"] = "hidream" @@ -696,6 +749,15 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): if '{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix) in state_dict_keys: dit_config = {} dit_config["audio_model"] = "ace1.5" + head_dim = 128 + dit_config["hidden_size"] = state_dict['{}decoder.layers.0.self_attn_norm.weight'.format(key_prefix)].shape[0] + dit_config["intermediate_size"] = state_dict['{}decoder.layers.0.mlp.gate_proj.weight'.format(key_prefix)].shape[0] + dit_config["num_heads"] = state_dict['{}decoder.layers.0.self_attn.q_proj.weight'.format(key_prefix)].shape[0] // head_dim + + dit_config["encoder_hidden_size"] = state_dict['{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix)].shape[0] + dit_config["encoder_num_heads"] = state_dict['{}encoder.lyric_encoder.layers.0.self_attn.q_proj.weight'.format(key_prefix)].shape[0] // head_dim + 
dit_config["encoder_intermediate_size"] = state_dict['{}encoder.lyric_encoder.layers.0.mlp.gate_proj.weight'.format(key_prefix)].shape[0] + dit_config["num_dit_layers"] = count_blocks(state_dict_keys, '{}decoder.layers.'.format(key_prefix) + '{}.') return dit_config if '{}encoder.pan_blocks.1.cv4.conv.weight'.format(key_prefix) in state_dict_keys: # RT-DETR_v4 @@ -704,6 +766,19 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["enc_h"] = state_dict['{}encoder.pan_blocks.1.cv4.conv.weight'.format(key_prefix)].shape[0] return dit_config + if '{}layers.0.mlp.linear_fc2.weight'.format(key_prefix) in state_dict_keys: # Ernie Image + dit_config = {} + dit_config["image_model"] = "ernie" + return dit_config + + if 'detector.backbone.vision_backbone.trunk.blocks.0.attn.qkv.weight' in state_dict_keys: # SAM3 / SAM3.1 + if 'detector.transformer.decoder.query_embed.weight' in state_dict_keys: + dit_config = {} + dit_config["image_model"] = "SAM3" + if 'detector.backbone.vision_backbone.propagation_convs.0.conv_1x1.weight' in state_dict_keys: + dit_config["image_model"] = "SAM31" + return dit_config + if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys: return None @@ -859,6 +934,10 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal return model_config def unet_prefix_from_state_dict(state_dict): + # SAM3: detector.* and tracker.* at top level, no common prefix + if any(k.startswith("detector.") for k in state_dict) and any(k.startswith("tracker.") for k in state_dict): + return "" + candidates = ["model.diffusion_model.", #ldm/sgm models "model.model.", #audio models "net.", #cosmos diff --git a/comfy/model_management.py b/comfy/model_management.py index 0eebf1ded..21738a4c7 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -31,6 +31,7 @@ from contextlib import nullcontext import comfy.memory_management import comfy.utils import comfy.quant_ops +import 
comfy_aimdo.vram_buffer class VRAMState(Enum): DISABLED = 0 #No vram present: no need to move models to vram @@ -112,10 +113,6 @@ if args.directml is not None: # torch_directml.disable_tiled_resources(True) lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default. -try: - import intel_extension_for_pytorch as ipex # noqa: F401 -except: - pass try: _ = torch.xpu.device_count() @@ -583,9 +580,6 @@ class LoadedModel: real_model = self.model.model - if is_intel_xpu() and not args.disable_ipex_optimize and 'ipex' in globals() and real_model is not None: - with torch.no_grad(): - real_model = ipex.optimize(real_model.eval(), inplace=True, graph_mode=True, concat_linear=True) self.real_model = weakref.ref(real_model) self.model_finalizer = weakref.finalize(real_model, cleanup_models) @@ -663,6 +657,7 @@ def minimum_inference_memory(): def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0): cleanup_models_gc() + comfy.memory_management.extra_ram_release(max(pins_required, ram_required)) unloaded_model = [] can_unload = [] unloaded_models = [] @@ -726,13 +721,15 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu else: minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory()) - models_temp = set() + # Order-preserving dedup. 
A plain set() would randomize iteration order across runs + models_temp = {} for m in models: - models_temp.add(m) + models_temp[m] = None for mm in m.model_patches_models(): - models_temp.add(mm) + models_temp[mm] = None - models = models_temp + models = list(models_temp) + models.reverse() models_to_load = [] @@ -1181,6 +1178,10 @@ stream_counters = {} STREAM_CAST_BUFFERS = {} LARGEST_CASTED_WEIGHT = (None, 0) +STREAM_AIMDO_CAST_BUFFERS = {} +LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) + +DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3 def get_cast_buffer(offload_stream, device, size, ref): global LARGEST_CASTED_WEIGHT @@ -1214,13 +1215,26 @@ def get_cast_buffer(offload_stream, device, size, ref): return cast_buffer +def get_aimdo_cast_buffer(offload_stream, device): + cast_buffer = STREAM_AIMDO_CAST_BUFFERS.get(offload_stream, None) + if cast_buffer is None: + cast_buffer = comfy_aimdo.vram_buffer.VRAMBuffer(DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE, device.index) + STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer + + return cast_buffer def reset_cast_buffers(): global LARGEST_CASTED_WEIGHT + global LARGEST_AIMDO_CASTED_WEIGHT + LARGEST_CASTED_WEIGHT = (None, 0) - for offload_stream in STREAM_CAST_BUFFERS: - offload_stream.synchronize() + LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) + for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS): + if offload_stream is not None: + offload_stream.synchronize() synchronize() + STREAM_CAST_BUFFERS.clear() + STREAM_AIMDO_CAST_BUFFERS.clear() soft_empty_cache() def get_offload_stream(device): @@ -1580,10 +1594,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - if torch_version_numeric < (2, 3): - return True - else: - return torch.xpu.get_device_properties(device).has_fp16 + return torch.xpu.get_device_properties(device).has_fp16 if is_ascend_npu(): return True @@ -1649,10 +1660,7 @@ def should_use_bf16(device=None, 
model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - if torch_version_numeric < (2, 3): - return True - else: - return torch.xpu.is_bf16_supported() + return torch.xpu.is_bf16_supported() if is_ascend_npu(): return True @@ -1732,6 +1740,21 @@ def supports_mxfp8_compute(device=None): return True +def supports_fp64(device=None): + if is_device_mps(device): + return False + + if is_intel_xpu(): + return False + + if is_directml_enabled(): + return False + + if is_ixuca(): + return False + + return True + def extended_fp16_support(): # TODO: check why some models work with fp16 on newer torch versions but not on older if torch_version_numeric < (2, 7): @@ -1768,6 +1791,7 @@ def soft_empty_cache(force=False): if cpu_state == CPUState.MPS: torch.mps.empty_cache() elif is_intel_xpu(): + torch.xpu.synchronize() torch.xpu.empty_cache() elif is_ascend_npu(): torch.npu.empty_cache() @@ -1786,7 +1810,7 @@ def debug_memory_summary(): return torch.cuda.memory.memory_summary() return "" -class InterruptProcessingException(Exception): +class InterruptProcessingException(BaseException): pass interrupt_processing_mutex = threading.RLock() diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 6deb71e12..2ea14bc2c 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -26,11 +26,13 @@ import uuid from typing import Callable, Optional import torch +import tqdm import comfy.float import comfy.hooks import comfy.lora import comfy.model_management +import comfy.ops import comfy.patcher_extension import comfy.utils from comfy.comfy_types import UnetWrapperFunction @@ -120,9 +122,20 @@ class LowVramPatch: self.patches = patches self.convert_func = convert_func # TODO: remove self.set_func = set_func + self.prepared_patches = None + + def prepare(self, allocate_buffer, stream): + self.prepared_patches = [ + (patch[0], comfy.lora.prefetch_prepared_value(patch[1], allocate_buffer, stream), patch[2], patch[3], patch[4]) + for patch in 
self.patches[self.key] + ] + + def clear_prepared(self): + self.prepared_patches = None def __call__(self, weight): - return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=weight.dtype) + patches = self.prepared_patches if self.prepared_patches is not None else self.patches[self.key] + return comfy.lora.calculate_weight(patches, weight, self.key, intermediate_dtype=weight.dtype) LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR = 2 @@ -229,6 +242,37 @@ class LazyCastingParam(torch.nn.Parameter): return self.model.patch_weight_to_device(self.key, device_to=self.model.load_device, return_weight=True).to("cpu") +class LazyCastingQuantizedParam: + def __init__(self, model, key): + self.model = model + self.key = key + self.cpu_state_dict = None + + def state_dict_tensor(self, state_dict_key): + if self.cpu_state_dict is None: + weight = self.model.patch_weight_to_device(self.key, device_to=self.model.load_device, return_weight=True) + self.cpu_state_dict = {k: v.to("cpu") for k, v in weight.state_dict(self.key).items()} + return self.cpu_state_dict[state_dict_key] + + +class LazyCastingParamPiece(torch.nn.Parameter): + def __new__(cls, caster, state_dict_key, tensor): + return super().__new__(cls, tensor) + + def __init__(self, caster, state_dict_key, tensor): + self.caster = caster + self.state_dict_key = state_dict_key + + @property + def device(self): + return CustomTorchDevice + + def to(self, *args, **kwargs): + caster = self.caster + del self.caster + return caster.state_dict_tensor(self.state_dict_key) + + class ModelPatcher: def __init__(self, model, load_device, offload_device, size=0, weight_inplace_update=False): self.size = size @@ -506,6 +550,10 @@ class ModelPatcher: def set_model_noise_refiner_patch(self, patch): self.set_model_patch(patch, "noise_refiner") + def set_model_middle_block_after_patch(self, patch): + self.set_model_patch(patch, "middle_block_after_patch") + + def set_model_rope_options(self, scale_x, shift_x, 
scale_y, shift_y, scale_t, shift_t, **kwargs): rope_options = self.model_options["transformer_options"].get("rope_options", {}) rope_options["scale_x"] = scale_x @@ -681,9 +729,9 @@ class ModelPatcher: sd.pop(k) return sd - def patch_weight_to_device(self, key, device_to=None, inplace_update=False, return_weight=False): + def patch_weight_to_device(self, key, device_to=None, inplace_update=False, return_weight=False, force_cast=False): weight, set_func, convert_func = get_key_weight(self.model, key) - if key not in self.patches: + if key not in self.patches and not force_cast: return weight inplace_update = self.weight_inplace_update or inplace_update @@ -691,7 +739,7 @@ class ModelPatcher: if key not in self.backup and not return_weight: self.backup[key] = collections.namedtuple('Dimension', ['weight', 'inplace_update'])(weight.to(device=self.offload_device, copy=inplace_update), inplace_update) - temp_dtype = comfy.model_management.lora_compute_dtype(device_to) + temp_dtype = comfy.model_management.lora_compute_dtype(device_to) if key in self.patches else None if device_to is not None: temp_weight = comfy.model_management.cast_to_device(weight, device_to, temp_dtype, copy=True) else: @@ -699,9 +747,10 @@ class ModelPatcher: if convert_func is not None: temp_weight = convert_func(temp_weight, inplace=True) - out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key) + out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key) if key in self.patches else temp_weight if set_func is None: - out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key)) + if key in self.patches: + out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key)) if return_weight: return out_weight elif inplace_update: @@ -851,7 +900,9 @@ class ModelPatcher: if m.comfy_patched_weights == True: continue - for param in params: + for param, param_value in 
params.items(): + if hasattr(m, "comfy_cast_weights") and getattr(param_value, "is_meta", False): + comfy.ops.disable_weight_init._zero_init_parameter(m, param) key = key_param_name_to_key(n, param) self.unpin_weight(key) self.patch_weight_to_device(key, device_to=device_to) @@ -1443,20 +1494,37 @@ class ModelPatcher: self.clear_cached_hook_weights() def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None): - unet_state_dict = self.model.diffusion_model.state_dict() - for k, v in unet_state_dict.items(): + original_state_dict = self.model.diffusion_model.state_dict() + unet_state_dict = {} + keys = list(original_state_dict) + while len(keys) > 0: + k = keys.pop(0) + v = original_state_dict[k] op_keys = k.rsplit('.', 1) if (len(op_keys) < 2) or op_keys[1] not in ["weight", "bias"]: + unet_state_dict[k] = v continue try: op = comfy.utils.get_attr(self.model.diffusion_model, op_keys[0]) except: + unet_state_dict[k] = v continue if not op or not hasattr(op, "comfy_cast_weights") or \ (hasattr(op, "comfy_patched_weights") and op.comfy_patched_weights == True): + unet_state_dict[k] = v continue key = "diffusion_model." + k - unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key)) + weight = comfy.utils.get_attr(self.model, key) + if isinstance(weight, QuantizedTensor) and k in original_state_dict: + qt_state_dict = weight.state_dict(k) + caster = LazyCastingQuantizedParam(self, key) + for group_key in (x for x in qt_state_dict if x in original_state_dict): + if group_key in keys: + keys.remove(group_key) + unet_state_dict.pop(group_key, "") + unet_state_dict[group_key] = LazyCastingParamPiece(caster, "diffusion_model." 
+ group_key, original_state_dict[group_key]) + continue + unet_state_dict[k] = LazyCastingParam(self, key, weight) return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict) def __del__(self): @@ -1580,7 +1648,7 @@ class ModelPatcherDynamic(ModelPatcher): key = key_param_name_to_key(n, param_key) if key in self.backup: comfy.utils.set_attr_param(self.model, key, self.backup[key].weight) - self.patch_weight_to_device(key, device_to=device_to) + self.patch_weight_to_device(key, device_to=device_to, force_cast=True) weight, _, _ = get_key_weight(self.model, key) if weight is not None: self.model.model_loaded_weight_memory += weight.numel() * weight.element_size() @@ -1605,6 +1673,10 @@ class ModelPatcherDynamic(ModelPatcher): m._v = vbar.alloc(v_weight_size) allocated_size += v_weight_size + for param in params: + if param not in ("weight", "bias"): + force_load_param(self, param, device_to) + else: for param in params: key = key_param_name_to_key(n, param) @@ -1628,7 +1700,11 @@ class ModelPatcherDynamic(ModelPatcher): self.model.model_loaded_weight_memory += casted_buf.numel() * casted_buf.element_size() force_load_stat = f" Force pre-loaded {len(self.backup)} weights: {self.model.model_loaded_weight_memory // 1024} KB." if len(self.backup) > 0 else "" - logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.{force_load_stat}") + log_key = (self.patches_uuid, allocated_size, num_patches, len(self.backup), self.model.model_loaded_weight_memory) + in_loop = bool(getattr(tqdm.tqdm, "_instances", None)) + level = logging.DEBUG if in_loop and getattr(self, "_last_prepare_log_key", None) == log_key else logging.INFO + self._last_prepare_log_key = log_key + logging.log(level, f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. 
{allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.{force_load_stat}") self.model.device = device_to self.model.current_weight_patches_uuid = self.patches_uuid diff --git a/comfy/model_prefetch.py b/comfy/model_prefetch.py new file mode 100644 index 000000000..72e11dec6 --- /dev/null +++ b/comfy/model_prefetch.py @@ -0,0 +1,66 @@ +import comfy_aimdo.model_vbar +import comfy.model_management +import comfy.ops + +PREFETCH_QUEUES = [] + +def cleanup_prefetched_modules(comfy_modules): + for s in comfy_modules: + prefetch = getattr(s, "_prefetch", None) + if prefetch is None: + continue + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + lowvram_fn.clear_prepared() + if prefetch["signature"] is not None: + comfy_aimdo.model_vbar.vbar_unpin(s._v) + delattr(s, "_prefetch") + +def cleanup_prefetch_queues(): + global PREFETCH_QUEUES + + for queue in PREFETCH_QUEUES: + for entry in queue: + if entry is None or not isinstance(entry, tuple): + continue + _, prefetch_state = entry + comfy_modules = prefetch_state[1] + if comfy_modules is not None: + cleanup_prefetched_modules(comfy_modules) + PREFETCH_QUEUES = [] + +def prefetch_queue_pop(queue, device, module): + if queue is None: + return + + consumed = queue.pop(0) + if consumed is not None: + offload_stream, prefetch_state = consumed + if offload_stream is not None: + offload_stream.wait_stream(comfy.model_management.current_stream(device)) + _, comfy_modules = prefetch_state + if comfy_modules is not None: + cleanup_prefetched_modules(comfy_modules) + + prefetch = queue[0] + if prefetch is not None: + comfy_modules = [] + for s in prefetch.modules(): + if hasattr(s, "_v"): + comfy_modules.append(s) + + offload_stream = comfy.ops.cast_modules_with_vbar(comfy_modules, None, device, None, True) + comfy.model_management.sync_stream(device, offload_stream) + queue[0] = (offload_stream, (prefetch, comfy_modules)) + +def 
make_prefetch_queue(queue, device, transformer_options): + if (not transformer_options.get("prefetch_dynamic_vbars", False) + or comfy.model_management.NUM_STREAMS == 0 + or comfy.model_management.is_device_cpu(device) + or not comfy.model_management.device_supports_non_blocking(device)): + return None + + queue = [None] + queue + [None] + PREFETCH_QUEUES.append(queue) + return queue diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py index 13860e6a2..5af336e76 100644 --- a/comfy/model_sampling.py +++ b/comfy/model_sampling.py @@ -54,6 +54,30 @@ class V_PREDICTION(EPS): sigma = reshape_sigma(sigma, model_output.ndim) return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 +class V_PREDICTION_DDPM: + """CogVideoX v-prediction: model receives raw x_t (unscaled), predicts velocity v. + x_0 = sqrt(alpha) * x_t - sqrt(1-alpha) * v + = x_t / sqrt(sigma^2 + 1) - v * sigma / sqrt(sigma^2 + 1) + """ + def calculate_input(self, sigma, noise): + return noise + + def calculate_denoised(self, sigma, model_output, model_input): + sigma = reshape_sigma(sigma, model_output.ndim) + return model_input / (sigma ** 2 + 1.0) ** 0.5 - model_output * sigma / (sigma ** 2 + 1.0) ** 0.5 + + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + sigma = reshape_sigma(sigma, noise.ndim) + if max_denoise: + noise = noise * torch.sqrt(1.0 + sigma ** 2.0) + else: + noise = noise * sigma + noise += latent_image + return noise + + def inverse_noise_scaling(self, sigma, latent): + return latent + class EDM(V_PREDICTION): def calculate_denoised(self, sigma, model_output, model_input): sigma = reshape_sigma(sigma, model_output.ndim) @@ -69,7 +93,8 @@ class CONST: def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): sigma = reshape_sigma(sigma, noise.ndim) - return sigma * noise + (1.0 - sigma) * latent_image + s = getattr(self, "noise_scale", 
1.0) + return sigma * (s * noise) + (1.0 - sigma) * latent_image def inverse_noise_scaling(self, sigma, latent): sigma = reshape_sigma(sigma, latent.ndim) @@ -264,7 +289,11 @@ class ModelSamplingDiscreteFlow(torch.nn.Module): else: sampling_settings = {} - self.set_parameters(shift=sampling_settings.get("shift", 1.0), multiplier=sampling_settings.get("multiplier", 1000)) + self.set_noise_scale(sampling_settings.get("noise_scale", 1.0)) + self.set_parameters( + shift=sampling_settings.get("shift", 1.0), + multiplier=sampling_settings.get("multiplier", 1000), + ) def set_parameters(self, shift=1.0, timesteps=1000, multiplier=1000): self.shift = shift @@ -272,6 +301,9 @@ class ModelSamplingDiscreteFlow(torch.nn.Module): ts = self.sigma((torch.arange(1, timesteps + 1, 1) / timesteps) * multiplier) self.register_buffer('sigmas', ts) + def set_noise_scale(self, noise_scale): + self.noise_scale = float(noise_scale) + @property def sigma_min(self): return self.sigmas[0] diff --git a/comfy/ops.py b/comfy/ops.py index b5cd1d47e..f9456854b 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -79,37 +79,68 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) -def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): +def materialize_meta_param(s, param_keys): + for param_key in param_keys: + param = getattr(s, param_key, None) + if param is not None and getattr(param, "is_meta", False): + setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad)) - #vbar doesn't support CPU weights, but some custom nodes have weird paths - #that might switch the layer to the CPU and expect it to work. 
We have to take - #a clone conservatively as we are mmapped and some SFT files are packed misaligned - #If you are a custom node author reading this, please move your layer to the GPU - #or declare your ModelPatcher as CPU in the first place. - if comfy.model_management.is_device_cpu(device): - weight = s.weight.to(dtype=dtype, copy=True) - if isinstance(weight, QuantizedTensor): - weight = weight.dequantize() - bias = None - if s.bias is not None: - bias = s.bias.to(dtype=bias_dtype, copy=True) - return weight, bias, (None, None, None) +# FIXME: add n=1 cache hit fast path +def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blocking): offload_stream = None - xfer_dest = None + cast_buffer = None + cast_buffer_offset = 0 + + def ensure_offload_stream(module, required_size, check_largest): + nonlocal offload_stream + nonlocal cast_buffer + + if offload_stream is None: + offload_stream = comfy.model_management.get_offload_stream(device) + if offload_stream is None or not check_largest or len(comfy_modules) != 1: + return + + current_size = 0 if cast_buffer is None else cast_buffer.size() + if current_size < required_size and module is comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT[0]: + offload_stream = comfy.model_management.get_offload_stream(device) + cast_buffer = None + if required_size > comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT[1]: + comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT = (module, required_size) + + def get_cast_buffer(buffer_size): + nonlocal offload_stream + nonlocal cast_buffer + nonlocal cast_buffer_offset + + if buffer_size == 0: + return None + + if offload_stream is None: + return torch.empty((buffer_size,), dtype=torch.uint8, device=device) + + cast_buffer = comfy.model_management.get_aimdo_cast_buffer(offload_stream, device) + buffer = comfy_aimdo.torch.aimdo_to_tensor(cast_buffer.get(buffer_size, cast_buffer_offset), device) + cast_buffer_offset += buffer_size + return buffer + + for s in 
comfy_modules: + signature = comfy_aimdo.model_vbar.vbar_fault(s._v) + resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) + prefetch = { + "signature": signature, + "resident": resident, + } - signature = comfy_aimdo.model_vbar.vbar_fault(s._v) - resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) - if signature is not None: if resident: - weight = s._v_weight - bias = s._v_bias - else: - xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) + s._prefetch = prefetch + continue - if not resident: + materialize_meta_param(s, ["weight", "bias"]) + xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if signature is not None else None cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None + needs_cast = False xfer_source = [ s.weight, s.bias ] @@ -121,22 +152,15 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu if data is None: continue if data.dtype != geometry.dtype: + needs_cast = True cast_dest = xfer_dest - if cast_dest is None: - cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device) xfer_dest = None break dest_size = comfy.memory_management.vram_aligned_size(xfer_source) - offload_stream = comfy.model_management.get_offload_stream(device) - if xfer_dest is None and offload_stream is not None: - xfer_dest = comfy.model_management.get_cast_buffer(offload_stream, device, dest_size, s) - if xfer_dest is None: - offload_stream = comfy.model_management.get_offload_stream(device) - xfer_dest = comfy.model_management.get_cast_buffer(offload_stream, device, dest_size, s) + ensure_offload_stream(s, dest_size if xfer_dest is None else 0, True) if xfer_dest is None: - xfer_dest = torch.empty((dest_size,), dtype=torch.uint8, device=device) - offload_stream = None + xfer_dest = get_cast_buffer(dest_size) if signature is None and pin is None: 
comfy.pinned_memory.pin_memory(s) @@ -149,27 +173,54 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu xfer_source = [ pin ] #send it over comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=offload_stream) - comfy.model_management.sync_stream(device, offload_stream) - if cast_dest is not None: + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + ensure_offload_stream(s, cast_buffer_offset, False) + lowvram_fn.prepare(lambda size: get_cast_buffer(size), offload_stream) + + prefetch["xfer_dest"] = xfer_dest + prefetch["cast_dest"] = cast_dest + prefetch["cast_geometry"] = cast_geometry + prefetch["needs_cast"] = needs_cast + s._prefetch = prefetch + + return offload_stream + + +def resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, want_requant): + + prefetch = getattr(s, "_prefetch", None) + + if prefetch["resident"]: + weight = s._v_weight + bias = s._v_bias + else: + xfer_dest = prefetch["xfer_dest"] + if prefetch["needs_cast"]: + cast_dest = prefetch["cast_dest"] if prefetch["cast_dest"] is not None else torch.empty((comfy.memory_management.vram_aligned_size(prefetch["cast_geometry"]),), dtype=torch.uint8, device=device) for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like([s.weight, s.bias ], xfer_dest), - comfy.memory_management.interpret_gathered_like(cast_geometry, cast_dest)): + comfy.memory_management.interpret_gathered_like(prefetch["cast_geometry"], cast_dest)): if post_cast is not None: post_cast.copy_(pre_cast) xfer_dest = cast_dest - params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest) + params = comfy.memory_management.interpret_gathered_like(prefetch["cast_geometry"], xfer_dest) weight = params[0] bias = params[1] - if signature is not None: + if prefetch["signature"] is not None: s._v_weight = weight s._v_bias 
= bias - s._v_signature=signature + s._v_signature = prefetch["signature"] def post_cast(s, param_key, x, dtype, resident, update_weight): lowvram_fn = getattr(s, param_key + "_lowvram_function", None) fns = getattr(s, param_key + "_function", []) + if x is None: + return None + orig = x def to_dequant(tensor, dtype): @@ -197,14 +248,15 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu x = f(x) return x - update_weight = signature is not None + update_weight = prefetch["signature"] is not None + weight = post_cast(s, "weight", weight, dtype, prefetch["resident"], update_weight) + if bias is not None: + bias = post_cast(s, "bias", bias, bias_dtype, prefetch["resident"], update_weight) - weight = post_cast(s, "weight", weight, dtype, resident, update_weight) - if s.bias is not None: - bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight) + if prefetch["signature"] is not None: + prefetch["resident"] = True - #FIXME: weird offload return protocol - return weight, bias, (offload_stream, device if signature is not None else None, None) + return weight, bias def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None, want_requant=False): @@ -222,10 +274,46 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of if device is None: device = input.device + def format_return(result, offloadable): + weight, bias, offload_stream = result + return (weight, bias, offload_stream) if offloadable else (weight, bias) + non_blocking = comfy.model_management.device_supports_non_blocking(device) if hasattr(s, "_v"): - return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant) + + #vbar doesn't support CPU weights, but some custom nodes have weird paths + #that might switch the layer to the CPU and expect it to work. 
We have to take + #a clone conservatively as we are mmapped and some SFT files are packed misaligned + #If you are a custom node author reading this, please move your layer to the GPU + #or declare your ModelPatcher as CPU in the first place. + if comfy.model_management.is_device_cpu(device): + materialize_meta_param(s, ["weight", "bias"]) + weight = s.weight.to(dtype=dtype, copy=True) + if isinstance(weight, QuantizedTensor): + weight = weight.dequantize() + bias = s.bias.to(dtype=bias_dtype, copy=True) if s.bias is not None else None + return format_return((weight, bias, (None, None, None)), offloadable) + + prefetched = hasattr(s, "_prefetch") + offload_stream = None + offload_device = None + if not prefetched: + offload_stream = cast_modules_with_vbar([s], dtype, device, bias_dtype, non_blocking) + comfy.model_management.sync_stream(device, offload_stream) + + weight, bias = resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, want_requant) + + if not prefetched: + if getattr(s, "_prefetch")["signature"] is not None: + offload_device = device + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + lowvram_fn.clear_prepared() + delattr(s, "_prefetch") + return format_return((weight, bias, (offload_stream, offload_device, None)), offloadable) + if offloadable and (device != s.weight.device or (s.bias is not None and device != s.bias.device)): @@ -272,11 +360,7 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of for f in s.weight_function: weight = f(weight) - if offloadable: - return weight, bias, (offload_stream, weight_a, bias_a) - else: - #Legacy function signature - return weight, bias + return format_return((weight, bias, (offload_stream, weight_a, bias_a)), offloadable) def uncast_bias_weight(s, weight, bias, offload_stream): @@ -306,6 +390,12 @@ class CastWeightBiasOp: bias_function = [] class disable_weight_init: + 
@staticmethod + def _zero_init_parameter(module, name): + param = getattr(module, name) + device = None if getattr(param, "is_meta", False) else param.device + setattr(module, name, torch.nn.Parameter(torch.zeros(param.shape, device=device, dtype=param.dtype), requires_grad=False)) + @staticmethod def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata, missing_keys, unexpected_keys, weight_shape, @@ -472,6 +562,25 @@ class disable_weight_init: else: return super().forward(*args, **kwargs) + class BatchNorm2d(torch.nn.BatchNorm2d, CastWeightBiasOp): + def reset_parameters(self): + return None + + def forward_comfy_cast_weights(self, input): + weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True) + running_mean = self.running_mean.to(device=input.device, dtype=weight.dtype) if self.running_mean is not None else None + running_var = self.running_var.to(device=input.device, dtype=weight.dtype) if self.running_var is not None else None + x = torch.nn.functional.batch_norm(input, running_mean, running_var, weight, bias, self.training, self.momentum, self.eps) + uncast_bias_weight(self, weight, bias, offload_stream) + return x + + def forward(self, *args, **kwargs): + run_every_op() + if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0: + return self.forward_comfy_cast_weights(*args, **kwargs) + else: + return super().forward(*args, **kwargs) + class LayerNorm(torch.nn.LayerNorm, CastWeightBiasOp): def reset_parameters(self): return None @@ -659,6 +768,9 @@ class manual_cast(disable_weight_init): class Conv3d(disable_weight_init.Conv3d): comfy_cast_weights = True + class BatchNorm2d(disable_weight_init.BatchNorm2d): + comfy_cast_weights = True + class GroupNorm(disable_weight_init.GroupNorm): comfy_cast_weights = True @@ -1151,7 +1263,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec if param is None: continue p = fn(param) - if p.is_inference(): + if 
(not torch.is_inference_mode_enabled()) and p.is_inference(): p = p.clone() self.register_parameter(key, torch.nn.Parameter(p, requires_grad=False)) for key, buf in self._buffers.items(): @@ -1159,6 +1271,94 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec self._buffers[key] = fn(buf) return self + class Embedding(manual_cast.Embedding): + def _load_from_state_dict(self, state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, error_msgs): + weight_key = f"{prefix}weight" + layer_conf = state_dict.pop(f"{prefix}comfy_quant", None) + if layer_conf is not None: + layer_conf = json.loads(layer_conf.numpy().tobytes()) + + # Only fp8 makes sense for embeddings (per-row dequant via index select). + # Block-scaled formats (NVFP4, MXFP8) can't do per-row lookup efficiently. + quant_format = layer_conf.get("format", None) if layer_conf is not None else None + if quant_format in ["float8_e4m3fn", "float8_e5m2"] and weight_key in state_dict: + self.quant_format = quant_format + qconfig = QUANT_ALGOS[quant_format] + self.layout_type = qconfig["comfy_tensor_layout"] + layout_cls = get_layout_class(self.layout_type) + weight = state_dict.pop(weight_key) + manually_loaded_keys = [weight_key] + + scale_key = f"{prefix}weight_scale" + scale = state_dict.pop(scale_key, None) + if scale is not None: + scale = scale.float() + manually_loaded_keys.append(scale_key) + + params = layout_cls.Params( + scale=scale if scale is not None else torch.ones((), dtype=torch.float32), + orig_dtype=MixedPrecisionOps._compute_dtype, + orig_shape=(self.num_embeddings, self.embedding_dim), + ) + self.weight = torch.nn.Parameter( + QuantizedTensor(weight.to(dtype=qconfig["storage_t"]), qconfig["comfy_tensor_layout"], params), + requires_grad=False) + + super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) + for k in manually_loaded_keys: + if k in missing_keys: + missing_keys.remove(k) + 
else: + if layer_conf is not None: + state_dict[f"{prefix}comfy_quant"] = torch.tensor(list(json.dumps(layer_conf).encode('utf-8')), dtype=torch.uint8) + super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) + + def state_dict(self, *args, destination=None, prefix="", **kwargs): + if destination is not None: + sd = destination + else: + sd = {} + + if not hasattr(self, 'weight') or self.weight is None: + return sd + + if isinstance(self.weight, QuantizedTensor): + sd_out = self.weight.state_dict("{}weight".format(prefix)) + for k in sd_out: + sd[k] = sd_out[k] + + quant_conf = {"format": self.quant_format} + sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8) + else: + sd["{}weight".format(prefix)] = self.weight + return sd + + def forward_comfy_cast_weights(self, input, out_dtype=None): + weight = self.weight + + # Optimized path: lookup in fp8, dequantize only the selected rows. 
+ if isinstance(weight, QuantizedTensor) and len(self.weight_function) == 0: + qdata, _, offload_stream = cast_bias_weight(self, device=input.device, dtype=weight.dtype, offloadable=True) + if isinstance(qdata, QuantizedTensor): + scale = qdata._params.scale + qdata = qdata._qdata + else: + scale = None + + x = torch.nn.functional.embedding( + input, qdata, self.padding_idx, self.max_norm, + self.norm_type, self.scale_grad_by_freq, self.sparse) + uncast_bias_weight(self, qdata, None, offload_stream) + target_dtype = out_dtype if out_dtype is not None else weight._params.orig_dtype + x = x.to(dtype=target_dtype) + if scale is not None and scale != 1.0: + x = x * scale.to(dtype=target_dtype) + return x + + # Fallback for non-quantized or weight_function (LoRA) case + return super().forward_comfy_cast_weights(input, out_dtype=out_dtype) + return MixedPrecisionOps def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None): @@ -1176,6 +1376,7 @@ def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_ if not fp8_compute: disabled.add("float8_e4m3fn") disabled.add("float8_e5m2") + logging.info("Native ops: {} {}".format(", ".join(QUANT_ALGOS.keys() - disabled), ", emulated ops: {}".format(", ".join(disabled)) if len(disabled) > 0 else "")) return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled) if ( diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index 6f142282d..6d3ba367a 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -2,7 +2,6 @@ import comfy.model_management import comfy.memory_management import comfy_aimdo.host_buffer import comfy_aimdo.torch -import psutil from comfy.cli_args import args @@ -12,11 +11,6 @@ def get_pin(module): def pin_memory(module): if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None: return - #FIXME: This is a RAM cache trigger event - ram_headroom = 
comfy.memory_management.RAM_CACHE_HEADROOM - #we split the difference and assume half the RAM cache headroom is for us - if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5): - comfy.memory_management.extra_ram_release(ram_headroom) size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ]) diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py index 42ee08fb2..b90bcfd25 100644 --- a/comfy/quant_ops.py +++ b/comfy/quant_ops.py @@ -1,6 +1,8 @@ import torch import logging +from comfy.cli_args import args + try: import comfy_kitchen as ck from comfy_kitchen.tensor import ( @@ -21,7 +23,15 @@ try: ck.registry.disable("cuda") logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") - ck.registry.disable("triton") + if args.enable_triton_backend: + try: + import triton + logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__) + except ImportError as e: + logging.error(f"Failed to import triton, Error: {e}, the comfy-kitchen triton backend will not be available.") + ck.registry.disable("triton") + else: + ck.registry.disable("triton") for k, v in ck.list_backends().items(): logging.info(f"Found comfy_kitchen backend {k}: {v}") except ImportError as e: diff --git a/comfy/rmsnorm.py b/comfy/rmsnorm.py index ab7cf14fa..e54be98d6 100644 --- a/comfy/rmsnorm.py +++ b/comfy/rmsnorm.py @@ -3,6 +3,7 @@ import comfy.model_management RMSNorm = torch.nn.RMSNorm +# Note: torch's fused F.rms_norm is faster but produces slightly different output than manual implementations (rsqrt/reduction rounding). 
def rms_norm(x, weight=None, eps=1e-6): if weight is None: return torch.nn.functional.rms_norm(x, (x.shape[-1],), eps=eps) diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py index bbba09e26..3782fd2d5 100644 --- a/comfy/sampler_helpers.py +++ b/comfy/sampler_helpers.py @@ -89,7 +89,8 @@ def get_additional_models(conds, dtype): gligen += get_models_from_cond(conds[k], "gligen") add_models += get_models_from_cond(conds[k], "additional_models") - control_nets = set(cnets) + # Order-preserving dedup. A plain set() would randomize iteration order across runs + control_nets = list(dict.fromkeys(cnets)) inference_memory = 0 control_models = [] diff --git a/comfy/sd.py b/comfy/sd.py index 7425765a4..1391dfad7 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -12,11 +12,13 @@ from .ldm.cascade.stage_c_coder import StageC_coder from .ldm.audio.autoencoder import AudioOobleckVAE import comfy.ldm.genmo.vae.model import comfy.ldm.lightricks.vae.causal_video_autoencoder +import comfy.ldm.lightricks.vae.audio_vae import comfy.ldm.cosmos.vae import comfy.ldm.wan.vae import comfy.ldm.wan.vae2_2 import comfy.ldm.hunyuan3d.vae import comfy.ldm.ace.vae.music_dcae_pipeline +import comfy.ldm.cogvideo.vae import comfy.ldm.hunyuan_video.vae import comfy.ldm.mmaudio.vae.autoencoder import comfy.pixel_space_convert @@ -62,6 +64,9 @@ import comfy.text_encoders.anima import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image import comfy.text_encoders.qwen35 +import comfy.text_encoders.ernie +import comfy.text_encoders.gemma4 +import comfy.text_encoders.cogvideo import comfy.model_patcher import comfy.lora @@ -74,7 +79,7 @@ import comfy.latent_formats import comfy.ldm.flux.redux -def load_lora_for_models(model, clip, lora, strength_model, strength_clip): +def load_lora_for_models(model, clip, lora, strength_model, strength_clip, lora_metadata=None): key_map = {} if model is not None: key_map = comfy.lora.model_lora_keys_unet(model.model, key_map) @@ -86,6 +91,8 @@ def 
load_lora_for_models(model, clip, lora, strength_model, strength_clip): if model is not None: new_modelpatcher = model.clone() k = new_modelpatcher.add_patches(loaded, strength_model) + if lora_metadata: + new_modelpatcher.set_attachments("lora_metadata", lora_metadata) else: k = () new_modelpatcher = None @@ -93,6 +100,8 @@ def load_lora_for_models(model, clip, lora, strength_model, strength_clip): if clip is not None: new_clip = clip.clone() k1 = new_clip.add_patches(loaded, strength_clip) + if lora_metadata: + new_clip.patcher.set_attachments("lora_metadata", lora_metadata) else: k1 = () new_clip = None @@ -234,7 +243,8 @@ class CLIP: model_management.archive_model_dtypes(self.cond_stage_model) self.tokenizer = tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data) - ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher + te_disable_dynamic = disable_dynamic or getattr(self.cond_stage_model, "disable_offload", False) + ModelPatcher = comfy.model_patcher.ModelPatcher if te_disable_dynamic else comfy.model_patcher.CoreModelPatcher self.patcher = ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device) #Match torch.float32 hardcode upcast in TE implemention self.patcher.set_model_compute_dtype(torch.float32) @@ -476,7 +486,10 @@ class VAE: encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': encoder_config}, decoder_config={'target': "comfy.ldm.modules.temporal_ae.VideoDecoder", 'params': decoder_config}) elif "taesd_decoder.1.weight" in sd: - self.latent_channels = sd["taesd_decoder.1.weight"].shape[1] + if isinstance(metadata, dict) and "tae_latent_channels" in metadata: + self.latent_channels = metadata["tae_latent_channels"] + else: + self.latent_channels = sd["taesd_decoder.1.weight"].shape[1] self.first_stage_model = comfy.taesd.taesd.TAESD(latent_channels=self.latent_channels) elif 
"vquantizer.codebook.weight" in sd: #VQGan: stage a of stable cascade self.first_stage_model = StageA() @@ -556,12 +569,19 @@ class VAE: old_memory_used_decode = self.memory_used_decode self.memory_used_decode = lambda shape, dtype: old_memory_used_decode(shape, dtype) * 4.0 + decoder_ch = sd['decoder.conv_in.weight'].shape[0] // ddconfig['ch_mult'][-1] + if decoder_ch != ddconfig['ch']: + decoder_ddconfig = ddconfig.copy() + decoder_ddconfig['ch'] = decoder_ch + else: + decoder_ddconfig = None + if 'post_quant_conv.weight' in sd: - self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1]) + self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1], **({"decoder_ddconfig": decoder_ddconfig} if decoder_ddconfig is not None else {})) else: self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"}, encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig}, - decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig}) + decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': decoder_ddconfig if decoder_ddconfig is not None else ddconfig}) elif "decoder.layers.1.layers.0.beta" in sd: config = {} param_key = None @@ -643,6 +663,17 @@ class VAE: self.memory_used_encode = lambda shape, dtype: (1400 * 9 * shape[-2] * shape[-1]) * model_management.dtype_size(dtype) self.memory_used_decode = lambda shape, dtype: (3600 * 4 * shape[-2] * shape[-1] * 16 * 16) * model_management.dtype_size(dtype) + elif "decoder.conv_in.conv.weight" in sd and "decoder.mid_block.resnets.0.norm1.norm_layer.weight" in sd: # CogVideoX VAE + self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8) + self.upscale_index_formula = (4, 8, 8) + self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8) + 
self.downscale_index_formula = (4, 8, 8) + self.latent_dim = 3 + self.latent_channels = sd["encoder.conv_out.conv.weight"].shape[0] // 2 + self.first_stage_model = comfy.ldm.cogvideo.vae.AutoencoderKLCogVideoX(latent_channels=self.latent_channels) + self.memory_used_decode = lambda shape, dtype: (2800 * max(2, ((shape[2] - 1) * 4) + 1) * shape[3] * shape[4] * (8 * 8)) * model_management.dtype_size(dtype) + self.memory_used_encode = lambda shape, dtype: (1400 * max(1, shape[2]) * shape[3] * shape[4]) * model_management.dtype_size(dtype) + self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] elif "decoder.conv_in.conv.weight" in sd: ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} ddconfig["conv3d"] = True @@ -750,6 +781,7 @@ class VAE: self.latent_channels = 3 self.latent_dim = 2 self.output_channels = 3 + self.disable_offload = True elif "vocoder.activation_post.downsample.lowpass.filter" in sd: #MMAudio VAE sample_rate = 16000 if sample_rate == 16000: @@ -797,6 +829,24 @@ class VAE: self.downscale_index_formula = (4, 8, 8) self.memory_used_encode = lambda shape, dtype: (700 * (max(1, (shape[-3] ** 0.66 * 0.11)) * shape[-2] * shape[-1]) * model_management.dtype_size(dtype)) self.memory_used_decode = lambda shape, dtype: (50 * (max(1, (shape[-3] ** 0.65 * 0.26)) * shape[-2] * shape[-1] * 32 * 32) * model_management.dtype_size(dtype)) + elif "vocoder.resblocks.0.convs1.0.weight" in sd or "vocoder.vocoder.resblocks.0.convs1.0.weight" in sd: # LTX Audio + sd = comfy.utils.state_dict_prefix_replace(sd, {"audio_vae.": "autoencoder."}) + self.first_stage_model = comfy.ldm.lightricks.vae.audio_vae.AudioVAE(metadata=metadata) + self.memory_used_encode = lambda shape, dtype: (shape[2] * 330) * model_management.dtype_size(dtype) + self.memory_used_decode = lambda shape, dtype: (shape[2] * shape[3] * 87000) * 
model_management.dtype_size(dtype) + self.latent_channels = self.first_stage_model.latent_channels + self.audio_sample_rate_output = self.first_stage_model.output_sample_rate + self.autoencoder = self.first_stage_model.autoencoder # TODO: remove hack for ltxv custom nodes + self.output_channels = 2 + self.pad_channel_value = "replicate" + self.upscale_ratio = 4096 + self.downscale_ratio = 4096 + self.latent_dim = 2 + self.process_output = lambda audio: audio + self.process_input = lambda audio: audio + self.working_dtypes = [torch.float32] + self.disable_offload = True + self.extra_1d_channel = 16 else: logging.warning("WARNING: No VAE weights detected, VAE not initalized.") self.first_stage_model = None @@ -1181,6 +1231,7 @@ class CLIPType(Enum): NEWBIE = 24 FLUX2 = 25 LONGCAT_IMAGE = 26 + COGVIDEOX = 27 @@ -1228,6 +1279,10 @@ class TEModel(Enum): QWEN35_4B = 25 QWEN35_9B = 26 QWEN35_27B = 27 + MINISTRAL_3_3B = 28 + GEMMA_4_E4B = 29 + GEMMA_4_E2B = 30 + GEMMA_4_31B = 31 def detect_te_model(sd): @@ -1253,6 +1308,12 @@ def detect_te_model(sd): return TEModel.BYT5_SMALL_GLYPH return TEModel.T5_BASE if 'model.layers.0.post_feedforward_layernorm.weight' in sd: + if 'model.layers.59.self_attn.q_norm.weight' in sd: + return TEModel.GEMMA_4_31B + if 'model.layers.41.self_attn.q_norm.weight' in sd and 'model.layers.47.self_attn.q_norm.weight' not in sd: + return TEModel.GEMMA_4_E4B + if 'model.layers.34.self_attn.q_norm.weight' in sd and 'model.layers.41.self_attn.q_norm.weight' not in sd: + return TEModel.GEMMA_4_E2B if 'model.layers.47.self_attn.q_norm.weight' in sd: return TEModel.GEMMA_3_12B if 'model.layers.0.self_attn.q_norm.weight' in sd: @@ -1294,6 +1355,8 @@ def detect_te_model(sd): return TEModel.MISTRAL3_24B else: return TEModel.MISTRAL3_24B_PRUNED_FLUX2 + if weight.shape[0] == 3072: + return TEModel.MINISTRAL_3_3B return TEModel.LLAMA3_8 return None @@ -1373,6 +1436,9 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip 
clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**t5xxl_detect(clip_data), clip_l=False, clip_g=False, t5=True, llama=False, dtype_llama=None) clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer + elif clip_type == CLIPType.COGVIDEOX: + clip_target.clip = comfy.text_encoders.cogvideo.cogvideo_te(**t5xxl_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.cogvideo.CogVideoXTokenizer else: #CLIPType.MOCHI clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer @@ -1390,6 +1456,13 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip else: clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer + elif te_model in (TEModel.GEMMA_4_E4B, TEModel.GEMMA_4_E2B, TEModel.GEMMA_4_31B): + variant = {TEModel.GEMMA_4_E4B: comfy.text_encoders.gemma4.Gemma4_E4B, + TEModel.GEMMA_4_E2B: comfy.text_encoders.gemma4.Gemma4_E2B, + TEModel.GEMMA_4_31B: comfy.text_encoders.gemma4.Gemma4_31B}[te_model] + clip_target.clip = comfy.text_encoders.gemma4.gemma4_te(**llama_detect(clip_data), model_class=variant) + clip_target.tokenizer = variant.tokenizer + tokenizer_data["tokenizer_json"] = clip_data[0].get("tokenizer_json", None) elif te_model == TEModel.GEMMA_2_2B: clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer @@ -1451,6 +1524,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip elif te_model == TEModel.QWEN3_06B: clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer + elif te_model == TEModel.MINISTRAL_3_3B: + clip_target.clip = comfy.text_encoders.ernie.te(**llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.ernie.ErnieTokenizer + 
tokenizer_data["tekken_model"] = clip_data[0].get("tekken_model", None) else: # clip_l if clip_type == CLIPType.SD3: @@ -1745,6 +1822,8 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable temp_sd = comfy.utils.state_dict_prefix_replace(sd, {diffusion_model_prefix: ""}, filter_keys=True) if len(temp_sd) > 0: sd = temp_sd + if custom_operations is None: + sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata) parameters = comfy.utils.calculate_parameters(sd) weight_dtype = comfy.utils.weight_dtype(sd) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 9a5612716..1e4434fd5 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -26,6 +26,9 @@ import comfy.text_encoders.z_image import comfy.text_encoders.anima import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image +import comfy.text_encoders.ernie +import comfy.text_encoders.cogvideo +import comfy.text_encoders.hidream_o1 from . import supported_models_base from . 
import latent_formats @@ -1165,6 +1168,25 @@ class WAN21_T2V(supported_models_base.BASE): t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}umt5xxl.transformer.".format(pref)) return supported_models_base.ClipTarget(comfy.text_encoders.wan.WanT5Tokenizer, comfy.text_encoders.wan.te(**t5_detect)) +class WAN21_CausalAR_T2V(WAN21_T2V): + unet_config = { + "image_model": "wan2.1", + "model_type": "t2v", + "causal_ar": True, + } + + sampling_settings = { + "shift": 5.0, + } + + def __init__(self, unet_config): + super().__init__(unet_config) + self.unet_config.pop("causal_ar", None) + + def get_model(self, state_dict, prefix="", device=None): + return model_base.WAN21_CausalAR(self, device=device) + + class WAN21_I2V(WAN21_T2V): unet_config = { "image_model": "wan2.1", @@ -1292,6 +1314,37 @@ class WAN21_SCAIL(WAN21_T2V): out = model_base.WAN21_SCAIL(self, image_to_video=False, device=device) return out +class WAN22_WanDancer(WAN21_T2V): + unet_config = { + "image_model": "wan2.1", + "model_type": "wandancer", + "in_dim": 36, + } + + def __init__(self, unet_config): + super().__init__(unet_config) + self.memory_usage_factor = 1.8 + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.WAN22_WanDancer(self, image_to_video=True, device=device) + return out + + def process_unet_state_dict(self, state_dict): + out_sd = {} + for k in list(state_dict.keys()): + # split music_encoder in_proj into q_proj, k_proj, v_proj + if "music_encoder" in k and "self_attn.in_proj" in k: + suffix = "weight" if k.endswith("weight") else "bias" + tensor = state_dict[k] + d = tensor.shape[0] // 3 + prefix = k.replace(f"in_proj_{suffix}", "") + out_sd[f"{prefix}q_proj.{suffix}"] = tensor[:d] + out_sd[f"{prefix}k_proj.{suffix}"] = tensor[d:2*d] + out_sd[f"{prefix}v_proj.{suffix}"] = tensor[2*d:] + else: + out_sd[k] = state_dict[k] + return out_sd + class Hunyuan3Dv2(supported_models_base.BASE): unet_config = { "image_model": "hunyuan3d2", @@ -1379,6 
+1432,50 @@ class HiDream(supported_models_base.BASE): def clip_target(self, state_dict={}): return None # TODO +class HiDreamO1(supported_models_base.BASE): + unet_config = { + "image_model": "hidream_o1", + } + + sampling_settings = { + "shift": 3.0, + "noise_scale": 8.0, + } + + latent_format = latent_formats.HiDreamO1Pixel + memory_usage_factor = 0.033 + # fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs + supported_inference_dtypes = [torch.bfloat16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + optimizations = {"fp8": False} + + def get_model(self, state_dict, prefix="", device=None): + return model_base.HiDreamO1(self, device=device) + + def process_unet_state_dict(self, state_dict): + # Drop unused Qwen3-VL deepstack merger weights; upstream discards them at inference. + for key in list(state_dict.keys()): + if "visual.deepstack_merger_list" in key: + del state_dict[key] + return state_dict + + def process_vae_state_dict(self, state_dict): + # Pixel-space model: inject sentinel so VAE construction picks PixelspaceConversionVAE. + return {"pixel_space_vae": torch.tensor(1.0)} + + def process_clip_state_dict(self, state_dict): + # Tokenizer-only TE: inject sentinel so load_state_dict_guess_config triggers CLIP init. 
+ return {"_hidream_o1_te_sentinel": torch.zeros(1)} + + def clip_target(self, state_dict={}): + return supported_models_base.ClipTarget( + comfy.text_encoders.hidream_o1.HiDreamO1Tokenizer, + comfy.text_encoders.hidream_o1.HiDreamO1TE, + ) + class Chroma(supported_models_base.BASE): unet_config = { "image_model": "chroma", @@ -1749,6 +1846,240 @@ class RT_DETR_v4(supported_models_base.BASE): def clip_target(self, state_dict={}): return None -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4] -models += [SVD_img2vid] +class ErnieImage(supported_models_base.BASE): + unet_config = { + "image_model": "ernie", + } + + sampling_settings = { + "multiplier": 1000.0, + "shift": 3.0, + } + + memory_usage_factor = 10.0 + + unet_extra_config = {} + latent_format = latent_formats.Flux2 + + supported_inference_dtypes = [torch.bfloat16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.ErnieImage(self, device=device) + return out + + def clip_target(self, 
state_dict={}): + pref = self.text_encoder_key_prefix[0] + hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}ministral3_3b.transformer.".format(pref)) + return supported_models_base.ClipTarget(comfy.text_encoders.ernie.ErnieTokenizer, comfy.text_encoders.ernie.te(**hunyuan_detect)) + + +class SAM3(supported_models_base.BASE): + unet_config = {"image_model": "SAM3"} + supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32] + text_encoder_key_prefix = ["detector.backbone.language_backbone."] + unet_extra_prefix = "" + + def process_clip_state_dict(self, state_dict): + clip_keys = getattr(self, "_clip_stash", {}) + clip_keys = utils.state_dict_prefix_replace(clip_keys, {"detector.backbone.language_backbone.": "", "backbone.language_backbone.": ""}, filter_keys=True) + clip_keys = utils.clip_text_transformers_convert(clip_keys, "encoder.", "sam3_clip.transformer.") + return {k: v for k, v in clip_keys.items() if not k.startswith("resizer.")} + + def process_unet_state_dict(self, state_dict): + self._clip_stash = {k: state_dict.pop(k) for k in list(state_dict.keys()) if "language_backbone" in k and "resizer" not in k} + # SAM3.1: remap tracker.model.* -> tracker.* + for k in list(state_dict.keys()): + if k.startswith("tracker.model."): + state_dict["tracker." 
+ k[len("tracker.model."):]] = state_dict.pop(k) + # SAM3.1: remove per-block freqs_cis buffers (computed dynamically) + for k in [k for k in list(state_dict.keys()) if ".attn.freqs_cis" in k]: + state_dict.pop(k) + # Split fused QKV projections + for k in [k for k in list(state_dict.keys()) if k.endswith((".in_proj_weight", ".in_proj_bias"))]: + t = state_dict.pop(k) + base, suffix = k.rsplit(".in_proj_", 1) + s = ".weight" if suffix == "weight" else ".bias" + d = t.shape[0] // 3 + state_dict[base + ".q_proj" + s] = t[:d] + state_dict[base + ".k_proj" + s] = t[d:2*d] + state_dict[base + ".v_proj" + s] = t[2*d:] + # Remap tracker SAM decoder transformer key names to match sam.py TwoWayTransformer + for k in list(state_dict.keys()): + if "sam_mask_decoder.transformer." not in k: + continue + new_k = k.replace(".mlp.lin1.", ".mlp.0.").replace(".mlp.lin2.", ".mlp.2.").replace(".norm_final_attn.", ".norm_final.") + if new_k != k: + state_dict[new_k] = state_dict.pop(k) + return state_dict + + def get_model(self, state_dict, prefix="", device=None): + return model_base.SAM3(self, device=device) + + def clip_target(self, state_dict={}): + import comfy.text_encoders.sam3_clip + return supported_models_base.ClipTarget(comfy.text_encoders.sam3_clip.SAM3TokenizerWrapper, comfy.text_encoders.sam3_clip.SAM3ClipModelWrapper) + + +class SAM31(SAM3): + unet_config = {"image_model": "SAM31"} + + +class CogVideoX_T2V(supported_models_base.BASE): + unet_config = { + "image_model": "cogvideox", + } + + sampling_settings = { + "linear_start": 0.00085, + "linear_end": 0.012, + "beta_schedule": "linear", + "zsnr": True, + } + + unet_extra_config = {} + latent_format = latent_formats.CogVideoX + + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + def __init__(self, unet_config): + # 2b-class (dim=1920, heads=30) uses scale_factor=1.15258426. + # 5b-class (dim=3072, heads=48) — incl. 
CogVideoX-5b, 1.5-5B, and + # Fun-V1.5 inpainting — uses scale_factor=0.7 per vae/config.json. + if unet_config.get("num_attention_heads", 0) >= 48: + self.latent_format = latent_formats.CogVideoX1_5 + super().__init__(unet_config) + + def get_model(self, state_dict, prefix="", device=None): + # CogVideoX 1.5 (patch_size_t=2) has different training base dimensions for RoPE + if self.unet_config.get("patch_size_t") is not None: + self.unet_config.setdefault("sample_height", 96) + self.unet_config.setdefault("sample_width", 170) + self.unet_config.setdefault("sample_frames", 81) + out = model_base.CogVideoX(self, device=device) + return out + + def clip_target(self, state_dict={}): + return supported_models_base.ClipTarget(comfy.text_encoders.cogvideo.CogVideoXT5Tokenizer, comfy.text_encoders.sd3_clip.T5XXLModel) + +class CogVideoX_I2V(CogVideoX_T2V): + unet_config = { + "image_model": "cogvideox", + "in_channels": 32, + } + + def get_model(self, state_dict, prefix="", device=None): + if self.unet_config.get("patch_size_t") is not None: + self.unet_config.setdefault("sample_height", 96) + self.unet_config.setdefault("sample_width", 170) + self.unet_config.setdefault("sample_frames", 81) + out = model_base.CogVideoX(self, image_to_video=True, device=device) + return out + +class CogVideoX_Inpaint(CogVideoX_T2V): + unet_config = { + "image_model": "cogvideox", + "in_channels": 48, + } + + def get_model(self, state_dict, prefix="", device=None): + if self.unet_config.get("patch_size_t") is not None: + self.unet_config.setdefault("sample_height", 96) + self.unet_config.setdefault("sample_width", 170) + self.unet_config.setdefault("sample_frames", 81) + out = model_base.CogVideoX(self, image_to_video=True, device=device) + return out + + +models = [ + LotusD, + Stable_Zero123, + SD15_instructpix2pix, + SD15, + SD20, + SD21UnclipL, + SD21UnclipH, + SDXL_instructpix2pix, + SDXLRefiner, + SDXL, + SSD1B, + KOALA_700M, + KOALA_1B, + Segmind_Vega, + SD_X4Upscaler, + 
Stable_Cascade_C, + Stable_Cascade_B, + SV3D_u, + SV3D_p, + SD3, + StableAudio, + AuraFlow, + PixArtAlpha, + PixArtSigma, + HunyuanDiT, + HunyuanDiT1, + FluxInpaint, + Flux, + LongCatImage, + FluxSchnell, + GenmoMochi, + LTXV, + LTXAV, + HunyuanVideo15_SR_Distilled, + HunyuanVideo15, + HunyuanImage21Refiner, + HunyuanImage21, + HunyuanVideoSkyreelsI2V, + HunyuanVideoI2V, + HunyuanVideo, + CosmosT2V, + CosmosI2V, + CosmosT2IPredict2, + CosmosI2VPredict2, + ZImagePixelSpace, + ZImage, + Lumina2, + WAN22_T2V, + WAN21_CausalAR_T2V, + WAN21_T2V, + WAN21_I2V, + WAN21_FunControl2V, + WAN21_Vace, + WAN21_Camera, + WAN22_Camera, + WAN22_S2V, + WAN21_HuMo, + WAN22_Animate, + WAN21_FlowRVS, + WAN21_SCAIL, + WAN22_WanDancer, + Hunyuan3Dv2mini, + Hunyuan3Dv2, + Hunyuan3Dv2_1, + HiDream, + HiDreamO1, + Chroma, + ChromaRadiance, + ACEStep, + ACEStep15, + Omnigen2, + QwenImage, + Flux2, + Kandinsky5Image, + Kandinsky5, + Anima, + RT_DETR_v4, + ErnieImage, + SAM3, + SAM31, + CogVideoX_Inpaint, + CogVideoX_I2V, + CogVideoX_T2V, + SVD_img2vid, +] diff --git a/comfy/taesd/taehv.py b/comfy/taesd/taehv.py index 6c06ce19d..696013200 100644 --- a/comfy/taesd/taehv.py +++ b/comfy/taesd/taehv.py @@ -7,6 +7,7 @@ from tqdm.auto import tqdm from collections import namedtuple, deque import comfy.ops +import comfy.model_management operations=comfy.ops.disable_weight_init DecoderResult = namedtuple("DecoderResult", ("frame", "memory")) @@ -47,11 +48,14 @@ class TGrow(nn.Module): x = self.conv(x) return x.reshape(-1, C, H, W) -def apply_model_with_memblocks(model, x, parallel, show_progress_bar): +def apply_model_with_memblocks(model, x, parallel, show_progress_bar, output_device=None, + patch_size=1, decode=False): B, T, C, H, W = x.shape if parallel: x = x.reshape(B*T, C, H, W) + if not decode and patch_size > 1: + x = F.pixel_unshuffle(x, patch_size) # parallel over input timesteps, iterate over blocks for b in tqdm(model, disable=not show_progress_bar): if isinstance(b, MemBlock): @@ -62,20 
+66,27 @@ def apply_model_with_memblocks(model, x, parallel, show_progress_bar): x = b(x, mem) else: x = b(x) - BT, C, H, W = x.shape - T = BT // B - x = x.view(B, T, C, H, W) + if decode and patch_size > 1: + x = F.pixel_shuffle(x, patch_size) + x = x.view(B, x.shape[0] // B, *x.shape[1:]) + x = x.to(output_device) else: out = [] - work_queue = deque([TWorkItem(xt, 0) for t, xt in enumerate(x.reshape(B, T * C, H, W).chunk(T, dim=1))]) + # Chunk along the time dim directly (chunks are [B,1,C,H,W] views, squeeze to [B,C,H,W] views). + # Avoids forcing a contiguous copy when x is non-contiguous (e.g. after movedim in encode/decode). + work_queue = deque([TWorkItem(xt.squeeze(1), 0) for xt in x.chunk(T, dim=1)]) progress_bar = tqdm(range(T), disable=not show_progress_bar) mem = [None] * len(model) while work_queue: xt, i = work_queue.popleft() if i == 0: progress_bar.update(1) + if not decode and patch_size > 1: + xt = F.pixel_unshuffle(xt, patch_size) if i == len(model): - out.append(xt) + if decode and patch_size > 1: + xt = F.pixel_shuffle(xt, patch_size) + out.append(xt.to(output_device)) del xt else: b = model[i] @@ -165,24 +176,20 @@ class TAEHV(nn.Module): def encode(self, x, **kwargs): x = x.movedim(2, 1) # [B, C, T, H, W] -> [B, T, C, H, W] - if self.patch_size > 1: - B, T, C, H, W = x.shape - x = x.reshape(B * T, C, H, W) - x = F.pixel_unshuffle(x, self.patch_size) - x = x.reshape(B, T, C * self.patch_size ** 2, H // self.patch_size, W // self.patch_size) if x.shape[1] % self.t_downscale != 0: # pad at end to multiple of t_downscale n_pad = self.t_downscale - x.shape[1] % self.t_downscale padding = x[:, -1:].repeat_interleave(n_pad, dim=1) x = torch.cat([x, padding], 1) - x = apply_model_with_memblocks(self.encoder, x, self.parallel, self.show_progress_bar).movedim(2, 1) + x = apply_model_with_memblocks(self.encoder, x, self.parallel, self.show_progress_bar, + patch_size=self.patch_size).movedim(2, 1) return self.process_out(x) def decode(self, x, **kwargs): 
x = x.unsqueeze(0) if x.ndim == 4 else x # [T, C, H, W] -> [1, T, C, H, W] x = x.movedim(1, 2) if x.shape[1] != self.latent_channels else x # [B, T, C, H, W] or [B, C, T, H, W] x = self.process_in(x).movedim(2, 1) # [B, C, T, H, W] -> [B, T, C, H, W] - x = apply_model_with_memblocks(self.decoder, x, self.parallel, self.show_progress_bar) - if self.patch_size > 1: - x = F.pixel_shuffle(x, self.patch_size) + x = apply_model_with_memblocks(self.decoder, x, self.parallel, self.show_progress_bar, + output_device=comfy.model_management.intermediate_device(), + patch_size=self.patch_size, decode=True) return x[:, self.frames_to_trim:].movedim(2, 1) diff --git a/comfy/taesd/taesd.py b/comfy/taesd/taesd.py index ce36f1a84..05d370209 100644 --- a/comfy/taesd/taesd.py +++ b/comfy/taesd/taesd.py @@ -17,32 +17,79 @@ class Clamp(nn.Module): return torch.tanh(x / 3) * 3 class Block(nn.Module): - def __init__(self, n_in, n_out): + def __init__(self, n_in: int, n_out: int, use_midblock_gn: bool = False): super().__init__() self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) self.skip = comfy.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() self.fuse = nn.ReLU() - def forward(self, x): + if not use_midblock_gn: + self.pool = None + return + n_gn = n_in * 4 + self.pool = nn.Sequential( + comfy.ops.disable_weight_init.Conv2d(n_in, n_gn, 1, bias=False), + comfy.ops.disable_weight_init.GroupNorm(4, n_gn), + nn.ReLU(inplace=True), + comfy.ops.disable_weight_init.Conv2d(n_gn, n_in, 1, bias=False), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.pool is not None: + x = x + self.pool(x) return self.fuse(self.conv(x) + self.skip(x)) -def Encoder(latent_channels=4): - return nn.Sequential( - conv(3, 64), Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), 
Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, latent_channels), - ) +class Encoder(nn.Sequential): + def __init__(self, latent_channels: int = 4, use_gn: bool = False): + super().__init__( + conv(3, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn), + conv(64, latent_channels), + ) +class Decoder(nn.Sequential): + def __init__(self, latent_channels: int = 4, use_gn: bool = False): + super().__init__( + Clamp(), conv(latent_channels, 64), nn.ReLU(), + Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), conv(64, 3), + ) + +class DecoderFlux2(Decoder): + def __init__(self, latent_channels: int = 128, use_gn: bool = True): + if latent_channels != 128 or not use_gn: + raise ValueError("Unexpected parameters for Flux2 TAE module") + super().__init__(latent_channels=32, use_gn=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + B, C, H, W = x.shape + x = ( + x + .reshape(B, 32, 2, 2, H, W) + .permute(0, 1, 4, 2, 5, 3) + .reshape(B, 32, H * 2, W * 2) + ) + return super().forward(x) + +class EncoderFlux2(Encoder): + def __init__(self, latent_channels: int = 128, use_gn: bool = True): + if latent_channels != 128 or not use_gn: + raise ValueError("Unexpected parameters for Flux2 TAE module") + super().__init__(latent_channels=32, use_gn=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + result = super().forward(x) + B, C, H, W = result.shape + return ( + result + 
.reshape(B, C, H // 2, 2, W // 2, 2) + .permute(0, 1, 3, 5, 2, 4) + .reshape(B, 128, H // 2, W // 2) + ) -def Decoder(latent_channels=4): - return nn.Sequential( - Clamp(), conv(latent_channels, 64), nn.ReLU(), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), conv(64, 3), - ) class TAESD(nn.Module): latent_magnitude = 3 @@ -51,8 +98,15 @@ class TAESD(nn.Module): def __init__(self, encoder_path=None, decoder_path=None, latent_channels=4): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.taesd_encoder = Encoder(latent_channels=latent_channels) - self.taesd_decoder = Decoder(latent_channels=latent_channels) + if latent_channels == 128: + encoder_class = EncoderFlux2 + decoder_class = DecoderFlux2 + else: + encoder_class = Encoder + decoder_class = Decoder + self.taesd_encoder = encoder_class(latent_channels=latent_channels) + self.taesd_decoder = decoder_class(latent_channels=latent_channels) + self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) self.vae_shift = torch.nn.Parameter(torch.tensor(0.0)) if encoder_path is not None: @@ -61,19 +115,19 @@ class TAESD(nn.Module): self.taesd_decoder.load_state_dict(comfy.utils.load_torch_file(decoder_path, safe_load=True)) @staticmethod - def scale_latents(x): + def scale_latents(x: torch.Tensor) -> torch.Tensor: """raw latents -> [0, 1]""" return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) @staticmethod - def unscale_latents(x): + def unscale_latents(x: torch.Tensor) -> torch.Tensor: """[0, 1] -> raw latents""" return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) - def decode(self, x): + def decode(self, x: torch.Tensor) -> torch.Tensor: x_sample = 
self.taesd_decoder((x - self.vae_shift) * self.vae_scale) x_sample = x_sample.sub(0.5).mul(2) return x_sample - def encode(self, x): + def encode(self, x: torch.Tensor) -> torch.Tensor: return (self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale) + self.vae_shift diff --git a/comfy/text_encoders/cogvideo.py b/comfy/text_encoders/cogvideo.py new file mode 100644 index 000000000..b97310709 --- /dev/null +++ b/comfy/text_encoders/cogvideo.py @@ -0,0 +1,48 @@ +import comfy.text_encoders.sd3_clip +from comfy import sd1_clip + + +class CogVideoXT5Tokenizer(comfy.text_encoders.sd3_clip.T5XXLTokenizer): + """Inner T5 tokenizer for CogVideoX. + + CogVideoX was trained with T5 embeddings padded to 226 tokens (not 77 like SD3). + Used both directly by supported_models.CogVideoX_T2V.clip_target (paired with + the raw T5XXLModel) and by the CogVideoXTokenizer outer wrapper below. + """ + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, min_length=226) + + +class CogVideoXTokenizer(sd1_clip.SD1Tokenizer): + """Outer tokenizer wrapper for CLIPLoader (type="cogvideox").""" + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, + clip_name="t5xxl", tokenizer=CogVideoXT5Tokenizer) + + +class CogVideoXT5XXL(sd1_clip.SD1ClipModel): + """Outer T5XXL model wrapper for CLIPLoader (type="cogvideox"). + + Wraps the raw T5XXL model in the SD1ClipModel interface so that CLIP.__init__ + (which reads self.dtypes) works correctly. The inner model is the standard + sd3_clip.T5XXLModel (no attention_mask change needed for CogVideoX). 
+ """ + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__(device=device, dtype=dtype, name="t5xxl", + clip_model=comfy.text_encoders.sd3_clip.T5XXLModel, + model_options=model_options) + + +def cogvideo_te(dtype_t5=None, t5_quantization_metadata=None): + """Factory that returns a CogVideoXT5XXL class configured with the detected + T5 dtype and optional quantization metadata, for use in load_text_encoder_state_dicts. + """ + class CogVideoXTEModel_(CogVideoXT5XXL): + def __init__(self, device="cpu", dtype=None, model_options={}): + if t5_quantization_metadata is not None: + model_options = model_options.copy() + model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata + if dtype_t5 is not None: + dtype = dtype_t5 + super().__init__(device=device, dtype=dtype, model_options=model_options) + return CogVideoXTEModel_ diff --git a/comfy/text_encoders/ernie.py b/comfy/text_encoders/ernie.py new file mode 100644 index 000000000..46d24d222 --- /dev/null +++ b/comfy/text_encoders/ernie.py @@ -0,0 +1,38 @@ +from .flux import Mistral3Tokenizer +from comfy import sd1_clip +import comfy.text_encoders.llama + +class Ministral3_3BTokenizer(Mistral3Tokenizer): + def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='ministral3_3b', tokenizer_data={}): + return super().__init__(embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_data=tokenizer_data) + +class ErnieTokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="ministral3_3b", tokenizer=Mistral3Tokenizer) + + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs): + tokens = super().tokenize_with_weights(text, return_word_ids=return_word_ids, disable_weights=True, **kwargs) + return tokens + + +class 
Ministral3_3BModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}): + textmodel_json_config = {} + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 1, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Ministral3_3B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + + +class ErnieTEModel(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}, name="ministral3_3b", clip_model=Ministral3_3BModel): + super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options) + + +def te(dtype_llama=None, llama_quantization_metadata=None): + class ErnieTEModel_(ErnieTEModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if dtype_llama is not None: + dtype = dtype_llama + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + super().__init__(device=device, dtype=dtype, model_options=model_options) + return ErnieTEModel_ diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index 1ae398789..d5eb91dcb 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -116,9 +116,9 @@ class MistralTokenizerClass: return LlamaTokenizerFast(**kwargs) class Mistral3Tokenizer(sd1_clip.SDTokenizer): - def __init__(self, embedding_directory=None, tokenizer_data={}): + def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_data={}): self.tekken_data = tokenizer_data.get("tekken_model", None) - super().__init__("", pad_with_end=False, embedding_directory=embedding_directory, embedding_size=5120, embedding_key='mistral3_24b', 
tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data) + super().__init__("", pad_with_end=False, embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, disable_weights=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data) def state_dict(self): return {"tekken_model": self.tekken_data} diff --git a/comfy/text_encoders/gemma4.py b/comfy/text_encoders/gemma4.py new file mode 100644 index 000000000..f050061ed --- /dev/null +++ b/comfy/text_encoders/gemma4.py @@ -0,0 +1,1298 @@ +import torch +import torch.nn as nn +import numpy as np +from dataclasses import dataclass +import math + +from comfy import sd1_clip +import comfy.model_management +from comfy.ldm.modules.attention import optimized_attention_for_device +from comfy.rmsnorm import rms_norm +from comfy.text_encoders.llama import RMSNorm, MLP, BaseLlama, BaseGenerate, _make_scaled_embedding + + +# Intentional minor divergences from transformers -reference implementation: +# - Embedding sqrt(hidden_size) scale applied as a Python scalar (full precision) instead of dtype-matched buffer tensor. 
+# - RMSNorm uses torch fused F.rms_norm, very slight numerical differences, but considerably faster +# - Input image and audio resizing/resampling slightly different numerically + + +GEMMA4_VISION_CONFIG = {"hidden_size": 768, "image_size": 896, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 16, "patch_size": 16, "head_dim": 64, "rms_norm_eps": 1e-6, "position_embedding_size": 10240, "pooling_kernel_size": 3} +GEMMA4_VISION_31B_CONFIG = {"hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 16, "head_dim": 72, "rms_norm_eps": 1e-6, "position_embedding_size": 10240, "pooling_kernel_size": 3} +GEMMA4_AUDIO_CONFIG = {"hidden_size": 1024, "num_hidden_layers": 12, "num_attention_heads": 8, "intermediate_size": 4096, "conv_kernel_size": 5, "attention_chunk_size": 12, "attention_context_left": 13, "attention_context_right": 0, "attention_logit_cap": 50.0, "output_proj_dims": 1536, "rms_norm_eps": 1e-6, "residual_weight": 0.5} + +@dataclass +class Gemma4Config: + vocab_size: int = 262144 + hidden_size: int = 2560 + intermediate_size: int = 10240 + num_hidden_layers: int = 42 + num_attention_heads: int = 8 + num_key_value_heads: int = 2 + max_position_embeddings: int = 131072 + rms_norm_eps: float = 1e-6 + rope_theta = [1000000.0, 10000.0] + transformer_type: str = "gemma4" + head_dim = 256 + global_head_dim = 512 + rms_norm_add = False + mlp_activation = "gelu_pytorch_tanh" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + sliding_attention = [512, 512, 512, 512, 512, False] + rope_scale = None + partial_rotary_factor: float = 0.25 + final_norm: bool = True + lm_head: bool = False + final_logit_softcapping: float = 30.0 + hidden_size_per_layer_input: int = 256 + num_kv_shared_layers: int = 18 + use_double_wide_mlp: bool = False + stop_tokens = [1, 50, 106] + vision_config = GEMMA4_VISION_CONFIG + audio_config = GEMMA4_AUDIO_CONFIG + 
mm_tokens_per_image = 280 + +@dataclass +class Gemma4_E2B_Config(Gemma4Config): + hidden_size: int = 1536 + intermediate_size: int = 6144 + num_hidden_layers: int = 35 + num_key_value_heads: int = 1 + sliding_attention = [512, 512, 512, 512, False] + num_kv_shared_layers: int = 20 + use_double_wide_mlp: bool = True + +@dataclass +class Gemma4_31B_Config(Gemma4Config): + hidden_size: int = 5376 + intermediate_size: int = 21504 + num_hidden_layers: int = 60 + num_attention_heads: int = 32 + num_key_value_heads: int = 16 + sliding_attention = [1024, 1024, 1024, 1024, 1024, False] + hidden_size_per_layer_input: int = 0 + num_kv_shared_layers: int = 0 + audio_config = None + vision_config = GEMMA4_VISION_31B_CONFIG + + +# unfused RoPE as addcmul_ RoPE diverges from reference code +def _apply_rotary_pos_emb(x, freqs_cis): + cos, sin = freqs_cis[0], freqs_cis[1] + half = x.shape[-1] // 2 + out = x * cos + out[..., :half] -= x[..., half:] * sin[..., :half] + out[..., half:] += x[..., :half] * sin[..., half:] + return out + +class Gemma4Attention(nn.Module): + def __init__(self, config, head_dim, device=None, dtype=None, ops=None): + super().__init__() + self.num_heads = config.num_attention_heads + self.num_kv_heads = config.num_key_value_heads + self.hidden_size = config.hidden_size + self.head_dim = head_dim + self.inner_size = self.num_heads * head_dim + + self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=config.qkv_bias, device=device, dtype=dtype) + self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * head_dim, bias=config.qkv_bias, device=device, dtype=dtype) + self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * head_dim, bias=config.qkv_bias, device=device, dtype=dtype) + self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype) + + self.q_norm = None + self.k_norm = None + if config.q_norm == "gemma3": + self.q_norm = RMSNorm(head_dim, eps=config.rms_norm_eps, device=device, 
dtype=dtype) + if config.k_norm == "gemma3": + self.k_norm = RMSNorm(head_dim, eps=config.rms_norm_eps, device=device, dtype=dtype) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask=None, + freqs_cis=None, + past_key_value=None, + sliding_window=None, + shared_kv=None, + ): + batch_size, seq_length, _ = hidden_states.shape + + xq = self.q_proj(hidden_states) + xq = xq.view(batch_size, seq_length, self.num_heads, self.head_dim).transpose(1, 2) + if self.q_norm is not None: + xq = self.q_norm(xq) + + if shared_kv is not None: + xk, xv = shared_kv + # Apply RoPE to Q only (K already has RoPE from source layer) + xq = _apply_rotary_pos_emb(xq, freqs_cis) + present_key_value = None + shareable_kv = None + else: + xk = self.k_proj(hidden_states).view(batch_size, seq_length, self.num_kv_heads, self.head_dim) + xv = self.v_proj(hidden_states).view(batch_size, seq_length, self.num_kv_heads, self.head_dim) + if self.k_norm is not None: + xk = self.k_norm(xk) + xv = rms_norm(xv) + xk = xk.transpose(1, 2) + xv = xv.transpose(1, 2) + xq = _apply_rotary_pos_emb(xq, freqs_cis) + xk = _apply_rotary_pos_emb(xk, freqs_cis) + + present_key_value = None + if past_key_value is not None: + cumulative_len = 0 + if len(past_key_value) > 0: + past_key, past_value, cumulative_len = past_key_value + xk = torch.cat((past_key, xk), dim=2) + xv = torch.cat((past_value, xv), dim=2) + new_cumulative = cumulative_len + seq_length + if sliding_window is not None and xk.shape[2] > sliding_window - 1: + cache_k = xk[:, :, -(sliding_window - 1):] + cache_v = xv[:, :, -(sliding_window - 1):] + else: + cache_k = xk + cache_v = xv + present_key_value = (cache_k, cache_v, new_cumulative) + + # KV for sharing: full xk/xv that SDPA sees (not evicted cache) + shareable_kv = (xk, xv) + + # GQA: pass unexpanded KV with enable_gqa when no sliding mask, + # expand heads when sliding mask is present + # has to be done within SDPA itself to match the reference code, pre-scaling expansion 
causes numerical differences + expand_kv = (self.num_heads != self.num_kv_heads and + sliding_window is not None and + xk.shape[2] >= sliding_window) + if expand_kv: + xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) + xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) + gqa_kwargs = {} if expand_kv else ({"enable_gqa": True} if self.num_heads != self.num_kv_heads else {}) + output = optimized_attention_for_device(xq.device, mask=attention_mask is not None, small_input=True)(xq, xk, xv, self.num_heads, mask=attention_mask, skip_reshape=True, scale=1.0, **gqa_kwargs) + + return self.o_proj(output), present_key_value, shareable_kv + + +class TransformerBlockGemma4(nn.Module): + def __init__(self, config, index, device=None, dtype=None, ops=None): + super().__init__() + if config.sliding_attention is not None: + self.sliding_attention = config.sliding_attention[index % len(config.sliding_attention)] + else: + self.sliding_attention = False + + head_dim = config.head_dim if self.sliding_attention else config.global_head_dim + + self.self_attn = Gemma4Attention(config, head_dim=head_dim, device=device, dtype=dtype, ops=ops) + + num_kv_shared = config.num_kv_shared_layers + first_kv_shared = config.num_hidden_layers - num_kv_shared + mlp_size = config.intermediate_size * 2 if config.use_double_wide_mlp and index >= first_kv_shared else None + self.mlp = MLP(config, device=device, dtype=dtype, ops=ops, intermediate_size=mlp_size) + + self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.pre_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.post_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + + self.hidden_size_per_layer_input = 
config.hidden_size_per_layer_input + if self.hidden_size_per_layer_input: + self.per_layer_input_gate = ops.Linear(config.hidden_size, self.hidden_size_per_layer_input, bias=False, device=device, dtype=dtype) + self.per_layer_projection = ops.Linear(self.hidden_size_per_layer_input, config.hidden_size, bias=False, device=device, dtype=dtype) + self.post_per_layer_input_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.register_buffer("layer_scalar", torch.ones(1, device=device, dtype=dtype)) + else: + self.layer_scalar = None + + def forward(self, x, attention_mask=None, freqs_cis=None, past_key_value=None, per_layer_input=None, shared_kv=None): + sliding_window = None + if self.sliding_attention: + sliding_window = self.sliding_attention + # For prefill > sliding window, add sliding window restriction to the causal mask. + if x.shape[1] > self.sliding_attention: + sw_mask = torch.zeros(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device) + sw_mask.masked_fill_(torch.ones_like(sw_mask, dtype=torch.bool).tril_(-self.sliding_attention), torch.finfo(x.dtype).min) + attention_mask = attention_mask + sw_mask if attention_mask is not None else sw_mask + freqs_cis = freqs_cis[1] + else: + freqs_cis = freqs_cis[0] + + residual = x + x = self.input_layernorm(x) + x, present_key_value, shareable_kv = self.self_attn( + hidden_states=x, attention_mask=attention_mask, freqs_cis=freqs_cis, + past_key_value=past_key_value, sliding_window=sliding_window, shared_kv=shared_kv, + ) + x = self.post_attention_layernorm(x) + x = residual + x + + residual = x + x = self.pre_feedforward_layernorm(x) + x = self.mlp(x) + x = self.post_feedforward_layernorm(x) + x = residual + x + + if self.hidden_size_per_layer_input and per_layer_input is not None: + residual = x + x = self.per_layer_input_gate(x) + x = torch.nn.functional.gelu(x, approximate="tanh") + x = x * per_layer_input + x = self.per_layer_projection(x) + x = 
self.post_per_layer_input_norm(x) + x = residual + x + + if self.layer_scalar is not None: + x = x * self.layer_scalar + + return x, present_key_value, shareable_kv + + +class Gemma4Transformer(nn.Module): + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.config = config + + self.embed_tokens = _make_scaled_embedding(ops, config.vocab_size, config.hidden_size, config.hidden_size ** 0.5, device, dtype) + + self.layers = nn.ModuleList([ + TransformerBlockGemma4(config, index=i, device=device, dtype=dtype, ops=ops) + for i in range(config.num_hidden_layers) + ]) + + self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) if config.final_norm else None + + # Precompute RoPE inv_freq on CPU to match reference code's exact value + rope_angles_global = int(config.partial_rotary_factor * config.global_head_dim // 2) + nope_global = config.global_head_dim // 2 - rope_angles_global + global_inv = 1.0 / (config.rope_theta[0] ** (torch.arange(0, 2 * rope_angles_global, 2).float() / config.global_head_dim)) + if nope_global > 0: + global_inv = torch.cat([global_inv, torch.zeros(nope_global)]) + self.register_buffer("_global_inv_freq", global_inv, persistent=False) + + sliding_inv = 1.0 / (config.rope_theta[1] ** (torch.arange(0, config.head_dim, 2).float() / config.head_dim)) + self.register_buffer("_sliding_inv_freq", sliding_inv, persistent=False) + + # Per-layer input mechanism + self.hidden_size_per_layer_input = config.hidden_size_per_layer_input + if self.hidden_size_per_layer_input: + self.embed_tokens_per_layer = _make_scaled_embedding(ops, config.vocab_size, config.num_hidden_layers * self.hidden_size_per_layer_input, self.hidden_size_per_layer_input ** 0.5, device, dtype) + self.per_layer_model_projection = ops.Linear( + config.hidden_size, config.num_hidden_layers * self.hidden_size_per_layer_input, + bias=False, device=device, dtype=dtype) + self.per_layer_projection_norm = RMSNorm( + 
self.hidden_size_per_layer_input, eps=config.rms_norm_eps, + device=device, dtype=dtype) + + def get_past_len(self, past_key_values): + for kv in past_key_values: + if len(kv) >= 3: + return kv[2] + return 0 + + def _freqs_from_inv(self, inv_freq, position_ids, device, dtype): + """Compute cos/sin from stored inv_freq""" + inv_exp = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(device) + pos_exp = position_ids[:, None, :].float() + freqs = (inv_exp @ pos_exp).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + return emb.cos().unsqueeze(1).to(dtype), emb.sin().unsqueeze(1).to(dtype) + + def compute_freqs_cis(self, position_ids, device, dtype=None): + global_freqs = self._freqs_from_inv(self._global_inv_freq, position_ids, device, dtype) + sliding_freqs = self._freqs_from_inv(self._sliding_inv_freq, position_ids, device, dtype) + return [global_freqs, sliding_freqs] + + def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, + final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=None, + past_key_values=None, input_ids=None): + if embeds is not None: + x = embeds + else: + x = self.embed_tokens(x, out_dtype=dtype) + + seq_len = x.shape[1] + past_len = 0 + if past_key_values is not None and len(past_key_values) > 0: + past_len = self.get_past_len(past_key_values) + + if position_ids is None: + position_ids = torch.arange(past_len, past_len + seq_len, device=x.device).unsqueeze(0) + + freqs_cis = self.compute_freqs_cis(position_ids, x.device, dtype=x.dtype) + + mask = None + min_val = torch.finfo(x.dtype).min + if attention_mask is not None: + mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1]) + mask = mask.masked_fill(mask.to(torch.bool), min_val) + + if seq_len > 1: + causal_mask = torch.zeros(past_len + seq_len, past_len + seq_len, 
dtype=x.dtype, device=x.device) + causal_mask.masked_fill_(torch.ones_like(causal_mask, dtype=torch.bool).triu_(1), min_val) + mask = mask + causal_mask if mask is not None else causal_mask + + # Per-layer inputs + per_layer_inputs = None + if self.hidden_size_per_layer_input: + num_layers = self.config.num_hidden_layers + hpl = self.hidden_size_per_layer_input + per_layer_proj = self.per_layer_model_projection(x) * (1.0 / (self.config.hidden_size ** 0.5)) + per_layer_proj = self.per_layer_projection_norm(per_layer_proj.reshape(*x.shape[:-1], num_layers, hpl)) + if input_ids is not None and input_ids.shape[1] == x.shape[1]: + per_layer_emb = self.embed_tokens_per_layer(input_ids).reshape(*input_ids.shape, num_layers, hpl) + per_layer_inputs = (per_layer_proj + per_layer_emb) * (0.5 ** 0.5) + else: + per_layer_inputs = per_layer_proj + + # KV sharing: later layers reuse KV from the last non-shared sliding/global layer + num_kv_shared = self.config.num_kv_shared_layers + first_kv_shared = self.config.num_hidden_layers - num_kv_shared if num_kv_shared > 0 else self.config.num_hidden_layers + shared_sliding_kv = None # KV from last non-shared sliding layer + shared_global_kv = None # KV from last non-shared global layer + + intermediate = None + next_key_values = [] + for i, layer in enumerate(self.layers): + past_kv = past_key_values[i] if past_key_values is not None and len(past_key_values) > 0 else None + + layer_kwargs = {} + if per_layer_inputs is not None: + layer_kwargs['per_layer_input'] = per_layer_inputs[:, :, i, :] + + is_sliding = hasattr(layer, 'sliding_attention') and layer.sliding_attention + if i >= first_kv_shared and num_kv_shared > 0: + shared = shared_sliding_kv if is_sliding else shared_global_kv + if shared is not None: + layer_kwargs['shared_kv'] = shared + + x, current_kv, shareable_kv = layer(x=x, attention_mask=mask, freqs_cis=freqs_cis, past_key_value=past_kv, **layer_kwargs) + + next_key_values.append(current_kv if current_kv is not None 
else ()) + + # Only track the last sliding/global before the sharing boundary + if i < first_kv_shared and shareable_kv is not None: + if is_sliding: + shared_sliding_kv = shareable_kv + else: + shared_global_kv = shareable_kv + + if i == intermediate_output: + intermediate = x.clone() + + if self.norm is not None: + x = self.norm(x) + + if len(next_key_values) > 0: + return x, intermediate, next_key_values + return x, intermediate + + +class Gemma4Base(BaseLlama, BaseGenerate, torch.nn.Module): + """Common base for all Gemma4 variants: text model + vision.""" + def _init_model(self, config, dtype, device, operations): + self.num_layers = config.num_hidden_layers + self.model = Gemma4Transformer(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + self.multi_modal_projector = Gemma4MultiModalProjector(config, dtype=dtype, device=device, ops=operations) + self.vision_model = Gemma4VisionEncoder(config.vision_config, dtype=dtype, device=device, ops=operations) + + def logits(self, x): + logits = super().logits(x) + cap = self.model.config.final_logit_softcapping + if cap: + logits = cap * torch.tanh(logits / cap) + return logits + + def init_kv_cache(self, batch, max_cache_len, device, execution_dtype): + past_key_values = [] + for _ in range(self.model.config.num_hidden_layers): + past_key_values.append(()) + return past_key_values + + def preprocess_embed(self, embed, device): + if embed["type"] == "image": + image = embed.pop("data").movedim(-1, 1) # [B, H, W, C] -> [B, C, H, W] + max_soft_tokens = embed.get("max_soft_tokens", None) + vision_out = self.vision_model(image.to(device, dtype=torch.float32), max_soft_tokens=max_soft_tokens) + return self.multi_modal_projector(vision_out), None + return None, None + + +class Gemma4AudioMixin: + """Adds audio support to a Gemma4 model.""" + def _init_audio(self, config, dtype, device, operations): + self.audio_model = Gemma4AudioEncoder(config.audio_config, dtype=dtype, device=device, 
ops=operations) + self.audio_projector = Gemma4AudioProjector({"audio_output_proj_dims": config.audio_config["output_proj_dims"], "text_hidden_size": config.hidden_size, "rms_norm_eps": config.rms_norm_eps}, dtype=dtype, device=device, ops=operations) + + def preprocess_embed(self, embed, device): + result, extra = super().preprocess_embed(embed, device) + if result is not None: + return result, extra + if embed["type"] == "audio": + audio = embed.pop("data").to(device, dtype=torch.float32) + audio_mask = embed.pop("mask", None) + if audio_mask is not None: + audio_mask = audio_mask.to(device) + audio_out = self.audio_model(audio, audio_mask=audio_mask) + return self.audio_projector(audio_out), None + return None, None + + +# Vision Encoder + +def _compute_vision_2d_rope(head_dim, pixel_position_ids, theta=100.0, device=None): + """Compute 2D RoPE for vision: separate frequencies for x and y dimensions. + + Args: + head_dim: dimension per head (e.g. 64) + pixel_position_ids: [batch, num_patches, 2] with (x, y) coords + theta: RoPE base frequency + Returns: + (cos, sin) each of shape [batch, num_patches, head_dim] + """ + rotary_dim_per_axis = head_dim // 2 + freq_indices = torch.arange(0, rotary_dim_per_axis, 2, device=device).float() + inv_freq = 1.0 / (theta ** (freq_indices / rotary_dim_per_axis)) + + all_cos, all_sin = [], [] + for i in range(2): # x and y + dim_positions = pixel_position_ids[:, :, i].float() # [batch, num_patches] + freqs = torch.einsum('bi,j->bij', dim_positions, inv_freq.to(device)) # [batch, num_patches, rotary_dim/2] + emb = torch.cat([freqs, freqs], dim=-1) # [batch, num_patches, rotary_dim] + all_cos.append(emb.cos()) + all_sin.append(emb.sin()) + + cos = torch.cat(all_cos, dim=-1).to(pixel_position_ids.device) # [batch, num_patches, head_dim] + sin = torch.cat(all_sin, dim=-1).to(pixel_position_ids.device) + return cos, sin + + +def _apply_vision_2d_rope(x, freqs): + """Apply 2D RoPE (multidimensional) to vision query/key states. 
+ + Splits x and cos/sin into ndim=2 parts, applies 1D RoPE to each independently. + + x: [batch, heads, seq, head_dim] + freqs: (cos, sin) each [batch, seq, head_dim] + """ + cos = freqs[0].unsqueeze(1) # [batch, 1, seq, head_dim] + sin = freqs[1].unsqueeze(1) + half = x.shape[-1] // 2 + a = _apply_rotary_pos_emb(x[..., :half], (cos[..., :half], sin[..., :half])) + b = _apply_rotary_pos_emb(x[..., half:], (cos[..., half:], sin[..., half:])) + return torch.cat([a, b], dim=-1) + + +class ClippedLinear(nn.Module): + """Linear layer with activation clipping (from quantization-aware training). + + Stores input_max/min and output_max/min as buffers loaded from checkpoint. + """ + def __init__(self, in_features, out_features, bias=False, device=None, dtype=None, ops=None): + super().__init__() + self.linear = ops.Linear(in_features, out_features, bias=bias, device=device, dtype=dtype) + self.register_buffer('input_max', torch.tensor(float('inf'), device=device, dtype=dtype)) + self.register_buffer('input_min', torch.tensor(float('-inf'), device=device, dtype=dtype)) + self.register_buffer('output_max', torch.tensor(float('inf'), device=device, dtype=dtype)) + self.register_buffer('output_min', torch.tensor(float('-inf'), device=device, dtype=dtype)) + + @property + def weight(self): + return self.linear.weight + + def forward(self, x): + x = x.clamp(min=self.input_min, max=self.input_max) + x = self.linear(x) + return x.clamp_(min=self.output_min, max=self.output_max) + + +class Gemma4VisionMLP(nn.Module): + """SwiGLU MLP matching gate_proj/up_proj/down_proj structure.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + intermediate_size = config["intermediate_size"] + self.gate_proj = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.up_proj = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.down_proj = 
ClippedLinear(intermediate_size, hidden_size, device=device, dtype=dtype, ops=ops) + + def forward(self, x): + return self.down_proj(torch.nn.functional.gelu(self.gate_proj(x), approximate="tanh") * self.up_proj(x)) + + +class Gemma4VisionAttention(nn.Module): + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.num_heads = config["num_attention_heads"] + self.head_dim = config.get("head_dim", self.hidden_size // self.num_heads) + + self.q_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.k_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.v_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.o_proj = ClippedLinear(self.num_heads * self.head_dim, self.hidden_size, device=device, dtype=dtype, ops=ops) + + self.q_norm = RMSNorm(self.head_dim, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.k_norm = RMSNorm(self.head_dim, eps=config["rms_norm_eps"], device=device, dtype=dtype) + + def forward(self, x, freqs, attention_mask=None): + batch_size, seq_length, _ = x.shape + + xq = self.q_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + xk = self.k_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + xv = self.v_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + + xq = self.q_norm(xq).transpose(1, 2) + xk = self.k_norm(xk).transpose(1, 2) + xv = rms_norm(xv) + + xq = _apply_vision_2d_rope(xq, freqs) + xk = _apply_vision_2d_rope(xk, freqs) + + xv = xv.to(xq.dtype).transpose(1, 2) + + output = optimized_attention_for_device(xq.device, mask=attention_mask is not None, small_input=True)(xq, xk, xv, self.num_heads, mask=attention_mask, skip_reshape=True, scale=1.0) + return self.o_proj(output) + + +class Gemma4VisionLayer(nn.Module): + 
def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.self_attn = Gemma4VisionAttention(config, device=device, dtype=dtype, ops=ops) + self.mlp = Gemma4VisionMLP(config, device=device, dtype=dtype, ops=ops) + norm_kwargs = dict(eps=config["rms_norm_eps"], device=device, dtype=dtype) + hidden = config["hidden_size"] + self.input_layernorm = RMSNorm(hidden, **norm_kwargs) + self.post_attention_layernorm = RMSNorm(hidden, **norm_kwargs) + self.pre_feedforward_layernorm = RMSNorm(hidden, **norm_kwargs) + self.post_feedforward_layernorm = RMSNorm(hidden, **norm_kwargs) + + def forward(self, x, freqs, attention_mask=None): + residual = x + x = self.input_layernorm(x) + x = self.self_attn(x, freqs, attention_mask=attention_mask) + x = self.post_attention_layernorm(x) + x = residual + x + + residual = x + x = self.pre_feedforward_layernorm(x) + x = self.mlp(x) + x = self.post_feedforward_layernorm(x) + x = residual + x + return x + + +class Gemma4PatchEmbedder(nn.Module): + """Patch embedding with learned 2D position embeddings via one-hot lookup.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + patch_size = config["patch_size"] + self.patch_size = patch_size + self.position_embedding_size = config.get("position_embedding_size", 10240) + + self.input_proj = ops.Linear(3 * patch_size * patch_size, hidden_size, bias=False, device=device, dtype=dtype) + self.position_embedding_table = nn.Parameter( + torch.empty(2, self.position_embedding_size, hidden_size, device=device, dtype=dtype) + ) + + def forward(self, patches, pixel_position_ids): + """ + patches: [B, num_patches, 3*patch_size²] in [0,1] range (normalized to [-1,1] inside, matching HF) + pixel_position_ids: [B, num_patches, 2] with (x,y) positions, (-1,-1) for padding + """ + hidden_states = self.input_proj((2.0 * (patches - 0.5)).to(self.input_proj.weight.dtype)) + + clamped_positions = 
pixel_position_ids.clamp(min=0) + pos_table = comfy.model_management.cast_to_device(self.position_embedding_table, hidden_states.device, hidden_states.dtype) + position_embeddings = pos_table[0][clamped_positions[..., 0]] + pos_table[1][clamped_positions[..., 1]] + + # Zero out position embeddings for padding patches (matching HF) + padding_positions = (pixel_position_ids == -1).all(dim=-1) + position_embeddings = torch.where(padding_positions.unsqueeze(-1), 0.0, position_embeddings) + + return hidden_states + position_embeddings + + +class Gemma4VisionEncoderLayers(nn.Module): + """Wrapper to produce state dict keys as encoder.layers.X.*""" + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.layers = nn.ModuleList([ + Gemma4VisionLayer(config, device=device, dtype=dtype, ops=ops) + for _ in range(config["num_hidden_layers"]) + ]) + + +class Gemma4VisionEncoder(nn.Module): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.config = config + self.hidden_size = config["hidden_size"] + self.head_dim = config.get("head_dim", config["hidden_size"] // config["num_attention_heads"]) + self.patch_size = config["patch_size"] + self.pooling_kernel_size = config.get("pooling_kernel_size", 3) + self.root_hidden_size = self.hidden_size ** 0.5 + + self.patch_embedder = Gemma4PatchEmbedder(config, device=device, dtype=dtype, ops=ops) + self.encoder = Gemma4VisionEncoderLayers(config, dtype=dtype, device=device, ops=ops) + + def forward(self, pixel_values, max_soft_tokens=None): + """ + pixel_values: [B, C, H, W] in [0,1] range + max_soft_tokens: if provided, pad to max_soft_tokens * k² total patches + """ + batch_size, _, height, width = pixel_values.shape + ps = self.patch_size + k = self.pooling_kernel_size + patches_h, patches_w = height // ps, width // ps + num_patches = patches_h * patches_w + output_length = max_soft_tokens if max_soft_tokens is not None else num_patches // (k * k) + 
n_padding = output_length * k * k - num_patches + + # Patchify and build position grid + patches = pixel_values.reshape(batch_size, -1, patches_h, ps, patches_w, ps) + patches = patches.permute(0, 2, 4, 3, 5, 1).reshape(batch_size, num_patches, -1) + grid_y, grid_x = torch.meshgrid(torch.arange(patches_h, device=pixel_values.device), torch.arange(patches_w, device=pixel_values.device), indexing='ij') + position_ids = torch.stack([grid_x.flatten(), grid_y.flatten()], dim=-1).unsqueeze(0).expand(batch_size, -1, -1) + + # Append zero-pixel padding with (-1,-1) positions + if n_padding > 0: + patches = torch.cat([patches, patches.new_zeros(batch_size, n_padding, patches.shape[-1])], dim=1) + position_ids = torch.cat([position_ids, position_ids.new_full((batch_size, n_padding, 2), -1)], dim=1) + + padding = (position_ids == -1).all(dim=-1) + + # Embed, encode, pool + x = self.patch_embedder(patches, position_ids) + freqs = _compute_vision_2d_rope(self.head_dim, position_ids, device=pixel_values.device) + freqs = tuple(t.to(x.dtype) for t in freqs) + if n_padding > 0: + mask = padding.unsqueeze(1).unsqueeze(2).expand(-1, 1, position_ids.shape[1], -1) + mask = torch.zeros_like(mask, dtype=x.dtype).masked_fill_(mask, torch.finfo(x.dtype).min) + else: + mask = None + + for layer in self.encoder.layers: + x = layer(x, freqs, attention_mask=mask) + + if n_padding > 0: + x = x.masked_fill(padding.unsqueeze(-1), 0.0) + + # Average pool by spatial position + clamped = position_ids.clamp(min=0) + max_x = clamped[:, :, 0].max(dim=-1, keepdim=True)[0] + 1 + ki = torch.div(clamped, k, rounding_mode="floor") + ki = ki[:, :, 0] + (max_x // k) * ki[:, :, 1] + weights = torch.nn.functional.one_hot(ki.long(), output_length).float() / (k * k) + x = (weights.transpose(1, 2) @ x.float()).to(x.dtype) + + # Strip empty output tokens + valid_out = ~((weights == 0).all(dim=1)) + if valid_out.any() and not valid_out.all(): + x = x[:, valid_out[0]] if batch_size > 1 else x[valid_out].unsqueeze(0) 
+ + return x * self.root_hidden_size + + +class Gemma4RMSNormProjector(nn.Module): + """Shared projector: parameterless RMSNorm → linear. Used for both vision and audio.""" + def __init__(self, in_dim, out_dim, dtype=None, device=None, ops=None): + super().__init__() + self.embedding_projection = ops.Linear(in_dim, out_dim, bias=False, device=device, dtype=dtype) + + def forward(self, x): + return self.embedding_projection(rms_norm(x)) + + +class Gemma4MultiModalProjector(Gemma4RMSNormProjector): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__(config.vision_config["hidden_size"], config.hidden_size, dtype=dtype, device=device, ops=ops) + + +# Audio Encoder + +class Gemma4AudioConvSubsampler(nn.Module): + """2D convolution subsampling for audio features""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + eps = config["rms_norm_eps"] + self.layer0 = nn.ModuleDict({ + 'conv': ops.Conv2d(1, 128, kernel_size=3, stride=2, padding=1, bias=False, device=device, dtype=dtype), + 'norm': ops.LayerNorm(128, eps=eps, elementwise_affine=True, bias=False, device=device, dtype=dtype), + }) + self.layer1 = nn.ModuleDict({ + 'conv': ops.Conv2d(128, 32, kernel_size=3, stride=2, padding=1, bias=False, device=device, dtype=dtype), + 'norm': ops.LayerNorm(32, eps=eps, elementwise_affine=True, bias=False, device=device, dtype=dtype), + }) + # proj_input_dim = (128 // 4) * 32 = 1024 + self.input_proj_linear = ops.Linear(1024, config["hidden_size"], bias=False, device=device, dtype=dtype) + + def _conv_layer(self, x, layer, mask): + if mask is not None: + x = x * mask[:, None, :, None].to(x.device) + x = layer['conv'](x.to(layer['conv'].weight.dtype)) + x = torch.relu(layer['norm'](x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2).contiguous()) + if mask is not None: + mask = mask[:, ::2] + return x, mask + + def forward(self, x, mask=None): + x = x.unsqueeze(1) + x, mask = self._conv_layer(x, self.layer0, mask) + x, mask 
= self._conv_layer(x, self.layer1, mask) + batch_size, _, seq_len, _ = x.shape + x = x.permute(0, 2, 3, 1).contiguous().reshape(batch_size, seq_len, -1) + return self.input_proj_linear(x), mask + + +class Gemma4AudioFeedForward(nn.Module): + """Conformer feed-forward with residual scaling.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + intermediate_size = config.get("intermediate_size", hidden_size * 4) + self.pre_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.ffw_layer_1 = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.ffw_layer_2 = ClippedLinear(intermediate_size, hidden_size, device=device, dtype=dtype, ops=ops) + self.post_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.post_layer_scale = config.get("residual_weight", 0.5) + + def forward(self, x): + residual = x + x = self.pre_layer_norm(x) + x = torch.nn.functional.silu(self.ffw_layer_1(x)) + x = self.ffw_layer_2(x) + x = self.post_layer_norm(x) + x = x * self.post_layer_scale + return x + residual + + +class Gemma4AudioRelPositionalEncoding(nn.Module): + """Sinusoidal relative positional encoding for audio attention.""" + def __init__(self, config, device=None, dtype=None): + super().__init__() + hidden_size = config["hidden_size"] + context_left = config.get("attention_context_left", 13) + context_right = config.get("attention_context_right", 0) + self.chunk_size = config.get("attention_chunk_size", 12) + self.context_size = self.chunk_size + context_left - 1 + context_right + + num_timescales = hidden_size // 2 + log_inc = math.log(10000.0) / max(num_timescales - 1, 1) + inv_timescales = torch.exp(torch.arange(num_timescales) * -log_inc).to(dtype=dtype).unsqueeze(0).unsqueeze(0) + self.register_buffer("inv_timescales", inv_timescales, persistent=False) + + def forward(self, 
hidden_states): + positions = torch.arange(self.chunk_size, -1, -1, device=hidden_states.device).unsqueeze(-1) + scaled = positions * self.inv_timescales.to(device=hidden_states.device) + return torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1).to(dtype=hidden_states.dtype) + + +class Gemma4AudioAttention(nn.Module): + """Chunked block attention with relative position bias and softcap.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.num_heads = config["num_attention_heads"] + self.head_dim = self.hidden_size // self.num_heads + self.chunk_size = config.get("attention_chunk_size", 12) + self.max_past_horizon = config.get("attention_context_left", 13) - 1 + self.max_future_horizon = config.get("attention_context_right", 0) + self.context_size = self.chunk_size + self.max_past_horizon + self.max_future_horizon + + self.q_scale = (self.head_dim ** -0.5) / math.log(2) + self.k_scale = math.log(1 + math.e) / math.log(2) + self.register_buffer("softcap", torch.tensor(config.get("attention_logit_cap", 50.0), dtype=dtype), persistent=False) + + self.q_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.k_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.v_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.post = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.per_dim_scale = nn.Parameter(torch.empty(self.head_dim, device=device, dtype=dtype)) + self.relative_k_proj = ops.Linear(self.hidden_size, self.hidden_size, bias=False, device=device, dtype=dtype) + + def _convert_to_block(self, x): + B, S, H, D = x.shape + num_blocks = (S + self.chunk_size - 1) // self.chunk_size + pad = num_blocks * self.chunk_size - S + x = torch.nn.functional.pad(x, (0, 0, 0, 0, 0, pad)) + return x.reshape(B, 
num_blocks, self.chunk_size, H, D).contiguous() + + def _extract_block_context(self, x): + x = torch.nn.functional.pad(x, (0, 0, 0, 0, self.max_past_horizon, self.max_future_horizon + self.chunk_size - 1)) + x = x.unfold(1, self.context_size, self.chunk_size) + return torch.movedim(x, -1, 2).contiguous() + + def _rel_shift(self, x): + B, H, NB, BS, PL = x.shape + CS = self.context_size + x = torch.nn.functional.pad(x, (0, CS + 1 - PL)) + x = x.view(B, H, NB, BS * (CS + 1)) + x = x[..., :BS * CS] + return x.view(B, H, NB, BS, CS) + + def _build_blocked_mask(self, seq_len, num_blocks, device, audio_mask=None): + """Build 5D boolean blocked attention mask (True=attend, False=mask)""" + q = torch.arange(seq_len, device=device) + dist = q[:, None] - q[None, :] + mask = (dist >= 0) & (dist < self.max_past_horizon) + if self.max_future_horizon > 0: + mask = mask | ((dist < 0) & ((-dist) < self.max_future_horizon)) + if audio_mask is not None: + mask = mask & audio_mask[0, None, :].bool() + m = mask[None, None] + # Reshape to blocked 5D matching reference code + p = num_blocks * self.chunk_size - seq_len + m = torch.nn.functional.pad(m, (0, p, 0, p), value=False) + m = m.reshape(1, 1, num_blocks, self.chunk_size, -1) + m = torch.nn.functional.pad(m, (self.max_past_horizon, self.max_future_horizon), value=False) + idx = (torch.arange(num_blocks, device=device) * self.chunk_size)[:, None] + torch.arange(self.context_size, device=device)[None, :] + return m.gather(-1, idx[None, None, :, None, :].expand(1, 1, -1, self.chunk_size, -1)) + + def forward(self, x, position_embeddings=None, attn_mask=None): + B, S, _ = x.shape + + q = self.q_proj(x).float().view(B, S, self.num_heads, self.head_dim) + k = self.k_proj(x).float().view(B, S, self.num_heads, self.head_dim) + v = self.v_proj(x).float().view(B, S, self.num_heads, self.head_dim) + + q = q * self.q_scale * torch.nn.functional.softplus(self.per_dim_scale) + k = k * self.k_scale + + q_blocks = self._convert_to_block(q) + 
k_context = self._extract_block_context(k) + v_context = self._extract_block_context(v) + num_blocks = q_blocks.shape[1] + + rel_k = self.relative_k_proj(position_embeddings).view(-1, self.num_heads, self.head_dim).to(q.dtype) + + queries = q_blocks.permute(0, 3, 1, 2, 4) # [B, H, NB, CS, D] + matrix_ac = queries @ k_context.permute(0, 3, 1, 4, 2) + + queries_flat = queries.reshape(B, self.num_heads, -1, self.head_dim) + matrix_bd = queries_flat @ rel_k.permute(1, 2, 0) + matrix_bd = matrix_bd.reshape(B, self.num_heads, num_blocks, self.chunk_size, -1) + matrix_bd = self._rel_shift(matrix_bd) + + attn_weights = matrix_ac + matrix_bd + attn_weights = torch.tanh(attn_weights / self.softcap) * self.softcap + + # Mask out invalid positions in chunk context (matching reference's masked_fill approach) + if attn_mask is None: + attn_mask = self._build_blocked_mask(S, num_blocks, x.device) + attn_weights = attn_weights.masked_fill(attn_mask.logical_not(), -1e9) + + attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(v.dtype) + out = attn_weights @ v_context.permute(0, 3, 1, 2, 4) + out = out.permute(0, 2, 3, 1, 4).reshape(B, num_blocks * self.chunk_size, -1) + out = out[:, :S].contiguous() + return self.post(out.to(self.post.linear.weight.dtype)) + + +class Gemma4AudioLConv1d(nn.Module): + """Lightweight convolution with standard GLU.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + conv_kernel_size = config.get("conv_kernel_size", 5) + self.pre_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.linear_start = ClippedLinear(hidden_size, hidden_size * 2, device=device, dtype=dtype, ops=ops) + # Causal conv: left-pad only + self.depthwise_conv1d = ops.Conv1d(hidden_size, hidden_size, kernel_size=conv_kernel_size, padding=0, groups=hidden_size, bias=False, device=device, dtype=dtype) + self.conv_left_pad = 
conv_kernel_size - 1 # causal: pad left by kernel-1 + self.conv_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.linear_end = ClippedLinear(hidden_size, hidden_size, device=device, dtype=dtype, ops=ops) + + def forward(self, x): + residual = x + x = self.pre_layer_norm(x) + x = self.linear_start(x) + x = torch.nn.functional.glu(x, dim=-1) + x = x.transpose(1, 2) + x = torch.nn.functional.pad(x, (self.conv_left_pad, 0)) + x = self.depthwise_conv1d(x).transpose(1, 2) + x = self.conv_norm(x) + x = torch.nn.functional.silu(x) + x = self.linear_end(x) + return x + residual + + +class Gemma4AudioLayer(nn.Module): + """Conformer block: FFN1 -> Attention -> LConv -> FFN2.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.feed_forward1 = Gemma4AudioFeedForward(config, device=device, dtype=dtype, ops=ops) + self.self_attn = Gemma4AudioAttention(config, device=device, dtype=dtype, ops=ops) + norm_kwargs = dict(eps=config["rms_norm_eps"], device=device, dtype=dtype) + hidden_size = config["hidden_size"] + self.norm_pre_attn = RMSNorm(hidden_size, **norm_kwargs) + self.norm_post_attn = RMSNorm(hidden_size, **norm_kwargs) + self.lconv1d = Gemma4AudioLConv1d(config, device=device, dtype=dtype, ops=ops) + self.feed_forward2 = Gemma4AudioFeedForward(config, device=device, dtype=dtype, ops=ops) + self.norm_out = RMSNorm(hidden_size, **norm_kwargs) + + def forward(self, x, position_embeddings=None, attn_mask=None): + x = self.feed_forward1(x) + + residual = x + x = self.norm_pre_attn(x) + x = self.self_attn(x, position_embeddings=position_embeddings, attn_mask=attn_mask) + x = self.norm_post_attn(x) + x = x + residual + + x = self.lconv1d(x) + x = self.feed_forward2(x) + + x = self.norm_out(x) + return x + + +class Gemma4AudioEncoder(nn.Module): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.output_proj_dims = 
config.get("output_proj_dims", 1536) + + self.subsample_conv_projection = Gemma4AudioConvSubsampler(config, device=device, dtype=dtype, ops=ops) + self.rel_pos_enc = Gemma4AudioRelPositionalEncoding(config, device=device, dtype=dtype) + + self.layers = nn.ModuleList([ + Gemma4AudioLayer(config, device=device, dtype=dtype, ops=ops) + for _ in range(config["num_hidden_layers"]) + ]) + + self.output_proj = ops.Linear(self.hidden_size, self.output_proj_dims, bias=True, device=device, dtype=dtype) + + def forward(self, audio_features, audio_mask=None): + x, audio_mask = self.subsample_conv_projection(audio_features, audio_mask) + position_embeddings = self.rel_pos_enc(x) + + # Build blocked attention mask once for all layers + attn_mask = self.layers[0].self_attn._build_blocked_mask( + x.shape[1], (x.shape[1] + self.layers[0].self_attn.chunk_size - 1) // self.layers[0].self_attn.chunk_size, + x.device, audio_mask=audio_mask) + + for layer in self.layers: + x = layer(x, position_embeddings=position_embeddings, attn_mask=attn_mask) + + x = self.output_proj(x) + return x + + +class Gemma4AudioProjector(Gemma4RMSNormProjector): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__(config.get("audio_output_proj_dims", 1536), config.get("text_hidden_size", 2560), dtype=dtype, device=device, ops=ops) + + +# Tokenizer and Wrappers + +class Gemma4_Tokenizer(): + tokenizer_json_data = None + + def state_dict(self): + if self.tokenizer_json_data is not None: + return {"tokenizer_json": self.tokenizer_json_data} + return {} + + def _extract_mel_spectrogram(self, waveform, sample_rate): + """Extract 128-bin log mel spectrogram. + Uses numpy for FFT/matmul/log to produce bit-identical results with reference code. 
+ """ + # Mix to mono first, then resample to 16kHz + if waveform.dim() > 1 and waveform.shape[0] > 1: + waveform = waveform.mean(dim=0, keepdim=True) + if waveform.dim() == 1: + waveform = waveform.unsqueeze(0) + audio = waveform.squeeze(0).float().numpy() + if sample_rate != 16000: + # Use scipy's resample_poly with a high-quality FIR filter to get as close as possible to librosa's resampling (while still not full match) + from scipy.signal import resample_poly, firwin + from math import gcd + g = gcd(sample_rate, 16000) + up, down = 16000 // g, sample_rate // g + L = max(up, down) + h = firwin(160 * L + 1, 0.96 / L, window=('kaiser', 6.5)) + audio = resample_poly(audio, up, down, window=h).astype(np.float32) + n = len(audio) + + # Pad to multiple of 128, build sample-level mask + if n % 128 != 0: + audio = np.pad(audio, (0, 128 - n % 128)) + mask_raw = np.ones(len(audio), dtype=np.float32) + mask_raw[n:] = 0.0 + + # Semicausal padding: 160 zeros prepended + audio = np.pad(audio, (160, 0)) + mask_raw = np.pad(mask_raw, (160, 0)) + + # Extract 321-sample frames via stride tricks, drop last → 320 + nf = (len(audio) - 321) // 160 + 1 + strides = (audio.strides[0] * 160, audio.strides[0]) + frames = np.lib.stride_tricks.as_strided(audio, (nf, 321), strides)[..., :-1].copy() + + # Periodic Hann window, FFT magnitude, mel filterbank, log + window = (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(320) / 320)).astype(np.float32) + magnitude = np.abs(np.fft.rfft(frames * window, n=512, axis=-1)) + mel_fb = self._build_mel_filterbank() + log_mel = np.log(np.matmul(magnitude, mel_fb) + np.float64(0.001)).astype(np.float32) + + # Frame mask: valid when last sample in window is real audio + mask = mask_raw[np.arange(nf) * 160 + 320].astype(bool) + log_mel = log_mel * mask[:, None] + return torch.from_numpy(log_mel), torch.from_numpy(mask) # [T, 128], [T] + + @staticmethod + def _build_mel_filterbank(): + """Build 128-bin HTK mel filterbank [257, 128] for 512-pt FFT at 16kHz.""" + 
mel_freqs = np.linspace(0.0, 2595.0 * np.log10(1.0 + 8000.0 / 700.0), 130) + filter_freqs = 700.0 * (10.0 ** (mel_freqs / 2595.0) - 1.0) + fft_freqs = np.linspace(0, 16000 // 2, 257) + filter_diff = np.diff(filter_freqs) + slopes = np.expand_dims(filter_freqs, 0) - np.expand_dims(fft_freqs, 1) + down_slopes = -slopes[:, :-2] / filter_diff[:-1] + up_slopes = slopes[:, 2:] / filter_diff[1:] + return np.maximum(np.zeros(1), np.minimum(down_slopes, up_slopes)) + + def tokenize_with_weights(self, text, return_word_ids=False, image=None, audio=None, video=None, llama_template=None, skip_template=True, thinking=False, **kwargs): + + # Process audio + audio_features = [] + if audio is not None: + waveform = audio["waveform"].squeeze(0) if hasattr(audio, "__getitem__") else audio + sample_rate = audio.get("sample_rate", 16000) if hasattr(audio, "get") else 16000 + mel, mel_mask = self._extract_mel_spectrogram(waveform, sample_rate) + audio_features = [(mel.unsqueeze(0), mel_mask.unsqueeze(0))] # ([1, T, 128], [1, T]) + + # Process image/video frames + is_video = video is not None + source = video if is_video else image + images = [] + if source is not None: + samples = source.movedim(-1, 1) # [B, C, H, W] + num_frames = samples.shape[0] + + # Subsample video to 1fps + if is_video: + fps = kwargs.get("fps", 24) + step = max(1, round(fps)) + indices = list(range(0, num_frames, step)) + if len(indices) == 0: + indices = [0] + samples = samples[indices] + num_frames = len(indices) + + h, w = samples.shape[2], samples.shape[3] + patch_size = 16 + pooling_k = 3 + max_soft_tokens = 70 if is_video else 280 # video uses smaller token budget per frame + max_patches = max_soft_tokens * pooling_k * pooling_k + target_px = max_patches * patch_size * patch_size + factor = (target_px / (h * w)) ** 0.5 + side_mult = pooling_k * patch_size + target_h = max(int(factor * h // side_mult) * side_mult, side_mult) + target_w = max(int(factor * w // side_mult) * side_mult, side_mult) + + import 
torchvision.transforms.functional as TVF + for i in range(num_frames): + # rescaling to match reference code + s = (samples[i].clamp(0, 1) * 255).to(torch.uint8) # [C, H, W] uint8 + if target_h != h or target_w != w: + s = TVF.resize(s, [target_h, target_w], interpolation=TVF.InterpolationMode.BICUBIC, antialias=True) + s = s.float() * (1.0 / 255.0) + images.append({"pixels": s.unsqueeze(0).movedim(1, -1)[:, :, :, :3], "max_soft_tokens": max_soft_tokens}) + + if text.startswith('<|turn>'): + skip_template = True + + if skip_template: + llama_text = text + else: + if llama_template is not None: + llama_text = llama_template.format(text) + else: + # Build template from modalities present + system = "<|turn>system\n<|think|>\n" if thinking else "" + media = "" + if len(images) > 0: + if is_video: + media += "\n\n" + for i in range(len(images)): + ts = f"{int(i // 60):02d}:{int(i % 60):02d}" + sep = "" if i == 0 else " " + media += f"{sep}{ts} <|image><|video|>" + media += "\n\n" + else: + media += "\n\n" + for i in range(len(images)): + if i > 0: + media += "\n\n\n\n" + media += "<|image><|image|>" + media += "\n\n" + if len(audio_features) > 0: + # Compute audio token count (always at 16kHz) + num_samples = int(waveform.shape[-1] * 16000 / sample_rate) if sample_rate != 16000 else waveform.shape[-1] + _fl = 320 # int(round(16000 * 20.0 / 1000.0)) + _hl = 160 # int(round(16000 * 10.0 / 1000.0)) + _nmel = (num_samples + _fl // 2 - (_fl + 1)) // _hl + 1 + _t = _nmel + for _ in range(2): + _t = (_t + 2 - 3) // 2 + 1 + n_audio_tokens = min(_t, 750) + media += "<|audio>" + "<|audio|>" * n_audio_tokens + "" + llama_text = f"{system}<|turn>user\n{media}{text}\n<|turn>model\n" + + text_tokens = super().tokenize_with_weights(llama_text, return_word_ids) + + def _replace_placeholders(token_list, token_id, embeds): + """Replace first placeholder with embed dict, remove remaining consecutive ones.""" + embed_idx = 0 + i = 0 + while i < len(token_list): + if token_list[i][0] == 
token_id and embed_idx < len(embeds): + token_list[i] = (embeds[embed_idx],) + token_list[i][1:] + embed_idx += 1 + i += 1 + while i < len(token_list) and token_list[i][0] == token_id: + token_list.pop(i) + else: + i += 1 + + if len(images) > 0: + img_token_id = 258884 if is_video else 258880 + img_embeds = [{"type": "image", "data": img["pixels"], "max_soft_tokens": img["max_soft_tokens"]} for img in images] + for r in text_tokens: + _replace_placeholders(r, img_token_id, img_embeds) + + if len(audio_features) > 0: + aud_embeds = [{"type": "audio", "data": mel, "mask": mask} for mel, mask in audio_features] + for r in text_tokens: + _replace_placeholders(r, 258881, aud_embeds) + + return text_tokens + + +class _Gemma4Tokenizer: + """Tokenizer using the tokenizers (Gemma4 doesn't come with sentencepiece model)""" + def __init__(self, tokenizer_json_bytes=None, **kwargs): + from tokenizers import Tokenizer + if isinstance(tokenizer_json_bytes, torch.Tensor): + tokenizer_json_bytes = bytes(tokenizer_json_bytes.tolist()) + self.tokenizer = Tokenizer.from_str(tokenizer_json_bytes.decode("utf-8")) + + @classmethod + def from_pretrained(cls, tokenizer_data, **kwargs): + return cls(tokenizer_json_bytes=tokenizer_data, **kwargs) + + def __call__(self, text): + return {"input_ids": self.tokenizer.encode(text, add_special_tokens=False).ids} + + def get_vocab(self): + return self.tokenizer.get_vocab() + + def convert_tokens_to_ids(self, tokens): + return [self.tokenizer.token_to_id(t) for t in tokens] + + def decode(self, ids, **kwargs): + return self.tokenizer.decode(ids, skip_special_tokens=kwargs.get("skip_special_tokens", False)) + + +# Tokenizer +class Gemma4SDTokenizer(Gemma4_Tokenizer, sd1_clip.SDTokenizer): + embedding_size = 2560 + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_json = tokenizer_data.get("tokenizer_json", None) + self.tokenizer_json_data = tokenizer_json + super().__init__(tokenizer_json, pad_with_end=False, 
embedding_size=self.embedding_size, embedding_key='gemma4', tokenizer_class=_Gemma4Tokenizer, has_start_token=True, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_left=True, disable_weights=True, start_token=2, tokenizer_data=tokenizer_data) + + def decode(self, token_ids, **kwargs): + text = super().decode(token_ids, skip_special_tokens=False) + # Translate thinking channel markers to standard / tags + text = text.replace("<|channel>thought\n", "\n") + text = text.replace("", "") + # Strip remaining special tokens + text = text.replace("", "").replace("", "").strip() + return text + + +class Gemma4Tokenizer(sd1_clip.SD1Tokenizer): + tokenizer_class = Gemma4SDTokenizer + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma4", tokenizer=self.tokenizer_class) + + +# Model wrappers +class Gemma4Model(sd1_clip.SDClipModel): + model_class = None + def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}): + self.dtypes = set() + self.dtypes.add(dtype) + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=self.model_class, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + + def process_tokens(self, tokens, device): + embeds, _, _, _ = super().process_tokens(tokens, device) + return embeds + + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0): + if isinstance(tokens, dict): + tokens = next(iter(tokens.values())) + tokens_only = [[t[0] for t in b] for b in tokens] + embeds, _, _, embeds_info = sd1_clip.SDClipModel.process_tokens(self, tokens_only, self.execution_device) + seq_len = embeds.shape[1] 
+ ids = [0] * seq_len + expanded_idx = 0 + embed_map = {info["index"]: info["size"] for info in embeds_info} + for t in tokens_only[0]: + if expanded_idx in embed_map: + expanded_idx += embed_map[expanded_idx] + elif isinstance(t, int): + if expanded_idx < seq_len: + ids[expanded_idx] = t + expanded_idx += 1 + else: + expanded_idx += 1 + initial_token_ids = [ids] + input_ids = torch.tensor(initial_token_ids, device=self.execution_device) + return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, initial_tokens=initial_token_ids[0], presence_penalty=presence_penalty, initial_input_ids=input_ids) + + +def gemma4_te(dtype_llama=None, llama_quantization_metadata=None, model_class=None): + clip_model = type('Gemma4Model_', (Gemma4Model,), {'model_class': model_class}) + class Gemma4TEModel_(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + if dtype_llama is not None: + dtype = dtype_llama + super().__init__(device=device, dtype=dtype, name="gemma4", clip_model=clip_model, model_options=model_options) + return Gemma4TEModel_ + + +# Variants + +def _make_variant(config_cls): + audio = config_cls.audio_config is not None + bases = (Gemma4AudioMixin, Gemma4Base) if audio else (Gemma4Base,) + class Variant(*bases): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + self._init_model(config_cls(**config_dict), dtype, device, operations) + if audio: + self._init_audio(self.model.config, dtype, device, operations) + embedding_size = config_cls.hidden_size + if embedding_size != Gemma4SDTokenizer.embedding_size: + tok_cls = type('T', (Gemma4SDTokenizer,), {'embedding_size': embedding_size}) + class Tokenizer(Gemma4Tokenizer): + tokenizer_class = tok_cls + Variant.tokenizer = Tokenizer + 
else: + Variant.tokenizer = Gemma4Tokenizer + return Variant + +Gemma4_E4B = _make_variant(Gemma4Config) +Gemma4_E2B = _make_variant(Gemma4_E2B_Config) +Gemma4_31B = _make_variant(Gemma4_31B_Config) diff --git a/comfy/text_encoders/hidream_o1.py b/comfy/text_encoders/hidream_o1.py new file mode 100644 index 000000000..5d287b784 --- /dev/null +++ b/comfy/text_encoders/hidream_o1.py @@ -0,0 +1,119 @@ +"""HiDream-O1-Image tokenizer-only text encoder. + +The real Qwen3-VL backbone runs inside diffusion_model.* every step, so this +module just tokenizes the prompt into text_input_ids and emits them as +conditioning. Position ids / token_types / vinput_mask depend on target H/W +and are built later in model_base.HiDreamO1.extra_conds. +""" + +import os + +import torch +from transformers import Qwen2Tokenizer + +from comfy import sd1_clip + + +# Qwen3-VL special tokens +IM_START_ID = 151644 +IM_END_ID = 151645 +ASSISTANT_ID = 77091 +USER_ID = 872 +NEWLINE_ID = 198 +VISION_START_ID = 151652 +VISION_END_ID = 151653 +IMAGE_TOKEN_ID = 151655 +VIDEO_TOKEN_ID = 151656 +# HiDream-O1-specific tokens +BOI_TOKEN_ID = 151669 +BOR_TOKEN_ID = 151670 +EOR_TOKEN_ID = 151671 +BOT_TOKEN_ID = 151672 +TMS_TOKEN_ID = 151673 + + +class HiDreamO1QwenTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer" + ) + super().__init__( + tokenizer_path, + pad_with_end=False, + embedding_size=4096, + embedding_key="hidream_o1", + tokenizer_class=Qwen2Tokenizer, + has_start_token=False, + has_end_token=False, + pad_to_max_length=False, + max_length=99999999, + min_length=1, + pad_token=151643, + tokenizer_data=tokenizer_data, + ) + + +class HiDreamO1Tokenizer(sd1_clip.SD1Tokenizer): + """Wraps prompt in the upstream chat template ending with boi/tms markers. + Image tokens get spliced in at sample time once target H/W is known. 
+ """ + + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__( + embedding_directory=embedding_directory, + tokenizer_data=tokenizer_data, + name="hidream_o1", + tokenizer=HiDreamO1QwenTokenizer, + ) + + def tokenize_with_weights(self, text, return_word_ids=False, **kwargs): + text_tokens_dict = super().tokenize_with_weights( + text, return_word_ids=return_word_ids, disable_weights=True, **kwargs + ) + text_tuples = text_tokens_dict["hidream_o1"][0] + text_tuples = [t for t in text_tuples if int(t[0]) != 151643] # strip pad + + # <|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n<|boi|><|tms|> + def tok(tid): + return (tid, 1.0) if not return_word_ids else (tid, 1.0, 0) + + prefix = [tok(IM_START_ID), tok(USER_ID), tok(NEWLINE_ID)] + suffix = [ + tok(IM_END_ID), tok(NEWLINE_ID), + tok(IM_START_ID), tok(ASSISTANT_ID), tok(NEWLINE_ID), + tok(BOI_TOKEN_ID), tok(TMS_TOKEN_ID), + ] + full = prefix + list(text_tuples) + suffix + return {"hidream_o1": [full]} + + +class HiDreamO1TE(torch.nn.Module): + """Passthrough TE: emits int token ids; the Qwen3-VL backbone in diffusion_model does the actual encoding.""" + + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__() + self.dtypes = {torch.float32} + self.disable_offload = True # skips dynamic VRAM management for this zero-parameter module + self.device = torch.device("cpu") if device is None else torch.device(device) + + def encode_token_weights(self, token_weight_pairs): + tok_pairs = token_weight_pairs["hidream_o1"][0] + ids = [int(t[0]) for t in tok_pairs] + input_ids = torch.tensor([ids], dtype=torch.long) + # Surrogate keeps the cross_attn slot non-empty for CONDITIONING + # plumbing; the model reads text_input_ids out of `extra` instead. 
+ cross_attn = input_ids.unsqueeze(-1).to(torch.float32) + extra = {"text_input_ids": input_ids} + return cross_attn, None, extra + + def load_sd(self, sd): + return [] + + def get_sd(self): + return {} + + def reset_clip_options(self): + pass + + def set_clip_options(self, options): + pass diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 06f2fbf74..5087228ca 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -60,6 +60,30 @@ class Mistral3Small24BConfig: final_norm: bool = True lm_head: bool = False +@dataclass +class Ministral3_3BConfig: + vocab_size: int = 131072 + hidden_size: int = 3072 + intermediate_size: int = 9216 + num_hidden_layers: int = 26 + num_attention_heads: int = 32 + num_key_value_heads: int = 8 + max_position_embeddings: int = 262144 + rms_norm_eps: float = 1e-5 + rope_theta: float = 1000000.0 + transformer_type: str = "llama" + head_dim = 128 + rms_norm_add = False + mlp_activation = "silu" + qkv_bias = False + rope_dims = None + q_norm = None + k_norm = None + rope_scale = None + final_norm: bool = True + lm_head: bool = False + stop_tokens = [2] + @dataclass class Qwen25_3BConfig: vocab_size: int = 151936 @@ -373,7 +397,7 @@ class RMSNorm(nn.Module): -def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None): +def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None, interleaved_mrope=False): if not isinstance(theta, list): theta = [theta] @@ -391,16 +415,27 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_di inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) position_ids_expanded = position_ids[:, None, :].float() freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos() - sin = emb.sin() - if rope_dims is not None and position_ids.shape[0] > 1: 
- mrope_section = rope_dims * 2 - cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) - sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) + if rope_dims is not None and position_ids.shape[0] > 1 and interleaved_mrope: + # Qwen3-VL interleaved MRoPE: T-freqs by default, H/W replace every 3rd dim. + freqs_inter = freqs[0].clone() + for axis_idx, offset in ((1, 1), (2, 2)): + length = rope_dims[axis_idx] * 3 + idx = slice(offset, length, 3) + freqs_inter[..., idx] = freqs[axis_idx, ..., idx] + emb = torch.cat((freqs_inter, freqs_inter), dim=-1) + cos = emb.cos().unsqueeze(0) + sin = emb.sin().unsqueeze(0) else: - cos = cos.unsqueeze(1) - sin = sin.unsqueeze(1) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() + if rope_dims is not None and position_ids.shape[0] > 1: + mrope_section = rope_dims * 2 + cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) + sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(0) + else: + cos = cos.unsqueeze(1) + sin = sin.unsqueeze(1) sin_split = sin.shape[-1] // 2 out.append((cos, sin[..., : sin_split], -sin[..., sin_split :])) @@ -497,7 +532,7 @@ class Attention(nn.Module): else: present_key_value = (xk, xv, index + num_tokens) - if sliding_window is not None and xk.shape[2] > sliding_window: + if sliding_window is not None and xk.shape[2] > sliding_window and seq_length == 1: xk = xk[:, :, -sliding_window:] xv = xv[:, :, -sliding_window:] attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None @@ -509,12 +544,12 @@ class Attention(nn.Module): return self.o_proj(output), present_key_value class MLP(nn.Module): - def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None): + def __init__(self, config: Llama2Config, device=None, dtype=None, 
ops: Any = None, intermediate_size=None): super().__init__() - ops = ops or nn - self.gate_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype) - self.up_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype) - self.down_proj = ops.Linear(config.intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype) + intermediate_size = intermediate_size or config.intermediate_size + self.gate_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype) + self.up_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype) + self.down_proj = ops.Linear(intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype) if config.mlp_activation == "silu": self.activation = torch.nn.functional.silu elif config.mlp_activation == "gelu_pytorch_tanh": @@ -623,24 +658,25 @@ class TransformerBlockGemma2(nn.Module): return x, present_key_value +def _make_scaled_embedding(ops, vocab_size, hidden_size, scale, device, dtype): + class ScaledEmbedding(ops.Embedding): + def forward(self, input_ids, out_dtype=None): + return super().forward(input_ids, out_dtype=out_dtype) * scale + return ScaledEmbedding(vocab_size, hidden_size, device=device, dtype=dtype) + + class Llama2_(nn.Module): def __init__(self, config, device=None, dtype=None, ops=None): super().__init__() self.config = config self.vocab_size = config.vocab_size - self.embed_tokens = ops.Embedding( - config.vocab_size, - config.hidden_size, - device=device, - dtype=dtype - ) if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3": transformer = TransformerBlockGemma2 - self.normalize_in = True + self.embed_tokens = _make_scaled_embedding(ops, config.vocab_size, config.hidden_size, config.hidden_size ** 0.5, device, dtype) else: transformer = TransformerBlock - self.normalize_in = False + 
self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype) self.layers = nn.ModuleList([ transformer(config, index=i, device=device, dtype=dtype, ops=ops) @@ -664,17 +700,15 @@ class Llama2_(nn.Module): self.config.rope_theta, self.config.rope_scale, self.config.rope_dims, + interleaved_mrope=getattr(self.config, "interleaved_mrope", False), device=device) - def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None): + def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None): if embeds is not None: x = embeds else: x = self.embed_tokens(x, out_dtype=dtype) - if self.normalize_in: - x *= self.config.hidden_size ** 0.5 - seq_len = x.shape[1] past_len = 0 if past_key_values is not None and len(past_key_values) > 0: @@ -826,7 +860,7 @@ class BaseGenerate: torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0)) return past_key_values - def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0): + def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None): device = embeds.device if stop_tokens is None: @@ -851,14 +885,16 @@ class BaseGenerate: pbar = comfy.utils.ProgressBar(max_length) # Generation loop + current_input_ids = initial_input_ids for step in 
tqdm(range(max_length), desc="Generating tokens"): - x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values) + x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids) logits = self.logits(x)[:, -1] next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty) token_id = next_token[0].item() generated_token_ids.append(token_id) embeds = self.model.embed_tokens(next_token).to(execution_dtype) + current_input_ids = next_token if initial_input_ids is not None else None pbar.update(1) if token_id in stop_tokens: @@ -946,6 +982,15 @@ class Mistral3Small24B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype +class Ministral3_3B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Ministral3_3BConfig(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + class Qwen25_3B(BaseLlama, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py index 5aee1f4c0..bc5cbae28 100644 --- a/comfy/text_encoders/lt.py +++ b/comfy/text_encoders/lt.py @@ -93,8 +93,7 @@ class Gemma3_12BModel(sd1_clip.SDClipModel): def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty): tokens_only = [[t[0] for t in b] for b in tokens] - embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device) - comfy.utils.normalize_image_embeddings(embeds, embeds_info, 
self.transformer.model.config.hidden_size ** 0.5) + embeds, _, _, _ = self.process_tokens(tokens_only, self.execution_device) return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106], presence_penalty=presence_penalty) # 106 is class DualLinearProjection(torch.nn.Module): diff --git a/comfy/text_encoders/lumina2.py b/comfy/text_encoders/lumina2.py index 01ebdfabe..b1f1dbb9f 100644 --- a/comfy/text_encoders/lumina2.py +++ b/comfy/text_encoders/lumina2.py @@ -50,8 +50,7 @@ class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel): super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) def process_tokens(self, tokens, device): - embeds, _, _, embeds_info = super().process_tokens(tokens, device) - comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5) + embeds, _, _, _ = super().process_tokens(tokens, device) return embeds class LuminaModel(sd1_clip.SD1ClipModel): diff --git a/comfy/text_encoders/qwen35.py b/comfy/text_encoders/qwen35.py index ce9b07464..416ce9d18 100644 --- a/comfy/text_encoders/qwen35.py +++ b/comfy/text_encoders/qwen35.py @@ -408,8 +408,6 @@ class Qwen35Transformer(Llama2_): nn.Module.__init__(self) self.config = config self.vocab_size = config.vocab_size - self.normalize_in = False - self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype) self.layers = nn.ModuleList([ Qwen35TransformerBlock(config, index=i, device=device, dtype=dtype, ops=ops) @@ -453,9 +451,8 @@ class Qwen35VisionPatchEmbed(nn.Module): self.proj = ops.Conv3d(self.in_channels, self.embed_dim, kernel_size=kernel_size, 
stride=kernel_size, bias=True, device=device, dtype=dtype) def forward(self, x): - target_dtype = self.proj.weight.dtype x = x.view(-1, self.in_channels, self.temporal_patch_size, self.patch_size, self.patch_size) - return self.proj(x.to(target_dtype)).view(-1, self.embed_dim) + return self.proj(x).view(-1, self.embed_dim) class Qwen35VisionMLP(nn.Module): @@ -653,7 +650,7 @@ class Qwen35VisionModel(nn.Module): x = self.patch_embed(x) pos_embeds = self.fast_pos_embed_interpolate(grid_thw).to(x.device) x = x + pos_embeds - rotary_pos_emb = self.rot_pos_emb(grid_thw) + rotary_pos_emb = self.rot_pos_emb(grid_thw).to(x.device) seq_len = x.shape[0] x = x.reshape(seq_len, -1) rotary_pos_emb = rotary_pos_emb.reshape(seq_len, -1) @@ -763,7 +760,7 @@ class Qwen35ImageTokenizer(sd1_clip.SD1Tokenizer): def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=False, **kwargs): image = kwargs.get("image", None) if image is not None and len(images) == 0: - images = [image] + images = [image[i:i + 1] for i in range(image.shape[0])] skip_template = False if text.startswith('<|im_start|>'): @@ -774,13 +771,16 @@ class Qwen35ImageTokenizer(sd1_clip.SD1Tokenizer): if skip_template: llama_text = text else: - if llama_template is None: - if len(images) > 0: - llama_text = self.llama_template_images.format(text) - else: - llama_text = self.llama_template.format(text) + if llama_template is not None: + template = llama_template + elif len(images) == 0: + template = self.llama_template else: - llama_text = llama_template.format(text) + template = self.llama_template_images + if len(images) > 1: + vision_block = "<|vision_start|><|image_pad|><|vision_end|>" + template = template.replace(vision_block, vision_block * len(images), 1) + llama_text = template.format(text) if not thinking: llama_text += "\n\n" diff --git a/comfy/text_encoders/sam3_clip.py b/comfy/text_encoders/sam3_clip.py new file mode 100644 index 
000000000..11cb7d9db --- /dev/null +++ b/comfy/text_encoders/sam3_clip.py @@ -0,0 +1,97 @@ +import re +from comfy import sd1_clip + +SAM3_CLIP_CONFIG = { + "architectures": ["CLIPTextModel"], + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "max_position_embeddings": 32, + "projection_dim": 512, + "vocab_size": 49408, + "layer_norm_eps": 1e-5, + "eos_token_id": 49407, +} + + +class SAM3ClipModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__(device=device, dtype=dtype, max_length=32, layer="last", textmodel_json_config=SAM3_CLIP_CONFIG, special_tokens={"start": 49406, "end": 49407, "pad": 0}, return_projected_pooled=False, return_attention_masks=True, enable_attention_masks=True, model_options=model_options) + + +class SAM3Tokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(max_length=32, pad_with_end=False, pad_token=0, embedding_directory=embedding_directory, embedding_size=1024, embedding_key="sam3_clip", tokenizer_data=tokenizer_data) + self.disable_weights = True + + +def _parse_prompts(text): + """Split comma-separated prompts with optional :N max detections per category""" + text = text.replace("(", "").replace(")", "") + parts = [p.strip() for p in text.split(",") if p.strip()] + result = [] + for part in parts: + m = re.match(r'^(.+?)\s*:\s*([\d.]+)\s*$', part) + if m: + text_part = m.group(1).strip() + val = m.group(2) + max_det = max(1, round(float(val))) + result.append((text_part, max_det)) + else: + result.append((part, 1)) + return result + + +class SAM3TokenizerWrapper(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="l", tokenizer=SAM3Tokenizer, name="sam3_clip") + + def 
tokenize_with_weights(self, text: str, return_word_ids=False, **kwargs): + parsed = _parse_prompts(text) + if len(parsed) <= 1 and (not parsed or parsed[0][1] == 1): + return super().tokenize_with_weights(text, return_word_ids, **kwargs) + # Tokenize each prompt part separately, store per-part batches and metadata + inner = getattr(self, self.clip) + per_prompt = [] + for prompt_text, max_det in parsed: + batches = inner.tokenize_with_weights(prompt_text, return_word_ids, **kwargs) + per_prompt.append((batches, max_det)) + # Main output uses first prompt's tokens (for compatibility) + out = {self.clip_name: per_prompt[0][0], "sam3_per_prompt": per_prompt} + return out + + +class SAM3ClipModelWrapper(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}, **kwargs): + super().__init__(device=device, dtype=dtype, model_options=model_options, clip_name="l", clip_model=SAM3ClipModel, name="sam3_clip") + + def encode_token_weights(self, token_weight_pairs): + per_prompt = token_weight_pairs.pop("sam3_per_prompt", None) + if per_prompt is None: + return super().encode_token_weights(token_weight_pairs) + + # Encode each prompt separately, pack into extra dict + inner = getattr(self, self.clip) + multi_cond = [] + first_pooled = None + for batches, max_det in per_prompt: + out = inner.encode_token_weights(batches) + cond, pooled = out[0], out[1] + extra = out[2] if len(out) > 2 else {} + if first_pooled is None: + first_pooled = pooled + multi_cond.append({ + "cond": cond, + "attention_mask": extra.get("attention_mask"), + "max_detections": max_det, + }) + + # Return first prompt as main (for non-SAM3 consumers), all prompts in metadata + main = multi_cond[0] + main_extra = {} + if main["attention_mask"] is not None: + main_extra["attention_mask"] = main["attention_mask"] + main_extra["sam3_multi_cond"] = multi_cond + return (main["cond"], first_pooled, main_extra) diff --git a/comfy/utils.py b/comfy/utils.py index 78c491b98..66682690a 
100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -1164,12 +1164,18 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am o = out o_d = out_div + ps_view = ps + mask_view = mask for d in range(dims): - o = o.narrow(d + 2, upscaled[d], mask.shape[d + 2]) - o_d = o_d.narrow(d + 2, upscaled[d], mask.shape[d + 2]) + l = min(ps_view.shape[d + 2], o.shape[d + 2] - upscaled[d]) + o = o.narrow(d + 2, upscaled[d], l) + o_d = o_d.narrow(d + 2, upscaled[d], l) + if l < ps_view.shape[d + 2]: + ps_view = ps_view.narrow(d + 2, 0, l) + mask_view = mask_view.narrow(d + 2, 0, l) - o.add_(ps * mask) - o_d.add_(mask) + o.add_(ps_view * mask_view) + o_d.add_(mask_view) if pbar is not None: pbar.update(1) @@ -1196,7 +1202,7 @@ def model_trange(*args, **kwargs): pbar.i1_time = time.time() pbar.set_postfix_str(" Model Initialization complete! ") elif pbar._i == 2: - #bring forward the effective start time based the the diff between first and second iteration + #bring forward the effective start time based the diff between first and second iteration #to attempt to remove load overhead from the final step rate estimate. 
pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time) pbar.set_postfix_str("") @@ -1390,7 +1396,7 @@ def convert_old_quants(state_dict, model_prefix="", metadata={}): k_out = "{}.weight_scale".format(layer) if layer is not None: - layer_conf = {"format": "float8_e4m3fn"} # TODO: check if anyone did some non e4m3fn scaled checkpoints + layer_conf = {"format": "float8_e4m3fn"} if full_precision_matrix_mult: layer_conf["full_precision_matrix_mult"] = full_precision_matrix_mult layers[layer] = layer_conf @@ -1446,10 +1452,3 @@ def deepcopy_list_dict(obj, memo=None): memo[obj_id] = res return res -def normalize_image_embeddings(embeds, embeds_info, scale_factor): - """Normalize image embeddings to match text embedding scale""" - for info in embeds_info: - if info.get("type") == "image": - start_idx = info["index"] - end_idx = start_idx + info["size"] - embeds[:, start_idx:end_idx, :] /= scale_factor diff --git a/comfy_api/feature_flags.py b/comfy_api/feature_flags.py index 9f6918315..adb5a3144 100644 --- a/comfy_api/feature_flags.py +++ b/comfy_api/feature_flags.py @@ -5,12 +5,95 @@ This module handles capability negotiation between frontend and backend, allowing graceful protocol evolution while maintaining backward compatibility. """ -from typing import Any +import logging +from typing import Any, TypedDict from comfy.cli_args import args + +class FeatureFlagInfo(TypedDict): + type: str + default: Any + description: str + + +# Registry of known CLI-settable feature flags. +# Launchers can query this via --list-feature-flags to discover valid flags. +CLI_FEATURE_FLAG_REGISTRY: dict[str, FeatureFlagInfo] = { + "show_signin_button": { + "type": "bool", + "default": False, + "description": "Show the sign-in button in the frontend even when not signed in", + }, +} + + +def _coerce_bool(v: str) -> bool: + """Strict bool coercion: only 'true'/'false' (case-insensitive). 
+ + Anything else raises ValueError so the caller can warn and drop the flag, + rather than silently treating typos like 'ture' or 'yes' as False. + """ + lower = v.lower() + if lower == "true": + return True + if lower == "false": + return False + raise ValueError(f"expected 'true' or 'false', got {v!r}") + + +_COERCE_FNS: dict[str, Any] = { + "bool": _coerce_bool, + "int": lambda v: int(v), + "float": lambda v: float(v), +} + + +def _coerce_flag_value(key: str, raw_value: str) -> Any: + """Coerce a raw string value using the registry type, or keep as string. + + Returns the raw string if the key is unregistered or the type is unknown. + Raises ValueError/TypeError if the key is registered with a known type but + the value cannot be coerced; callers are expected to warn and drop the flag. + """ + info = CLI_FEATURE_FLAG_REGISTRY.get(key) + if info is None: + return raw_value + coerce = _COERCE_FNS.get(info["type"]) + if coerce is None: + return raw_value + return coerce(raw_value) + + +def _parse_cli_feature_flags() -> dict[str, Any]: + """Parse --feature-flag key=value pairs from CLI args into a dict. + + Items without '=' default to the value 'true' (bare flag form). + Flags whose value cannot be coerced to the registered type are dropped + with a warning, so a typo like '--feature-flag some_bool=ture' does not + silently take effect as the wrong value. 
+ """ + result: dict[str, Any] = {} + for item in getattr(args, "feature_flag", []): + key, sep, raw_value = item.partition("=") + key = key.strip() + if not key: + continue + if not sep: + raw_value = "true" + try: + result[key] = _coerce_flag_value(key, raw_value.strip()) + except (ValueError, TypeError) as e: + info = CLI_FEATURE_FLAG_REGISTRY.get(key, {}) + logging.warning( + "Could not coerce --feature-flag %s=%r to %s (%s); dropping flag.", + key, raw_value.strip(), info.get("type", "?"), e, + ) + return result + + # Default server capabilities -SERVER_FEATURE_FLAGS: dict[str, Any] = { +_CORE_FEATURE_FLAGS: dict[str, Any] = { "supports_preview_metadata": True, "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes "extension": {"manager": {"supports_v4": True}}, @@ -18,6 +101,11 @@ SERVER_FEATURE_FLAGS: dict[str, Any] = { "assets": args.enable_assets, } +# CLI-provided flags cannot overwrite core flags +_cli_flags = {k: v for k, v in _parse_cli_feature_flags().items() if k not in _CORE_FEATURE_FLAGS} + +SERVER_FEATURE_FLAGS: dict[str, Any] = {**_CORE_FEATURE_FLAGS, **_cli_flags} + def get_connection_feature( sockets_metadata: dict[str, dict[str, Any]], diff --git a/comfy_api/input/__init__.py b/comfy_api/input/__init__.py index 16d4acfd1..dc33533cc 100644 --- a/comfy_api/input/__init__.py +++ b/comfy_api/input/__init__.py @@ -9,6 +9,7 @@ from comfy_api.latest._input import ( CurveInput, MonotoneCubicCurve, LinearCurve, + RangeInput, ) __all__ = [ @@ -21,4 +22,5 @@ __all__ = [ "CurveInput", "MonotoneCubicCurve", "LinearCurve", + "RangeInput", ] diff --git a/comfy_api/latest/_input/__init__.py b/comfy_api/latest/_input/__init__.py index 05cd3d40a..f0229717e 100644 --- a/comfy_api/latest/_input/__init__.py +++ b/comfy_api/latest/_input/__init__.py @@ -1,5 +1,6 @@ from .basic_types import ImageInput, AudioInput, MaskInput, LatentInput from .curve_types import CurvePoint, CurveInput, MonotoneCubicCurve, LinearCurve +from .range_types import 
RangeInput from .video_types import VideoInput __all__ = [ @@ -12,4 +13,5 @@ __all__ = [ "CurveInput", "MonotoneCubicCurve", "LinearCurve", + "RangeInput", ] diff --git a/comfy_api/latest/_input/range_types.py b/comfy_api/latest/_input/range_types.py new file mode 100644 index 000000000..f4c5cb290 --- /dev/null +++ b/comfy_api/latest/_input/range_types.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import logging +import math +import numpy as np + +logger = logging.getLogger(__name__) + + +class RangeInput: + """Represents a levels/range adjustment: input range [min, max] with + optional midpoint (gamma control). + + Generates a 1D LUT identical to GIMP's levels mapping: + 1. Normalize input to [0, 1] using [min, max] + 2. Apply gamma correction: pow(value, 1/gamma) + 3. Clamp to [0, 1] + + The midpoint field is a position in [0, 1] representing where the + midtone falls within [min, max]. It maps to gamma via: + gamma = -log2(midpoint) + So midpoint=0.5 → gamma=1.0 (linear). + """ + + def __init__(self, min_val: float, max_val: float, midpoint: float | None = None): + self.min_val = min_val + self.max_val = max_val + self.midpoint = midpoint + + @staticmethod + def from_raw(data) -> RangeInput: + if isinstance(data, RangeInput): + return data + if isinstance(data, dict): + return RangeInput( + min_val=float(data.get("min", 0.0)), + max_val=float(data.get("max", 1.0)), + midpoint=float(data["midpoint"]) if data.get("midpoint") is not None else None, + ) + raise TypeError(f"Cannot convert {type(data)} to RangeInput") + + def to_lut(self, size: int = 256) -> np.ndarray: + """Generate a float64 lookup table mapping [0, 1] input through this + levels adjustment. + + The LUT maps normalized input values (0..1) to output values (0..1), + matching the GIMP levels formula. 
+ """ + xs = np.linspace(0.0, 1.0, size, dtype=np.float64) + + in_range = self.max_val - self.min_val + if abs(in_range) < 1e-10: + return np.where(xs >= self.min_val, 1.0, 0.0).astype(np.float64) + + # Normalize: map [min, max] → [0, 1] + result = (xs - self.min_val) / in_range + result = np.clip(result, 0.0, 1.0) + + # Gamma correction from midpoint + if self.midpoint is not None and self.midpoint > 0 and self.midpoint != 0.5: + gamma = max(-math.log2(self.midpoint), 0.001) + inv_gamma = 1.0 / gamma + mask = result > 0 + result[mask] = np.power(result[mask], inv_gamma) + + return result + + def __repr__(self) -> str: + mid = f", midpoint={self.midpoint}" if self.midpoint is not None else "" + return f"RangeInput(min={self.min_val}, max={self.max_val}{mid})" diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 1b4993aa7..942278d88 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -12,6 +12,7 @@ import numpy as np import math import torch from .._util import VideoContainer, VideoCodec, VideoComponents +import logging def container_to_output_format(container_format: str | None) -> str | None: @@ -238,64 +239,125 @@ class VideoFromFile(VideoInput): start_time = max(self._get_raw_duration() + self.__start_time, 0) else: start_time = self.__start_time + # Get video frames frames = [] + audio_frames = [] + alphas = None start_pts = int(start_time / video_stream.time_base) end_pts = int((start_time + self.__duration) / video_stream.time_base) - container.seek(start_pts, stream=video_stream) - for frame in container.decode(video_stream): - if frame.pts < start_pts: - continue - if self.__duration and frame.pts >= end_pts: - break - img = frame.to_ndarray(format='rgb24') # shape: (H, W, 3) - img = torch.from_numpy(img) / 255.0 # shape: (H, W, 3) - frames.append(img) - images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) + if start_pts 
!= 0: + container.seek(start_pts, stream=video_stream) + + image_format = 'gbrpf32le' + process_image_format = lambda a: a + audio = None + + streams = [video_stream] + has_first_audio_frame = False + checked_alpha = False + + # Default to False so we decode until EOF if duration is 0 + video_done = False + audio_done = True + + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + streams += [audio_stream] + resampler = av.audio.resampler.AudioResampler(format='fltp') + audio_done = False + + for packet in container.demux(*streams): + if video_done and audio_done: + break + + if packet.stream.type == "video": + if video_done: + continue + try: + for frame in packet.decode(): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + video_done = True + break + + if not checked_alpha: + alpha_channel = False + for comp in frame.format.components: + if comp.is_alpha or frame.format.name == "pal8": + alphas = [] + alpha_channel = True + break + if frame.format.name in ("yuvj420p", "yuvj422p", "yuvj444p", "rgb24", "rgba", "pal8"): + process_image_format = lambda a: a.float() / 255.0 + if alpha_channel: + image_format = 'rgba' + else: + image_format = 'rgb24' + else: + process_image_format = lambda a: a + if alpha_channel: + image_format = 'gbrapf32le' + else: + image_format = 'gbrpf32le' + + checked_alpha = True + + img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + if frame.rotation != 0: + k = int(round(frame.rotation // 90)) + img = np.rot90(img, k=k, axes=(0, 1)).copy() + if alphas is None: + frames.append(torch.from_numpy(img)) + else: + frames.append(torch.from_numpy(img[..., :-1])) + alphas.append(torch.from_numpy(img[..., -1:])) + except av.error.InvalidDataError: + logging.info("pyav decode error") + + elif packet.stream.type == "audio": + if audio_done: + continue + + aframes = itertools.chain.from_iterable( + map(resampler.resample, packet.decode()) + ) + for frame in aframes: + if 
self.__duration and frame.time > start_time + self.__duration: + audio_done = True + break + + if not has_first_audio_frame: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) + if to_skip < frame.samples: + has_first_audio_frame = True + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + else: + audio_frames.append(frame.to_ndarray()) + + images = process_image_format(torch.stack(frames)) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) + if alphas is not None: + alphas = process_image_format(torch.stack(alphas)) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1) # Get frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) - # Get audio if available - audio = None - container.seek(start_pts, stream=video_stream) - # Use last stream for consistency - if len(container.streams.audio): - audio_stream = container.streams.audio[-1] - audio_frames = [] - resample = av.audio.resampler.AudioResampler(format='fltp').resample - frames = itertools.chain.from_iterable( - map(resample, container.decode(audio_stream)) - ) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - has_first_frame = False - for frame in frames: - offset_seconds = start_time - frame.pts * audio_stream.time_base - to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) - if to_skip < frame.samples: - has_first_frame = True - break - if has_first_frame: - audio_frames.append(frame.to_ndarray()[..., to_skip:]) - - for frame in frames: - if self.__duration and frame.time > start_time + self.__duration: - break - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - if 
self.__duration: - audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, - }) + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata - return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata) + return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata) def get_components(self) -> VideoComponents: if isinstance(self.__file, io.BytesIO): diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index fdeffea2d..5ed968960 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -17,6 +17,7 @@ if TYPE_CHECKING: from spandrel import ImageModelDescriptor from comfy.clip_vision import ClipVisionModel from comfy.clip_vision import Output as ClipVisionOutput_ + from comfy.bg_removal_model import BackgroundRemovalModel from comfy.controlnet import ControlNet from comfy.hooks import HookGroup, HookKeyframeGroup from comfy.model_patcher import ModelPatcher @@ -395,7 +396,6 @@ class Combo(ComfyTypeIO): @comfytype(io_type="COMBO") class MultiCombo(ComfyTypeI): '''Multiselect Combo input (dropdown for selecting potentially more than one value).''' - # TODO: something is wrong with the serialization, frontend does not recognize it as multiselect Type = list[str] class Input(Combo.Input): def __init__(self, id: str, options: list[str], display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None, @@ -408,12 +408,14 @@ class MultiCombo(ComfyTypeI): self.default: list[str] def as_dict(self): - to_return = 
super().as_dict() | prune_dict({ - "multi_select": self.multiselect, - "placeholder": self.placeholder, - "chip": self.chip, + # Frontend expects `multi_select` to be an object config (not a boolean). + # Keep top-level `multiselect` from Combo.Input for backwards compatibility. + return super().as_dict() | prune_dict({ + "multi_select": prune_dict({ + "placeholder": self.placeholder, + "chip": self.chip, + }), }) - return to_return @comfytype(io_type="IMAGE") class Image(ComfyTypeIO): @@ -613,6 +615,11 @@ class Model(ComfyTypeIO): if TYPE_CHECKING: Type = ModelPatcher +@comfytype(io_type="BACKGROUND_REMOVAL") +class BackgroundRemoval(ComfyTypeIO): + if TYPE_CHECKING: + Type = BackgroundRemovalModel + @comfytype(io_type="CLIP_VISION") class ClipVision(ComfyTypeIO): if TYPE_CHECKING: @@ -1266,6 +1273,43 @@ class Histogram(ComfyTypeIO): Type = list[int] +@comfytype(io_type="RANGE") +class Range(ComfyTypeIO): + from comfy_api.input import RangeInput + if TYPE_CHECKING: + Type = RangeInput + + class Input(WidgetInput): + def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, + socketless: bool=True, default: dict=None, + display: str=None, + gradient_stops: list=None, + show_midpoint: bool=None, + midpoint_scale: str=None, + value_min: float=None, + value_max: float=None, + advanced: bool=None): + super().__init__(id, display_name, optional, tooltip, None, default, socketless, None, None, None, None, advanced) + if default is None: + self.default = {"min": 0.0, "max": 1.0} + self.display = display + self.gradient_stops = gradient_stops + self.show_midpoint = show_midpoint + self.midpoint_scale = midpoint_scale + self.value_min = value_min + self.value_max = value_max + + def as_dict(self): + return super().as_dict() | prune_dict({ + "display": self.display, + "gradient_stops": self.gradient_stops, + "show_midpoint": self.show_midpoint, + "midpoint_scale": self.midpoint_scale, + "value_min": self.value_min, + "value_max": self.value_max, 
+ }) + + DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {} def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]): DYNAMIC_INPUT_LOOKUP[io_type] = func @@ -2219,6 +2263,7 @@ __all__ = [ "ModelPatch", "ClipVision", "ClipVisionOutput", + "BackgroundRemoval", "AudioEncoder", "AudioEncoderOutput", "StyleModel", @@ -2276,5 +2321,6 @@ __all__ = [ "BoundingBox", "Curve", "Histogram", + "Range", "NodeReplace", ] diff --git a/comfy_api/latest/_util/geometry_types.py b/comfy_api/latest/_util/geometry_types.py index b586fceb3..cdde60b10 100644 --- a/comfy_api/latest/_util/geometry_types.py +++ b/comfy_api/latest/_util/geometry_types.py @@ -12,9 +12,24 @@ class VOXEL: class MESH: - def __init__(self, vertices: torch.Tensor, faces: torch.Tensor): - self.vertices = vertices - self.faces = faces + def __init__(self, vertices: torch.Tensor, faces: torch.Tensor, + uvs: torch.Tensor | None = None, + vertex_colors: torch.Tensor | None = None, + texture: torch.Tensor | None = None, + vertex_counts: torch.Tensor | None = None, + face_counts: torch.Tensor | None = None): + + assert (vertex_counts is None) == (face_counts is None), \ + "vertex_counts and face_counts must be provided together (both or neither)" + self.vertices = vertices # vertices: (B, N, 3) + self.faces = faces # faces: (B, M, 3) + self.uvs = uvs # uvs: (B, N, 2) + self.vertex_colors = vertex_colors # vertex_colors: (B, N, 3 or 4) + self.texture = texture # texture: (B, H, W, 3) + # When vertices/faces are zero-padded to a common N/M across the batch (variable-size mesh batch), + # these hold the real per-item lengths (B,). None means rows are uniform and no slicing is needed. 
+ self.vertex_counts = vertex_counts + self.face_counts = face_counts class File3D: diff --git a/comfy_api/latest/_util/video_types.py b/comfy_api/latest/_util/video_types.py index fd3b5a510..c92477f08 100644 --- a/comfy_api/latest/_util/video_types.py +++ b/comfy_api/latest/_util/video_types.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from enum import Enum from fractions import Fraction from typing import Optional -from .._input import ImageInput, AudioInput +from .._input import ImageInput, AudioInput, MaskInput class VideoCodec(str, Enum): AUTO = "auto" @@ -48,5 +48,4 @@ class VideoComponents: frame_rate: Fraction audio: Optional[AudioInput] = None metadata: Optional[dict] = None - - + alpha: Optional[MaskInput] = None diff --git a/comfy_api_nodes/apis/anthropic.py b/comfy_api_nodes/apis/anthropic.py new file mode 100644 index 000000000..6cac537ea --- /dev/null +++ b/comfy_api_nodes/apis/anthropic.py @@ -0,0 +1,75 @@ +from enum import Enum +from typing import Literal + +from pydantic import BaseModel, Field + + +class AnthropicRole(str, Enum): + user = "user" + assistant = "assistant" + + +class AnthropicTextContent(BaseModel): + type: Literal["text"] = "text" + text: str = Field(...) + + +class AnthropicImageSourceBase64(BaseModel): + type: Literal["base64"] = "base64" + media_type: str = Field(..., description="MIME type of the image, e.g. image/png, image/jpeg") + data: str = Field(..., description="Base64-encoded image data") + + +class AnthropicImageSourceUrl(BaseModel): + type: Literal["url"] = "url" + url: str = Field(...) + + +class AnthropicImageContent(BaseModel): + type: Literal["image"] = "image" + source: AnthropicImageSourceBase64 | AnthropicImageSourceUrl = Field(...) + + +class AnthropicMessage(BaseModel): + role: AnthropicRole = Field(...) + content: list[AnthropicTextContent | AnthropicImageContent] = Field(...) + + +class AnthropicMessagesRequest(BaseModel): + model: str = Field(...) + messages: list[AnthropicMessage] = Field(...) 
class AnthropicResponseTextBlock(BaseModel):
    """A ``text``-typed content block carrying a string payload."""

    type: Literal["text"] = "text"
    text: str


class AnthropicCacheCreationUsage(BaseModel):
    """Optional token counters split by cache lifetime (5m / 1h, going by the field names)."""

    ephemeral_5m_input_tokens: int | None = None
    ephemeral_1h_input_tokens: int | None = None


class AnthropicMessagesUsage(BaseModel):
    """Token counters attached to a response; every counter may be absent."""

    input_tokens: int | None = None
    output_tokens: int | None = None
    cache_creation_input_tokens: int | None = None
    cache_read_input_tokens: int | None = None
    cache_creation: AnthropicCacheCreationUsage | None = None


class AnthropicMessagesResponse(BaseModel):
    """Response body model.

    All fields default to ``None``, so a payload missing any of them still validates.
    Only ``text`` content blocks are modelled.
    """

    id: str | None = None
    type: str | None = None
    role: str | None = None
    model: str | None = None
    content: list[AnthropicResponseTextBlock] | None = None
    stop_reason: str | None = None
    stop_sequence: str | None = None
    usage: AnthropicMessagesUsage | None = None
class TaskVideoContentUrl(BaseModel):
    """Wrapper object holding the URL of a video."""

    url: str


class TaskVideoContent(BaseModel):
    """Video content item; ``type`` defaults to "video_url" and ``role`` to "reference_video"."""

    type: str = "video_url"
    video_url: TaskVideoContentUrl
    role: str = "reference_video"


class TaskAudioContentUrl(BaseModel):
    """Wrapper object holding the URL of an audio clip."""

    url: str


class TaskAudioContent(BaseModel):
    """Audio content item; ``type`` defaults to "audio_url" and ``role`` to "reference_audio"."""

    type: str = "audio_url"
    audio_url: TaskAudioContentUrl
    role: str = "reference_audio"
class GetAssetResponse(BaseModel):
    """Asset record: identity, type, owning group, status and an optional error."""

    id: str
    name: str | None = None
    url: str | None = None
    asset_type: str
    group_id: str
    status: str
    error: TaskStatusError | None = None


class SeedanceCreateVisualValidateSessionResponse(BaseModel):
    """Result of creating a visual-validate session: its id and an ``h5_link``."""

    session_id: str
    h5_link: str


class SeedanceGetVisualValidateSessionResponse(BaseModel):
    """State of a visual-validate session, with optional group id and error details."""

    session_id: str
    status: str
    group_id: str | None = None
    error_code: str | None = None
    error_message: str | None = None


class SeedanceCreateAssetRequest(BaseModel):
    """Request to register an asset URL under a group; ``name`` is capped at 64 characters."""

    group_id: str
    url: str
    asset_type: str
    name: str | None = Field(default=None, max_length=64)
    project_name: str | None = None


class SeedanceCreateAssetResponse(BaseModel):
    """Carries the id of the created asset."""

    asset_id: str


class SeedanceVirtualLibraryCreateAssetRequest(BaseModel):
    """Request to upload an image asset by URL, deduplicated by ``hash``."""

    url: str = Field(description="Publicly accessible URL of the image asset to upload.")
    hash: str = Field(description="Dedup key. Re-submitting the same hash returns the existing asset id.")
# Aspect ratios offered in every Seedream resolution tier, in display order.
_SEEDREAM_ASPECT_RATIOS = ("1:1", "3:4", "4:3", "16:9", "9:16", "2:3", "3:2", "21:9")


def _seedream_presets(tier, sizes):
    """Build ``(label, width, height)`` rows for one tier, pairing sizes with the ratio list."""
    return [
        (f"({tier}) {width}x{height} ({ratio})", width, height)
        for (width, height), ratio in zip(sizes, _SEEDREAM_ASPECT_RATIOS)
    ]


_PRESETS_SEEDREAM_1K = _seedream_presets("1K", [
    (1024, 1024),
    (864, 1152),
    (1152, 864),
    (1312, 736),
    (736, 1312),
    (832, 1248),
    (1248, 832),
    (1568, 672),
])

_PRESETS_SEEDREAM_2K = _seedream_presets("2K", [
    (2048, 2048),
    (1728, 2304),
    (2304, 1728),
    (2848, 1600),
    (1600, 2848),
    (1664, 2496),
    (2496, 1664),
    (3136, 1344),
])

_PRESETS_SEEDREAM_3K = _seedream_presets("3K", [
    (3072, 3072),
    (2592, 3456),
    (3456, 2592),
    (4096, 2304),
    (2304, 4096),
    (2496, 3744),
    (3744, 2496),
    (4704, 2016),
])

_PRESETS_SEEDREAM_4K = _seedream_presets("4K", [
    (4096, 4096),
    (3520, 4704),
    (4704, 3520),
    (5504, 3040),
    (3040, 5504),
    (3328, 4992),
    (4992, 3328),
    (6240, 2656),
])

# Trailing entry that lets the user enter their own dimensions.
_CUSTOM_PRESET = [("Custom", None, None)]

# Per-model preset menus: each is the concatenation of the tiers that model offers.
RECOMMENDED_PRESETS_SEEDREAM_5_LITE = [
    *_PRESETS_SEEDREAM_2K, *_PRESETS_SEEDREAM_3K, *_PRESETS_SEEDREAM_4K, *_CUSTOM_PRESET,
]
RECOMMENDED_PRESETS_SEEDREAM_4_5 = [
    *_PRESETS_SEEDREAM_2K, *_PRESETS_SEEDREAM_4K, *_CUSTOM_PRESET,
]
RECOMMENDED_PRESETS_SEEDREAM_4_0 = [
    *_PRESETS_SEEDREAM_1K, *_PRESETS_SEEDREAM_2K, *_PRESETS_SEEDREAM_4K, *_CUSTOM_PRESET,
]

# Seedance 2.0 reference video pixel count limits per model and output resolution.
SEEDANCE2_REF_VIDEO_PIXEL_LIMITS = {
    "dreamina-seedance-2-0-260128": {
        "480p": dict(min=409_600, max=927_408),
        "720p": dict(min=409_600, max=927_408),
        "1080p": dict(min=409_600, max=2_073_600),
    },
    "dreamina-seedance-2-0-fast-260128": {
        "480p": dict(min=409_600, max=927_408),
        "720p": dict(min=409_600, max=927_408),
    },
}
class Luma2ImageRef(BaseModel):
    """Image reference given either as a ``url`` or as inline ``data`` with a ``media_type``.

    All three fields are optional; the model itself does not enforce which
    combination is supplied.
    """

    url: str | None = Field(default=None)
    data: str | None = Field(default=None)
    media_type: str | None = Field(default=None)


class Luma2GenerationRequest(BaseModel):
    """Generation request; only ``prompt`` (1-6000 characters) is required."""

    prompt: str = Field(min_length=1, max_length=6000)
    model: str | None = Field(default=None)
    type: str | None = Field(default=None)
    aspect_ratio: str | None = Field(default=None)
    style: str | None = Field(default=None)
    output_format: str | None = Field(default=None)
    web_search: bool | None = Field(default=None)
    image_ref: list[Luma2ImageRef] | None = Field(default=None)
    source: Luma2ImageRef | None = Field(default=None)


class Luma2Generation(BaseModel):
    """Generation record: state, optional output references and failure details.

    Every field is optional, so a sparse payload still validates.
    """

    id: str | None = Field(default=None)
    type: str | None = Field(default=None)
    state: str | None = Field(default=None)
    model: str | None = Field(default=None)
    created_at: str | None = Field(default=None)
    output: list[LumaImageReference] | None = Field(default=None)
    failure_reason: str | None = Field(default=None)
    failure_code: str | None = Field(default=None)
description='Number of caching warmup steps' - ) - clip_value: Optional[float] = Field( - 3, description='CLIP value for generation control' - ) - conditioning_frame_index: Optional[int] = Field( - 0, description='Index of the conditioning frame' - ) - cooldown_steps: Optional[int] = Field( - 75, description='Number of cooldown steps (calculated based on num_frames)' - ) - fps: Optional[int] = Field( - 24, description='Frames per second of the generated video' - ) - guidance_scale: Optional[float] = Field( - 10, description='Guidance scale for generation control' - ) - height: Optional[int] = Field( - 1080, description='Height of the generated video in pixels' - ) - negative_prompt: Optional[str] = Field(None, description='Negative prompt text') - num_frames: Optional[int] = Field(64, description='Number of frames to generate') - seed: Optional[int] = Field( - None, description='Random seed for generation (default: random)' - ) - shift_value: Optional[float] = Field( - 3, description='Shift value for generation control' - ) - steps: Optional[int] = Field(80, description='Number of denoising steps') - use_guidance_schedule: Optional[bool] = Field( - True, description='Whether to use guidance scheduling' - ) - use_negative_prompts: Optional[bool] = Field( - False, description='Whether to use negative prompts' - ) - use_timestep_transform: Optional[bool] = Field( - True, description='Whether to use timestep transformation' - ) - warmup_steps: Optional[int] = Field( - 0, description='Number of warmup steps (calculated based on num_frames)' - ) - width: Optional[int] = Field( - 1920, description='Width of the generated video in pixels' - ) - - -class MoonvalleyTextToVideoRequest(BaseModel): - image_url: Optional[str] = None - inference_params: Optional[MoonvalleyTextToVideoInferenceParams] = None - prompt_text: Optional[str] = None - webhook_url: Optional[str] = None - - -class MoonvalleyUploadFileRequest(BaseModel): - file: Optional[StrictBytes] = None - - -class 
MoonvalleyUploadFileResponse(BaseModel): - access_url: Optional[str] = None - - -class MoonvalleyVideoToVideoInferenceParams(BaseModel): - add_quality_guidance: Optional[bool] = Field( - True, description='Whether to add quality guidance' - ) - caching_coefficient: Optional[float] = Field( - 0.3, description='Caching coefficient for optimization' - ) - caching_cooldown: Optional[int] = Field( - 3, description='Number of caching cooldown steps' - ) - caching_warmup: Optional[int] = Field( - 3, description='Number of caching warmup steps' - ) - clip_value: Optional[float] = Field( - 3, description='CLIP value for generation control' - ) - conditioning_frame_index: Optional[int] = Field( - 0, description='Index of the conditioning frame' - ) - cooldown_steps: Optional[int] = Field( - 36, description='Number of cooldown steps (calculated based on num_frames)' - ) - guidance_scale: Optional[float] = Field( - 15, description='Guidance scale for generation control' - ) - negative_prompt: Optional[str] = Field(None, description='Negative prompt text') - seed: Optional[int] = Field( - None, description='Random seed for generation (default: random)' - ) - shift_value: Optional[float] = Field( - 3, description='Shift value for generation control' - ) - steps: Optional[int] = Field(80, description='Number of denoising steps') - use_guidance_schedule: Optional[bool] = Field( - True, description='Whether to use guidance scheduling' - ) - use_negative_prompts: Optional[bool] = Field( - False, description='Whether to use negative prompts' - ) - use_timestep_transform: Optional[bool] = Field( - True, description='Whether to use timestep transformation' - ) - warmup_steps: Optional[int] = Field( - 24, description='Number of warmup steps (calculated based on num_frames)' - ) - - -class ControlType(str, Enum): - motion_control = 'motion_control' - pose_control = 'pose_control' - - -class MoonvalleyVideoToVideoRequest(BaseModel): - control_type: ControlType = Field( - ..., 
description='Supported types for video control' - ) - inference_params: Optional[MoonvalleyVideoToVideoInferenceParams] = None - prompt_text: str = Field(..., description='Describes the video to generate') - video_url: str = Field(..., description='Url to control video') - webhook_url: Optional[str] = Field( - None, description='Optional webhook URL for notifications' - ) diff --git a/comfy_api_nodes/apis/openai.py b/comfy_api_nodes/apis/openai.py index b85ef252b..bee75d639 100644 --- a/comfy_api_nodes/apis/openai.py +++ b/comfy_api_nodes/apis/openai.py @@ -56,14 +56,14 @@ class ModelResponseProperties(BaseModel): instructions: str | None = Field(None) max_output_tokens: int | None = Field(None) model: str | None = Field(None) - temperature: float | None = Field(1, description="Controls randomness in the response", ge=0.0, le=2.0) + temperature: float | None = Field(None, description="Controls randomness in the response", ge=0.0, le=2.0) top_p: float | None = Field( - 1, + None, description="Controls diversity of the response via nucleus sampling", ge=0.0, le=1.0, ) - truncation: str | None = Field("disabled", description="Allowed values: 'auto' or 'disabled'") + truncation: str | None = Field(None, description="Allowed values: 'auto' or 'disabled'") class ResponseProperties(BaseModel): diff --git a/comfy_api_nodes/apis/topaz.py b/comfy_api_nodes/apis/topaz.py index a9e6235a7..f91980e3d 100644 --- a/comfy_api_nodes/apis/topaz.py +++ b/comfy_api_nodes/apis/topaz.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional from pydantic import BaseModel, Field @@ -72,8 +72,11 @@ class VideoEnhancementFilter(BaseModel): grain: Optional[float] = Field(None, description="Grain after AI model processing") grainSize: Optional[float] = Field(None, description="Size of generated grain") recoverOriginalDetailValue: Optional[float] = Field(None, description="Source details into the output video") - creativity: Optional[str] = Field(None, 
description="Creativity level(high, low) for slc-1 only") + creativity: float | str | None = Field(None, description="slc-1/slp-2.5: enum (low/middle/high). ast-2: decimal 0.0-1.0.") isOptimizedMode: Optional[bool] = Field(None, description="Set to true for Starlight Creative (slc-1) only") + prompt: str | None = Field(None, description="Descriptive scene prompt (ast-2 only)") + sharp: float | None = Field(None, description="ast-2 pre-enhance sharpness") + realism: float | None = Field(None, description="ast-2 realism control") class OutputInformationVideo(BaseModel): @@ -90,7 +93,7 @@ class Overrides(BaseModel): class CreateVideoRequest(BaseModel): source: CreateVideoRequestSource = Field(...) - filters: list[Union[VideoFrameInterpolationFilter, VideoEnhancementFilter]] = Field(...) + filters: list[VideoFrameInterpolationFilter | VideoEnhancementFilter] = Field(...) output: OutputInformationVideo = Field(...) overrides: Overrides = Field(Overrides(isPaidDiffusion=True)) diff --git a/comfy_api_nodes/apis/tripo.py b/comfy_api_nodes/apis/tripo.py index ffaaa7dc1..bce6b0e89 100644 --- a/comfy_api_nodes/apis/tripo.py +++ b/comfy_api_nodes/apis/tripo.py @@ -1,10 +1,11 @@ -from __future__ import annotations from enum import Enum -from typing import Optional, List, Dict, Any, Union +from typing import Optional, Any from pydantic import BaseModel, Field, RootModel + class TripoModelVersion(str, Enum): + v3_1_20260211 = 'v3.1-20260211' v3_0_20250812 = 'v3.0-20250812' v2_5_20250123 = 'v2.5-20250123' v2_0_20240919 = 'v2.0-20240919' @@ -142,7 +143,7 @@ class TripoFileEmptyReference(BaseModel): pass class TripoFileReference(RootModel): - root: Union[TripoFileTokenReference, TripoUrlReference, TripoObjectReference, TripoFileEmptyReference] + root: TripoFileTokenReference | TripoUrlReference | TripoObjectReference | TripoFileEmptyReference class TripoGetStsTokenRequest(BaseModel): format: str = Field(..., description='The format of the image') @@ -183,7 +184,7 @@ class 
TripoImageToModelRequest(BaseModel): class TripoMultiviewToModelRequest(BaseModel): type: TripoTaskType = TripoTaskType.MULTIVIEW_TO_MODEL - files: List[TripoFileReference] = Field(..., description='The file references to convert to a model') + files: list[TripoFileReference] = Field(..., description='The file references to convert to a model') model_version: Optional[TripoModelVersion] = Field(None, description='The model version to use for generation') orthographic_projection: Optional[bool] = Field(False, description='Whether to use orthographic projection') face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') @@ -251,27 +252,13 @@ class TripoConvertModelRequest(BaseModel): with_animation: Optional[bool] = Field(None, description='Whether to include animations') pack_uv: Optional[bool] = Field(None, description='Whether to pack the UVs') bake: Optional[bool] = Field(None, description='Whether to bake the model') - part_names: Optional[List[str]] = Field(None, description='The names of the parts to include') + part_names: Optional[list[str]] = Field(None, description='The names of the parts to include') fbx_preset: Optional[TripoFbxPreset] = Field(None, description='The preset for the FBX export') export_vertex_colors: Optional[bool] = Field(None, description='Whether to export the vertex colors') export_orientation: Optional[TripoOrientation] = Field(None, description='The orientation for the export') animate_in_place: Optional[bool] = Field(None, description='Whether to animate in place') -class TripoTaskRequest(RootModel): - root: Union[ - TripoTextToModelRequest, - TripoImageToModelRequest, - TripoMultiviewToModelRequest, - TripoTextureModelRequest, - TripoRefineModelRequest, - TripoAnimatePrerigcheckRequest, - TripoAnimateRigRequest, - TripoAnimateRetargetRequest, - TripoStylizeModelRequest, - TripoConvertModelRequest - ] - class TripoTaskOutput(BaseModel): model: Optional[str] = Field(None, description='URL 
to the model') base_model: Optional[str] = Field(None, description='URL to the base model') @@ -283,12 +270,13 @@ class TripoTask(BaseModel): task_id: str = Field(..., description='The task ID') type: Optional[str] = Field(None, description='The type of task') status: Optional[TripoTaskStatus] = Field(None, description='The status of the task') - input: Optional[Dict[str, Any]] = Field(None, description='The input parameters for the task') + input: Optional[dict[str, Any]] = Field(None, description='The input parameters for the task') output: Optional[TripoTaskOutput] = Field(None, description='The output of the task') progress: Optional[int] = Field(None, description='The progress of the task', ge=0, le=100) create_time: Optional[int] = Field(None, description='The creation time of the task') running_left_time: Optional[int] = Field(None, description='The estimated time left for the task') queue_position: Optional[int] = Field(None, description='The position in the queue') + consumed_credit: int | None = Field(None) class TripoTaskResponse(BaseModel): code: int = Field(0, description='The response code') @@ -296,7 +284,7 @@ class TripoTaskResponse(BaseModel): class TripoGeneralResponse(BaseModel): code: int = Field(0, description='The response code') - data: Dict[str, str] = Field(..., description='The task ID data') + data: dict[str, str] = Field(..., description='The task ID data') class TripoBalanceData(BaseModel): balance: float = Field(..., description='The account balance') diff --git a/comfy_api_nodes/apis/wan.py b/comfy_api_nodes/apis/wan.py new file mode 100644 index 000000000..c64acae97 --- /dev/null +++ b/comfy_api_nodes/apis/wan.py @@ -0,0 +1,226 @@ +from pydantic import BaseModel, Field + + +class Text2ImageInputField(BaseModel): + prompt: str = Field(...) + negative_prompt: str | None = Field(None) + + +class Image2ImageInputField(BaseModel): + prompt: str = Field(...) 
class Text2VideoInputField(BaseModel):
    """``input`` section for text-to-video: prompt plus optional negative prompt and audio URL."""

    prompt: str
    negative_prompt: str | None = None
    audio_url: str | None = None


class Image2VideoInputField(BaseModel):
    """``input`` section for image-to-video: prompt, source image URL, optional audio URL."""

    prompt: str
    negative_prompt: str | None = None
    img_url: str
    audio_url: str | None = None


class Reference2VideoInputField(BaseModel):
    """``input`` section for reference-to-video: prompt plus a list of reference video URLs."""

    prompt: str
    negative_prompt: str | None = None
    reference_video_urls: list[str]


class Txt2ImageParametersField(BaseModel):
    """``parameters`` section for text-to-image; ``seed`` must fit in 0..2147483647."""

    size: str
    n: int = Field(default=1, description="Number of images to generate.")  # we support only value=1
    seed: int = Field(ge=0, le=2_147_483_647)
    prompt_extend: bool = True
    watermark: bool = False


class Image2ImageParametersField(BaseModel):
    """``parameters`` section for image-to-image; ``size`` may be omitted."""

    size: str | None = None
    n: int = Field(default=1, description="Number of images to generate.")  # we support only value=1
    seed: int = Field(ge=0, le=2_147_483_647)
    watermark: bool = False


class Text2VideoParametersField(BaseModel):
    """``parameters`` section for text-to-video; ``duration`` is limited to 5..15."""

    size: str
    seed: int = Field(ge=0, le=2_147_483_647)
    duration: int = Field(default=5, ge=5, le=15)
    prompt_extend: bool = True
    watermark: bool = False
    audio: bool = Field(default=False, description="Whether to generate audio automatically.")
    shot_type: str = "single"


class Image2VideoParametersField(BaseModel):
    """``parameters`` section for image-to-video; takes ``resolution`` instead of ``size``."""

    resolution: str
    seed: int = Field(ge=0, le=2_147_483_647)
    duration: int = Field(default=5, ge=5, le=15)
    prompt_extend: bool = True
    watermark: bool = False
    audio: bool = Field(default=False, description="Whether to generate audio automatically.")
    shot_type: str = "single"
+ duration: int = Field(5, ge=5, le=15) + shot_type: str = Field("single") + seed: int = Field(..., ge=0, le=2147483647) + watermark: bool = Field(False) + + +class Text2ImageTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Text2ImageInputField = Field(...) + parameters: Txt2ImageParametersField = Field(...) + + +class Image2ImageTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Image2ImageInputField = Field(...) + parameters: Image2ImageParametersField = Field(...) + + +class Text2VideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Text2VideoInputField = Field(...) + parameters: Text2VideoParametersField = Field(...) + + +class Image2VideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Image2VideoInputField = Field(...) + parameters: Image2VideoParametersField = Field(...) + + +class Reference2VideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Reference2VideoInputField = Field(...) + parameters: Reference2VideoParametersField = Field(...) + + +class Wan27MediaItem(BaseModel): + type: str = Field(...) + url: str = Field(...) + + +class Wan27ReferenceVideoInputField(BaseModel): + prompt: str = Field(...) + negative_prompt: str | None = Field(None) + media: list[Wan27MediaItem] = Field(...) + + +class Wan27ReferenceVideoParametersField(BaseModel): + resolution: str = Field(...) + ratio: str | None = Field(None) + duration: int = Field(5, ge=2, le=15) + watermark: bool = Field(False) + seed: int = Field(..., ge=0, le=2147483647) + + +class Wan27ReferenceVideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Wan27ReferenceVideoInputField = Field(...) + parameters: Wan27ReferenceVideoParametersField = Field(...) + + +class Wan27ImageToVideoInputField(BaseModel): + prompt: str | None = Field(None) + negative_prompt: str | None = Field(None) + media: list[Wan27MediaItem] = Field(...) 
+ + +class Wan27ImageToVideoParametersField(BaseModel): + resolution: str = Field(...) + duration: int = Field(5, ge=2, le=15) + prompt_extend: bool = Field(True) + watermark: bool = Field(False) + seed: int = Field(..., ge=0, le=2147483647) + + +class Wan27ImageToVideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Wan27ImageToVideoInputField = Field(...) + parameters: Wan27ImageToVideoParametersField = Field(...) + + +class Wan27VideoEditInputField(BaseModel): + prompt: str = Field(...) + media: list[Wan27MediaItem] = Field(...) + + +class Wan27VideoEditParametersField(BaseModel): + resolution: str = Field(...) + ratio: str | None = Field(None) + duration: int | None = Field(0) + audio_setting: str = Field("auto") + watermark: bool = Field(False) + seed: int = Field(..., ge=0, le=2147483647) + + +class Wan27VideoEditTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Wan27VideoEditInputField = Field(...) + parameters: Wan27VideoEditParametersField = Field(...) + + +class Wan27Text2VideoParametersField(BaseModel): + resolution: str = Field(...) + ratio: str | None = Field(None) + duration: int = Field(5, ge=2, le=15) + prompt_extend: bool = Field(True) + watermark: bool = Field(False) + seed: int = Field(..., ge=0, le=2147483647) + + +class Wan27Text2VideoTaskCreationRequest(BaseModel): + model: str = Field(...) + input: Text2VideoInputField = Field(...) + parameters: Wan27Text2VideoParametersField = Field(...) + + +class TaskCreationOutputField(BaseModel): + task_id: str = Field(...) + task_status: str = Field(...) + + +class TaskCreationResponse(BaseModel): + output: TaskCreationOutputField | None = Field(None) + request_id: str = Field(...) 
+ code: str | None = Field(None, description="Error code for the failed request.") + message: str | None = Field(None, description="Details about the failed request.") + + +class TaskResult(BaseModel): + url: str | None = Field(None) + code: str | None = Field(None) + message: str | None = Field(None) + + +class ImageTaskStatusOutputField(TaskCreationOutputField): + task_id: str = Field(...) + task_status: str = Field(...) + results: list[TaskResult] | None = Field(None) + + +class VideoTaskStatusOutputField(TaskCreationOutputField): + task_id: str = Field(...) + task_status: str = Field(...) + video_url: str | None = Field(None) + code: str | None = Field(None) + message: str | None = Field(None) + + +class ImageTaskStatusResponse(BaseModel): + output: ImageTaskStatusOutputField | None = Field(None) + request_id: str = Field(...) + + +class VideoTaskStatusResponse(BaseModel): + output: VideoTaskStatusOutputField | None = Field(None) + request_id: str = Field(...) diff --git a/comfy_api_nodes/nodes_anthropic.py b/comfy_api_nodes/nodes_anthropic.py new file mode 100644 index 000000000..28dd70d4e --- /dev/null +++ b/comfy_api_nodes/nodes_anthropic.py @@ -0,0 +1,245 @@ +"""API Nodes for Anthropic Claude (Messages API). 
See: https://docs.anthropic.com/en/api/messages""" + +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.anthropic import ( + AnthropicImageContent, + AnthropicImageSourceUrl, + AnthropicMessage, + AnthropicMessagesRequest, + AnthropicMessagesResponse, + AnthropicRole, + AnthropicTextContent, +) +from comfy_api_nodes.util import ( + ApiEndpoint, + get_number_of_images, + sync_op, + upload_images_to_comfyapi, + validate_string, +) + +ANTHROPIC_MESSAGES_ENDPOINT = "/proxy/anthropic/v1/messages" +ANTHROPIC_IMAGE_MAX_PIXELS = 1568 * 1568 +CLAUDE_MAX_IMAGES = 20 + +CLAUDE_MODELS: dict[str, str] = { + "Opus 4.7": "claude-opus-4-7", + "Opus 4.6": "claude-opus-4-6", + "Sonnet 4.6": "claude-sonnet-4-6", + "Sonnet 4.5": "claude-sonnet-4-5-20250929", + "Haiku 4.5": "claude-haiku-4-5-20251001", +} + + +def _claude_model_inputs(): + return [ + IO.Int.Input( + "max_tokens", + default=16000, + min=32, + max=32000, + tooltip="Maximum number of tokens to generate before stopping.", + advanced=True, + ), + IO.Float.Input( + "temperature", + default=1.0, + min=0.0, + max=1.0, + step=0.01, + tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random. Ignored for Opus 4.7.", + advanced=True, + ), + ] + + +def _model_price_per_million(model: str) -> tuple[float, float] | None: + """Return (input_per_1M, output_per_1M) USD for a Claude model, or None if unknown.""" + if "opus-4-7" in model or "opus-4-6" in model or "opus-4-5" in model: + return 5.0, 25.0 + if "sonnet-4" in model: + return 3.0, 15.0 + if "haiku-4-5" in model: + return 1.0, 5.0 + return None + + +def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None: + """Compute approximate USD price from response usage. 
Server-side billing is authoritative.""" + if not response.usage or not response.model: + return None + rates = _model_price_per_million(response.model) + if rates is None: + return None + input_rate, output_rate = rates + input_tokens = response.usage.input_tokens or 0 + output_tokens = response.usage.output_tokens or 0 + cache_read = response.usage.cache_read_input_tokens or 0 + cache_5m = 0 + cache_1h = 0 + if response.usage.cache_creation: + cache_5m = response.usage.cache_creation.ephemeral_5m_input_tokens or 0 + cache_1h = response.usage.cache_creation.ephemeral_1h_input_tokens or 0 + total = ( + input_tokens * input_rate + + output_tokens * output_rate + + cache_read * input_rate * 0.1 + + cache_5m * input_rate * 1.25 + + cache_1h * input_rate * 2.0 + ) + return total / 1_000_000.0 + + +def _get_text_from_response(response: AnthropicMessagesResponse) -> str: + if not response.content: + return "" + return "\n".join(block.text for block in response.content if block.text) + + +async def _build_image_content_blocks( + cls: type[IO.ComfyNode], + image_tensors: list[Input.Image], +) -> list[AnthropicImageContent]: + urls = await upload_images_to_comfyapi( + cls, + image_tensors, + max_images=CLAUDE_MAX_IMAGES, + total_pixels=ANTHROPIC_IMAGE_MAX_PIXELS, + wait_label="Uploading reference images", + ) + return [AnthropicImageContent(source=AnthropicImageSourceUrl(url=url)) for url in urls] + + +class ClaudeNode(IO.ComfyNode): + """Generate text responses from an Anthropic Claude model.""" + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ClaudeNode", + display_name="Anthropic Claude", + category="api node/text/Anthropic", + essentials_category="Text Generation", + description="Generate text responses with Anthropic's Claude models. 
" + "Provide a text prompt and optionally one or more images for multimodal context.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text input to the model.", + ), + IO.DynamicCombo.Input( + "model", + options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS], + tooltip="The Claude model used to generate the response.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, CLAUDE_MAX_IMAGES + 1)], + min=0, + ), + tooltip=f"Optional image(s) to use as context for the model. Up to {CLAUDE_MAX_IMAGES} images.", + ), + IO.String.Input( + "system_prompt", + multiline=True, + default="", + optional=True, + advanced=True, + tooltip="Foundational instructions that dictate the model's behavior.", + ), + ], + outputs=[IO.String.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m, "opus") ? { + "type": "list_usd", + "usd": [0.005, 0.025], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "sonnet") ? { + "type": "list_usd", + "usd": [0.003, 0.015], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "haiku") ? 
{ + "type": "list_usd", + "usd": [0.001, 0.005], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : {"type":"text", "text":"Token-based"} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + images: dict | None = None, + system_prompt: str = "", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + model_label = model["model"] + max_tokens = model["max_tokens"] + temperature = None if model_label == "Opus 4.7" else model["temperature"] + + image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None] + if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES: + raise ValueError(f"Up to {CLAUDE_MAX_IMAGES} images are supported per request.") + + content: list[AnthropicTextContent | AnthropicImageContent] = [] + if image_tensors: + content.extend(await _build_image_content_blocks(cls, image_tensors)) + content.append(AnthropicTextContent(text=prompt)) + + response = await sync_op( + cls, + ApiEndpoint(path=ANTHROPIC_MESSAGES_ENDPOINT, method="POST"), + response_model=AnthropicMessagesResponse, + data=AnthropicMessagesRequest( + model=CLAUDE_MODELS[model_label], + max_tokens=max_tokens, + messages=[AnthropicMessage(role=AnthropicRole.user, content=content)], + system=system_prompt or None, + temperature=temperature, + ), + price_extractor=calculate_tokens_price, + ) + return IO.NodeOutput(_get_text_from_response(response) or "Empty response from Claude model.") + + +class AnthropicExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ClaudeNode] + + +async def comfy_entrypoint() -> AnthropicExtension: + return AnthropicExtension() diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py index 23590bf24..3f0ce29d8 100644 --- a/comfy_api_nodes/nodes_bfl.py +++ b/comfy_api_nodes/nodes_bfl.py @@ -596,6 +596,7 @@ class 
Flux2ProImageNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["width", "height"], inputs=["images"]), expr=cls.PRICE_BADGE_EXPR, ), + is_deprecated=True, ) @classmethod @@ -674,6 +675,175 @@ class Flux2MaxImageNode(Flux2ProImageNode): """ +_FLUX2_MODEL_ENDPOINTS = { + "Flux.2 [pro]": "/proxy/bfl/flux-2-pro/generate", + "Flux.2 [max]": "/proxy/bfl/flux-2-max/generate", +} + + +def _flux2_model_inputs(): + return [ + IO.Int.Input( + "width", + default=1024, + min=256, + max=2048, + step=32, + ), + IO.Int.Input( + "height", + default=768, + min=256, + max=2048, + step=32, + ), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 9)], + min=0, + ), + tooltip="Optional reference image(s) for image-to-image generation. Up to 8 images.", + ), + ] + + +class Flux2ImageNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="Flux2ImageNode", + display_name="Flux.2 Image", + category="api node/image/BFL", + description="Generate images via Flux.2 [pro] or Flux.2 [max] from a prompt and optional reference images.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt for the image generation or edit", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Flux.2 [pro]", _flux2_model_inputs()), + IO.DynamicCombo.Option("Flux.2 [max]", _flux2_model_inputs()), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="The random seed used for creating the noise.", + ), + ], + outputs=[IO.Image.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=["model", "model.width", "model.height"], + input_groups=["model.images"], + ), + expr=""" + ( + $isMax := 
widgets.model = "flux.2 [max]"; + $MP := 1024 * 1024; + $w := $lookup(widgets, "model.width"); + $h := $lookup(widgets, "model.height"); + $outMP := $max([1, $floor((($w * $h) + $MP - 1) / $MP)]); + $outputCost := $isMax + ? (0.07 + 0.03 * ($outMP - 1)) + : (0.03 + 0.015 * ($outMP - 1)); + $refMin := $isMax ? 0.03 : 0.015; + $refMax := $isMax ? 0.24 : 0.12; + $hasRefs := $lookup(inputGroups, "model.images") > 0; + $hasRefs + ? { + "type": "range_usd", + "min_usd": $outputCost + $refMin, + "max_usd": $outputCost + $refMax, + "format": { "approximate": true } + } + : {"type": "usd", "usd": $outputCost} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + ) -> IO.NodeOutput: + model_choice = model["model"] + endpoint = _FLUX2_MODEL_ENDPOINTS[model_choice] + width = model["width"] + height = model["height"] + images_dict = model.get("images") or {} + + image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None] + n_images = sum(get_number_of_images(t) for t in image_tensors) + if n_images > 8: + raise ValueError("The current maximum number of supported images is 8.") + + flat_tensors: list[torch.Tensor] = [] + for tensor in image_tensors: + if len(tensor.shape) == 4: + flat_tensors.extend(tensor[i] for i in range(tensor.shape[0])) + else: + flat_tensors.append(tensor) + + reference_images: dict[str, str] = {} + for idx, tensor in enumerate(flat_tensors): + key_name = f"input_image_{idx + 1}" if idx else "input_image" + reference_images[key_name] = tensor_to_base64_string(tensor, total_pixels=2048 * 2048) + + initial_response = await sync_op( + cls, + ApiEndpoint(path=endpoint, method="POST"), + response_model=BFLFluxProGenerateResponse, + data=Flux2ProGenerateRequest( + prompt=prompt, + width=width, + height=height, + seed=seed, + **reference_images, + ), + ) + + def price_extractor(_r: BaseModel) -> float | None: + return None if initial_response.cost is None else initial_response.cost 
/ 100 + + response = await poll_op( + cls, + ApiEndpoint(initial_response.polling_url), + response_model=BFLFluxStatusResponse, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + price_extractor=price_extractor, + completed_statuses=[BFLStatus.ready], + failed_statuses=[ + BFLStatus.request_moderated, + BFLStatus.content_moderated, + BFLStatus.error, + BFLStatus.task_not_found, + ], + queued_statuses=[], + ) + return IO.NodeOutput(await download_url_to_image_tensor(response.result["sample"])) + + class BFLExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -685,6 +855,7 @@ class BFLExtension(ComfyExtension): FluxProFillNode, Flux2ProImageNode, Flux2MaxImageNode, + Flux2ImageNode, ] diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index de0c22e70..d6b479336 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -1,5 +1,7 @@ +import hashlib import logging import math +import re import torch from typing_extensions import override @@ -8,16 +10,32 @@ from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.bytedance import ( RECOMMENDED_PRESETS, RECOMMENDED_PRESETS_SEEDREAM_4, + RECOMMENDED_PRESETS_SEEDREAM_4_0, + RECOMMENDED_PRESETS_SEEDREAM_4_5, + RECOMMENDED_PRESETS_SEEDREAM_5_LITE, + SEEDANCE2_PRICE_PER_1K_TOKENS, + SEEDANCE2_REF_VIDEO_PIXEL_LIMITS, VIDEO_TASKS_EXECUTION_TIME, + GetAssetResponse, Image2VideoTaskCreationRequest, ImageTaskCreationResponse, + Seedance2TaskCreationRequest, + SeedanceCreateAssetRequest, + SeedanceCreateAssetResponse, + SeedanceCreateVisualValidateSessionResponse, + SeedanceGetVisualValidateSessionResponse, + SeedanceVirtualLibraryCreateAssetRequest, Seedream4Options, Seedream4TaskCreationRequest, + TaskAudioContent, + TaskAudioContentUrl, TaskCreationResponse, TaskImageContent, TaskImageContentUrl, TaskStatusResponse, TaskTextContent, + TaskVideoContent, + 
TaskVideoContentUrl, Text2ImageTaskCreationRequest, Text2VideoTaskCreationRequest, ) @@ -28,30 +46,280 @@ from comfy_api_nodes.util import ( get_number_of_images, image_tensor_pair_to_batch, poll_op, + resize_video_to_pixel_budget, sync_op, + upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_images_to_comfyapi, + upload_video_to_comfyapi, validate_image_aspect_ratio, validate_image_dimensions, validate_string, + validate_video_dimensions, + validate_video_duration, ) +from server import PromptServer BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations" +_VERIFICATION_POLL_TIMEOUT_SEC = 120 +_VERIFICATION_POLL_INTERVAL_SEC = 3 + SEEDREAM_MODELS = { "seedream 5.0 lite": "seedream-5-0-260128", "seedream-4-5-251128": "seedream-4-5-251128", "seedream-4-0-250828": "seedream-4-0-250828", } +SEEDREAM_PRESETS = { + "seedream-5-0-260128": RECOMMENDED_PRESETS_SEEDREAM_5_LITE, + "seedream-4-5-251128": RECOMMENDED_PRESETS_SEEDREAM_4_5, + "seedream-4-0-250828": RECOMMENDED_PRESETS_SEEDREAM_4_0, +} + # Long-running tasks endpoints(e.g., video) BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" # + /{task_id} +BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT = "/proxy/byteplus-seedance2/api/v3/contents/generations/tasks" # + /{task_id} + +SEEDANCE_MODELS = { + "Seedance 2.0": "dreamina-seedance-2-0-260128", + "Seedance 2.0 Fast": "dreamina-seedance-2-0-fast-260128", +} DEPRECATED_MODELS = {"seedance-1-0-lite-t2v-250428", "seedance-1-0-lite-i2v-250428"} + logger = logging.getLogger(__name__) +def _validate_ref_video_pixels(video: Input.Video, model_id: str, resolution: str, index: int) -> None: + """Validate reference video pixel count against Seedance 2.0 model limits for the selected resolution.""" + model_limits = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id) + if not model_limits: + return + limits = model_limits.get(resolution) + if not limits: + 
return + try: + w, h = video.get_dimensions() + except Exception: + return + pixels = w * h + min_px = limits.get("min") + max_px = limits.get("max") + if min_px and pixels < min_px: + raise ValueError( + f"Reference video {index} is too small: {w}x{h} = {pixels:,}px. " f"Minimum is {min_px:,}px for this model." + ) + if max_px and pixels > max_px: + raise ValueError( + f"Reference video {index} is too large: {w}x{h} = {pixels:,}px. " + f"Maximum is {max_px:,}px for this model. Try downscaling the video." + ) + + +async def _resolve_reference_assets( + cls: type[IO.ComfyNode], + asset_ids: list[str], +) -> tuple[dict[str, str], dict[str, str], dict[str, str]]: + """Look up each asset, validate Active status, group by asset_type. + + Returns (image_assets, video_assets, audio_assets), each mapping asset_id -> "asset://". + """ + image_assets: dict[str, str] = {} + video_assets: dict[str, str] = {} + audio_assets: dict[str, str] = {} + for i, raw_id in enumerate(asset_ids, 1): + asset_id = (raw_id or "").strip() + if not asset_id: + continue + result = await sync_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/assets/{asset_id}"), + response_model=GetAssetResponse, + ) + if result.status != "Active": + extra = f" {result.error.code}: {result.error.message}" if result.error else "" + raise ValueError(f"Reference asset {i} (Id={asset_id}) is not Active (Status={result.status}).{extra}") + asset_uri = f"asset://{asset_id}" + if result.asset_type == "Image": + image_assets[asset_id] = asset_uri + elif result.asset_type == "Video": + video_assets[asset_id] = asset_uri + elif result.asset_type == "Audio": + audio_assets[asset_id] = asset_uri + return image_assets, video_assets, audio_assets + + +_ASSET_REF_RE = re.compile(r"\basset ?(\d{1,2})\b", re.IGNORECASE) + + +def _build_asset_labels( + reference_assets: dict[str, str], + image_asset_uris: dict[str, str], + video_asset_uris: dict[str, str], + audio_asset_uris: dict[str, str], + n_reference_images: int, + 
n_reference_videos: int, + n_reference_audios: int, +) -> dict[int, str]: + """Map asset slot number (from 'asset_N' keys) to its positional label. + + Asset entries are appended to `content` after the reference_images/videos/audios, + so their 1-indexed labels continue from the count of existing same-type refs: + one reference_images entry + one Image-type asset -> asset labelled "Image 2". + """ + image_n = n_reference_images + video_n = n_reference_videos + audio_n = n_reference_audios + labels: dict[int, str] = {} + for slot_key, raw_id in reference_assets.items(): + asset_id = (raw_id or "").strip() + if not asset_id: + continue + try: + slot_num = int(slot_key.rsplit("_", 1)[-1]) + except ValueError: + continue + if asset_id in image_asset_uris: + image_n += 1 + labels[slot_num] = f"Image {image_n}" + elif asset_id in video_asset_uris: + video_n += 1 + labels[slot_num] = f"Video {video_n}" + elif asset_id in audio_asset_uris: + audio_n += 1 + labels[slot_num] = f"Audio {audio_n}" + return labels + + +def _rewrite_asset_refs(prompt: str, labels: dict[int, str]) -> str: + """Case-insensitively replace 'assetNN' (1-2 digit) tokens with their labels.""" + if not labels: + return prompt + + def _sub(m: "re.Match[str]") -> str: + return labels.get(int(m.group(1)), m.group(0)) + + return _ASSET_REF_RE.sub(_sub, prompt) + + +async def _obtain_group_id_via_h5_auth(cls: type[IO.ComfyNode]) -> str: + session = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/visual-validate/sessions", method="POST"), + response_model=SeedanceCreateVisualValidateSessionResponse, + ) + logger.warning("Seedance authentication required. 
Open link: %s", session.h5_link) + + h5_text = f"Open this link in your browser and complete face verification:\n\n{session.h5_link}" + + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/visual-validate/sessions/{session.session_id}"), + response_model=SeedanceGetVisualValidateSessionResponse, + status_extractor=lambda r: r.status, + completed_statuses=["completed"], + failed_statuses=["failed"], + poll_interval=_VERIFICATION_POLL_INTERVAL_SEC, + max_poll_attempts=(_VERIFICATION_POLL_TIMEOUT_SEC // _VERIFICATION_POLL_INTERVAL_SEC) - 1, + estimated_duration=_VERIFICATION_POLL_TIMEOUT_SEC - 1, + extra_text=h5_text, + ) + + if not result.group_id: + raise RuntimeError(f"Seedance session {session.session_id} completed without a group_id") + + logger.warning("Seedance authentication complete. New GroupId: %s", result.group_id) + PromptServer.instance.send_progress_text( + f"Authentication complete. New GroupId: {result.group_id}", cls.hidden.unique_id + ) + return result.group_id + + +async def _resolve_group_id(cls: type[IO.ComfyNode], group_id: str) -> str: + if group_id and group_id.strip(): + return group_id.strip() + return await _obtain_group_id_via_h5_auth(cls) + + +async def _create_seedance_asset( + cls: type[IO.ComfyNode], + *, + group_id: str, + url: str, + name: str, + asset_type: str, +) -> str: + req = SeedanceCreateAssetRequest( + group_id=group_id, + url=url, + asset_type=asset_type, + name=name or None, + ) + result = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/assets", method="POST"), + response_model=SeedanceCreateAssetResponse, + data=req, + ) + return result.asset_id + + +async def _wait_for_asset_active(cls: type[IO.ComfyNode], asset_id: str, group_id: str) -> GetAssetResponse: + """Poll the newly created asset until its status becomes Active.""" + return await poll_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/assets/{asset_id}"), + response_model=GetAssetResponse, + status_extractor=lambda r: r.status, + 
completed_statuses=["Active"], + failed_statuses=["Failed"], + poll_interval=5, + max_poll_attempts=1200, + extra_text=f"Waiting for asset pre-processing...\n\nasset_id: {asset_id}\n\ngroup_id: {group_id}", + ) + + +async def _seedance_virtual_library_upload_image_asset( + cls: type[IO.ComfyNode], + image: torch.Tensor, + *, + wait_label: str = "Uploading image", +) -> str: + """Upload an image into the caller's per-customer Seedance virtual library.""" + public_url = await upload_image_to_comfyapi(cls, image, wait_label=wait_label) + normalized = image.detach().cpu().contiguous().to(torch.float32) + digest = hashlib.sha256() + digest.update(str(tuple(normalized.shape)).encode("utf-8")) + digest.update(b"\0") + digest.update(normalized.numpy().tobytes()) + image_hash = digest.hexdigest() + create_resp = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/virtual-library/assets", method="POST"), + response_model=SeedanceCreateAssetResponse, + data=SeedanceVirtualLibraryCreateAssetRequest(url=public_url, hash=image_hash), + ) + await _wait_for_asset_active(cls, create_resp.asset_id, group_id="virtual-library") + return f"asset://{create_resp.asset_id}" + + +def _seedance2_price_extractor(model_id: str, has_video_input: bool): + """Returns a price_extractor closure for Seedance 2.0 poll_op.""" + rate = SEEDANCE2_PRICE_PER_1K_TOKENS.get((model_id, has_video_input)) + if rate is None: + return None + + def extractor(response: TaskStatusResponse) -> float | None: + if response.usage is None: + return None + return response.usage.total_tokens * 1.43 * rate / 1_000.0 + + return extractor + + def get_image_url_from_response(response: ImageTaskCreationResponse) -> str: if response.error: error_msg = f"ByteDance request failed. 
Code: {response.error['code']}, message: {response.error['message']}" @@ -303,6 +571,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): ) """, ), + is_deprecated=True, ) @classmethod @@ -335,8 +604,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): mp_provided = out_num_pixels / 1_000_000.0 if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400: raise ValueError( - f"Minimum image resolution for the selected model is 3.68MP, " - f"but {mp_provided:.2f}MP provided." + f"Minimum image resolution for the selected model is 3.68MP, " f"but {mp_provided:.2f}MP provided." ) if "seedream-4-0" in model and out_num_pixels < 921600: raise ValueError( @@ -393,6 +661,226 @@ class ByteDanceSeedreamNode(IO.ComfyNode): return IO.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls])) +def _seedream_model_inputs(*, max_ref_images: int, presets: list): + return [ + IO.Combo.Input( + "size_preset", + options=[label for label, _, _ in presets], + tooltip="Pick a recommended size. Select Custom to use the width and height below.", + ), + IO.Int.Input( + "width", + default=2048, + min=1024, + max=6240, + step=2, + tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`", + ), + IO.Int.Input( + "height", + default=2048, + min=1024, + max=4992, + step=2, + tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`", + ), + IO.Int.Input( + "max_images", + default=1, + min=1, + max=max_ref_images, + step=1, + display_mode=IO.NumberDisplay.number, + tooltip="Maximum number of images to generate. With 1, exactly one image is produced. " + "With >1, the model generates between 1 and max_images related images " + "(e.g., story scenes, character variations). 
" + "Total images (input + generated) cannot exceed 15.", + ), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, max_ref_images + 1)], + min=0, + ), + tooltip=f"Optional reference image(s) for image-to-image or multi-reference generation. " + f"Up to {max_ref_images} images.", + ), + IO.Boolean.Input( + "fail_on_partial", + default=False, + tooltip="If enabled, abort execution if any requested images are missing or return an error.", + advanced=True, + ), + ] + + +class ByteDanceSeedreamNodeV2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDanceSeedreamNodeV2", + display_name="ByteDance Seedream 4.5 & 5.0", + category="api node/image/ByteDance", + description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text prompt for creating or editing an image.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "seedream 5.0 lite", + _seedream_model_inputs(max_ref_images=14, presets=RECOMMENDED_PRESETS_SEEDREAM_5_LITE), + ), + IO.DynamicCombo.Option( + "seedream-4-5-251128", + _seedream_model_inputs(max_ref_images=10, presets=RECOMMENDED_PRESETS_SEEDREAM_4_5), + ), + IO.DynamicCombo.Option( + "seedream-4-0-250828", + _seedream_model_inputs(max_ref_images=10, presets=RECOMMENDED_PRESETS_SEEDREAM_4_0), + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip='Whether to add an "AI generated" watermark to the image.', + advanced=True, + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + 
IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $price := $contains(widgets.model, "5.0 lite") ? 0.035 : + $contains(widgets.model, "4-5") ? 0.04 : 0.03; + { + "type":"usd", + "usd": $price, + "format": { "suffix":" x images/Run", "approximate": true } + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int = 0, + watermark: bool = False, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + model_id = SEEDREAM_MODELS[model["model"]] + presets = SEEDREAM_PRESETS[model_id] + + size_preset = model.get("size_preset", presets[0][0]) + width = model.get("width", 2048) + height = model.get("height", 2048) + max_images = model.get("max_images", 1) + sequential_image_generation = "disabled" if max_images == 1 else "auto" + images_dict = model.get("images") or {} + fail_on_partial = model.get("fail_on_partial", False) + + w = h = None + for label, tw, th in presets: + if label == size_preset: + w, h = tw, th + break + if w is None or h is None: + w, h = width, height + + out_num_pixels = w * h + mp_provided = out_num_pixels / 1_000_000.0 + if ("seedream-4-5" in model_id or "seedream-5-0" in model_id) and out_num_pixels < 3686400: + raise ValueError( + f"Minimum image resolution for the selected model is 3.68MP, but {mp_provided:.2f}MP provided." + ) + if "seedream-4-0" in model_id and out_num_pixels < 921600: + raise ValueError( + f"Minimum image resolution that the selected model can generate is 0.92MP, " + f"but {mp_provided:.2f}MP provided." + ) + if out_num_pixels > 16_777_216: + raise ValueError( + f"Maximum image resolution for the selected model is 16.78MP, but {mp_provided:.2f}MP provided." 
+ ) + + image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None] + n_input_images = sum(get_number_of_images(t) for t in image_tensors) + max_num_of_images = 14 if model_id == "seedream-5-0-260128" else 10 + if n_input_images > max_num_of_images: + raise ValueError( + f"Maximum of {max_num_of_images} reference images are supported, but {n_input_images} received." + ) + if sequential_image_generation == "auto" and n_input_images + max_images > 15: + raise ValueError( + "The maximum number of generated images plus the number of reference images cannot exceed 15." + ) + + reference_images_urls: list[str] = [] + if image_tensors: + for tensor in image_tensors: + validate_image_aspect_ratio(tensor, (1, 3), (3, 1)) + reference_images_urls = await upload_images_to_comfyapi( + cls, + image_tensors, + max_images=n_input_images, + mime_type="image/png", + wait_label="Uploading reference images", + ) + + response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_IMAGE_ENDPOINT, method="POST"), + response_model=ImageTaskCreationResponse, + data=Seedream4TaskCreationRequest( + model=model_id, + prompt=prompt, + image=reference_images_urls, + size=f"{w}x{h}", + seed=seed, + sequential_image_generation=sequential_image_generation, + sequential_image_generation_options=Seedream4Options(max_images=max_images), + watermark=watermark, + ), + ) + if len(response.data) == 1: + return IO.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response))) + urls = [str(d["url"]) for d in response.data if isinstance(d, dict) and "url" in d] + if fail_on_partial and len(urls) < len(response.data): + raise RuntimeError(f"Only {len(urls)} of {len(response.data)} images were generated before error.") + return IO.NodeOutput(torch.cat([await download_url_to_image_tensor(i) for i in urls])) + + class ByteDanceTextToVideoNode(IO.ComfyNode): @classmethod @@ -952,33 +1440,6 @@ class ByteDanceImageReferenceNode(IO.ComfyNode): ) -async def 
process_video_task( - cls: type[IO.ComfyNode], - payload: Text2VideoTaskCreationRequest | Image2VideoTaskCreationRequest, - estimated_duration: int | None, -) -> IO.NodeOutput: - if payload.model in DEPRECATED_MODELS: - logger.warning( - "Model '%s' is deprecated and will be deactivated on May 13, 2026. " - "Please switch to a newer model. Recommended: seedance-1-0-pro-fast-251015.", - payload.model, - ) - initial_response = await sync_op( - cls, - ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), - data=payload, - response_model=TaskCreationResponse, - ) - response = await poll_op( - cls, - ApiEndpoint(path=f"{BYTEPLUS_TASK_STATUS_ENDPOINT}/{initial_response.id}"), - status_extractor=lambda r: r.status, - estimated_duration=estimated_duration, - response_model=TaskStatusResponse, - ) - return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) - - def raise_if_text_params(prompt: str, text_params: list[str]) -> None: for i in text_params: if f"--{i} " in prompt: @@ -1040,16 +1501,850 @@ PRICE_BADGE_VIDEO = IO.PriceBadge( ) +def _seedance2_text_inputs(resolutions: list[str], default_ratio: str = "16:9"): + return [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text prompt for video generation.", + ), + IO.Combo.Input( + "resolution", + options=resolutions, + tooltip="Resolution of the output video.", + ), + IO.Combo.Input( + "ratio", + options=["16:9", "4:3", "1:1", "3:4", "9:16", "21:9", "adaptive"], + default=default_ratio, + tooltip="Aspect ratio of the output video.", + ), + IO.Int.Input( + "duration", + default=7, + min=4, + max=15, + step=1, + tooltip="Duration of the output video in seconds (4-15).", + display_mode=IO.NumberDisplay.slider, + ), + IO.Boolean.Input( + "generate_audio", + default=True, + tooltip="Enable audio generation for the output video.", + ), + ] + + +class ByteDance2TextToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + 
node_id="ByteDance2TextToVideoNode", + display_name="ByteDance Seedance 2.0 Text to Video", + category="api node/video/ByteDance", + description="Generate video using Seedance 2.0 models based on a text prompt.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Seedance 2.0", _seedance2_text_inputs(["480p", "720p", "1080p"])), + IO.DynamicCombo.Option("Seedance 2.0 Fast", _seedance2_text_inputs(["480p", "720p"])), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $rate1080 := 48800; + $m := widgets.model; + $pricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "1080p" ? $rate1080 : + $res = "720p" ? 
$rate720 : + $rate480; + $cost := $dur * $rate * $pricePer1K / 1000; + {"type": "usd", "usd": $cost, "format": {"approximate": true}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + model_id = SEEDANCE_MODELS[model["model"]] + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=[TaskTextContent(text=model["prompt"])], + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +class ByteDance2FirstLastFrameNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDance2FirstLastFrameNode", + display_name="ByteDance Seedance 2.0 First-Last-Frame to Video", + category="api node/video/ByteDance", + description="Generate video using Seedance 2.0 from a first frame image and optional last frame image.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "Seedance 2.0", + _seedance2_text_inputs(["480p", "720p", "1080p"], default_ratio="adaptive"), + ), + IO.DynamicCombo.Option( + "Seedance 2.0 Fast", + _seedance2_text_inputs(["480p", "720p"], default_ratio="adaptive"), + ), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Image.Input( + 
"first_frame", + tooltip="First frame image for the video.", + optional=True, + ), + IO.Image.Input( + "last_frame", + tooltip="Last frame image for the video.", + optional=True, + ), + IO.String.Input( + "first_frame_asset_id", + default="", + tooltip="Seedance asset_id to use as the first frame. " + "Mutually exclusive with the first_frame image input.", + optional=True, + ), + IO.String.Input( + "last_frame_asset_id", + default="", + tooltip="Seedance asset_id to use as the last frame. " + "Mutually exclusive with the last_frame image input.", + optional=True, + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $rate1080 := 48800; + $m := widgets.model; + $pricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "1080p" ? $rate1080 : + $res = "720p" ? 
$rate720 : + $rate480; + $cost := $dur * $rate * $pricePer1K / 1000; + {"type": "usd", "usd": $cost, "format": {"approximate": true}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + first_frame: Input.Image | None = None, + last_frame: Input.Image | None = None, + first_frame_asset_id: str = "", + last_frame_asset_id: str = "", + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + model_id = SEEDANCE_MODELS[model["model"]] + + first_frame_asset_id = first_frame_asset_id.strip() + last_frame_asset_id = last_frame_asset_id.strip() + + if first_frame is not None and first_frame_asset_id: + raise ValueError("Provide only one of first_frame or first_frame_asset_id, not both.") + if first_frame is None and not first_frame_asset_id: + raise ValueError("Either first_frame or first_frame_asset_id is required.") + if last_frame is not None and last_frame_asset_id: + raise ValueError("Provide only one of last_frame or last_frame_asset_id, not both.") + + asset_ids_to_resolve = [a for a in (first_frame_asset_id, last_frame_asset_id) if a] + image_assets: dict[str, str] = {} + if asset_ids_to_resolve: + image_assets, _, _ = await _resolve_reference_assets(cls, asset_ids_to_resolve) + for aid in asset_ids_to_resolve: + if aid not in image_assets: + raise ValueError(f"Asset {aid} is not an Image asset.") + + if first_frame_asset_id: + first_frame_url = image_assets[first_frame_asset_id] + else: + first_frame_url = await _seedance_virtual_library_upload_image_asset( + cls, first_frame, wait_label="Uploading first frame." 
+ ) + + content: list[TaskTextContent | TaskImageContent] = [ + TaskTextContent(text=model["prompt"]), + TaskImageContent( + image_url=TaskImageContentUrl(url=first_frame_url), + role="first_frame", + ), + ] + if last_frame_asset_id: + content.append( + TaskImageContent( + image_url=TaskImageContentUrl(url=image_assets[last_frame_asset_id]), + role="last_frame", + ), + ) + elif last_frame is not None: + content.append( + TaskImageContent( + image_url=TaskImageContentUrl( + url=await _seedance_virtual_library_upload_image_asset( + cls, last_frame, wait_label="Uploading last frame." + ) + ), + role="last_frame", + ), + ) + + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=content, + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +def _seedance2_reference_inputs(resolutions: list[str], default_ratio: str = "16:9"): + return [ + *_seedance2_text_inputs(resolutions, default_ratio=default_ratio), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image_1", + "image_2", + "image_3", + "image_4", + "image_5", + "image_6", + "image_7", + "image_8", + "image_9", + ], + min=0, + ), + ), + IO.Autogrow.Input( + "reference_videos", + template=IO.Autogrow.TemplateNames( + IO.Video.Input("reference_video"), + names=["video_1", "video_2", 
"video_3"], + min=0, + ), + ), + IO.Autogrow.Input( + "reference_audios", + template=IO.Autogrow.TemplateNames( + IO.Audio.Input("reference_audio"), + names=["audio_1", "audio_2", "audio_3"], + min=0, + ), + ), + IO.Boolean.Input( + "auto_downscale", + default=False, + advanced=True, + optional=True, + tooltip="Automatically downscale reference videos that exceed the model's pixel budget " + "for the selected resolution. Aspect ratio is preserved; videos already within limits are untouched.", + ), + IO.Autogrow.Input( + "reference_assets", + template=IO.Autogrow.TemplateNames( + IO.String.Input("reference_asset"), + names=[ + "asset_1", + "asset_2", + "asset_3", + "asset_4", + "asset_5", + "asset_6", + "asset_7", + "asset_8", + "asset_9", + ], + min=0, + ), + ), + ] + + +class ByteDance2ReferenceNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDance2ReferenceNode", + display_name="ByteDance Seedance 2.0 Reference to Video", + category="api node/video/ByteDance", + description="Generate, edit, or extend video using Seedance 2.0 with reference images, " + "videos, and audio. 
Supports multimodal reference, video editing, and video extension.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "Seedance 2.0", + _seedance2_reference_inputs(["480p", "720p", "1080p"], default_ratio="adaptive"), + ), + IO.DynamicCombo.Option( + "Seedance 2.0 Fast", + _seedance2_reference_inputs(["480p", "720p"], default_ratio="adaptive"), + ), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=["model", "model.resolution", "model.duration"], + input_groups=["model.reference_videos"], + ), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $rate1080 := 48800; + $m := widgets.model; + $hasVideo := $lookup(inputGroups, "model.reference_videos") > 0; + $noVideoPricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $videoPricePer1K := $contains($m, "fast") ? 0.004719 : 0.006149; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "1080p" ? $rate1080 : + $res = "720p" ? $rate720 : + $rate480; + $noVideoCost := $dur * $rate * $noVideoPricePer1K / 1000; + $minVideoFactor := $ceil($dur * 5 / 3); + $minVideoCost := $minVideoFactor * $rate * $videoPricePer1K / 1000; + $maxVideoCost := (15 + $dur) * $rate * $videoPricePer1K / 1000; + $hasVideo + ? 
{ + "type": "range_usd", + "min_usd": $minVideoCost, + "max_usd": $maxVideoCost, + "format": {"approximate": true} + } + : { + "type": "usd", + "usd": $noVideoCost, + "format": {"approximate": true} + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + + reference_images = model.get("reference_images", {}) + reference_videos = model.get("reference_videos", {}) + reference_audios = model.get("reference_audios", {}) + reference_assets = model.get("reference_assets", {}) + + reference_image_assets, reference_video_assets, reference_audio_assets = await _resolve_reference_assets( + cls, list(reference_assets.values()) + ) + + if not reference_images and not reference_videos and not reference_image_assets and not reference_video_assets: + raise ValueError("At least one reference image or video or asset is required.") + + total_images = len(reference_images) + len(reference_image_assets) + if total_images > 9: + raise ValueError( + f"Too many reference images: {total_images} " + f"(images={len(reference_images)}, image assets={len(reference_image_assets)}). Maximum is 9." + ) + total_videos = len(reference_videos) + len(reference_video_assets) + if total_videos > 3: + raise ValueError( + f"Too many reference videos: {total_videos} " + f"(videos={len(reference_videos)}, video assets={len(reference_video_assets)}). Maximum is 3." + ) + total_audios = len(reference_audios) + len(reference_audio_assets) + if total_audios > 3: + raise ValueError( + f"Too many reference audios: {total_audios} " + f"(audios={len(reference_audios)}, audio assets={len(reference_audio_assets)}). Maximum is 3." 
+ ) + + model_id = SEEDANCE_MODELS[model["model"]] + has_video_input = total_videos > 0 + + if model.get("auto_downscale") and reference_videos: + max_px = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id, {}).get(model["resolution"], {}).get("max") + if max_px: + for key in reference_videos: + reference_videos[key] = resize_video_to_pixel_budget(reference_videos[key], max_px) + + total_video_duration = 0.0 + for i, key in enumerate(reference_videos, 1): + video = reference_videos[key] + _validate_ref_video_pixels(video, model_id, model["resolution"], i) + try: + dur = video.get_duration() + if dur < 1.8: + raise ValueError(f"Reference video {i} is too short: {dur:.1f}s. Minimum duration is 1.8 seconds.") + total_video_duration += dur + except ValueError: + raise + except Exception: + pass + if total_video_duration > 15.1: + raise ValueError(f"Total reference video duration is {total_video_duration:.1f}s. Maximum is 15.1 seconds.") + + total_audio_duration = 0.0 + for i, key in enumerate(reference_audios, 1): + audio = reference_audios[key] + dur = int(audio["waveform"].shape[-1]) / int(audio["sample_rate"]) + if dur < 1.8: + raise ValueError(f"Reference audio {i} is too short: {dur:.1f}s. Minimum duration is 1.8 seconds.") + total_audio_duration += dur + if total_audio_duration > 15.1: + raise ValueError(f"Total reference audio duration is {total_audio_duration:.1f}s. 
Maximum is 15.1 seconds.") + + asset_labels = _build_asset_labels( + reference_assets, + reference_image_assets, + reference_video_assets, + reference_audio_assets, + len(reference_images), + len(reference_videos), + len(reference_audios), + ) + prompt_text = _rewrite_asset_refs(model["prompt"], asset_labels) + + content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = [ + TaskTextContent(text=prompt_text), + ] + for i, key in enumerate(reference_images, 1): + content.append( + TaskImageContent( + image_url=TaskImageContentUrl( + url=await _seedance_virtual_library_upload_image_asset( + cls, + reference_images[key], + wait_label=f"Uploading image {i}", + ), + ), + role="reference_image", + ), + ) + for i, key in enumerate(reference_videos, 1): + content.append( + TaskVideoContent( + video_url=TaskVideoContentUrl( + url=await upload_video_to_comfyapi( + cls, + reference_videos[key], + wait_label=f"Uploading video {i}", + ), + ), + ), + ) + for key in reference_audios: + content.append( + TaskAudioContent( + audio_url=TaskAudioContentUrl( + url=await upload_audio_to_comfyapi( + cls, + reference_audios[key], + container_format="mp3", + codec_name="libmp3lame", + mime_type="audio/mpeg", + ), + ), + ), + ) + for url in reference_image_assets.values(): + content.append( + TaskImageContent( + image_url=TaskImageContentUrl(url=url), + role="reference_image", + ), + ) + for url in reference_video_assets.values(): + content.append( + TaskVideoContent(video_url=TaskVideoContentUrl(url=url)), + ) + for url in reference_audio_assets.values(): + content.append( + TaskAudioContent(audio_url=TaskAudioContentUrl(url=url)), + ) + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=content, + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + 
watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=has_video_input), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +async def process_video_task( + cls: type[IO.ComfyNode], + payload: Text2VideoTaskCreationRequest | Image2VideoTaskCreationRequest, + estimated_duration: int | None, +) -> IO.NodeOutput: + if payload.model in DEPRECATED_MODELS: + logger.warning( + "Model '%s' is deprecated and will be deactivated on May 13, 2026. " + "Please switch to a newer model. Recommended: seedance-1-0-pro-fast-251015.", + payload.model, + ) + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=payload, + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + status_extractor=lambda r: r.status, + estimated_duration=estimated_duration, + response_model=TaskStatusResponse, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +class ByteDanceCreateImageAsset(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ByteDanceCreateImageAsset", + display_name="ByteDance Create Image Asset", + category="api node/image/ByteDance", + description=( + "Create a Seedance 2.0 personal image asset. Uploads the input image and " + "registers it in the given asset group. If group_id is empty, runs a real-person " + "H5 authentication flow to create a new group before adding the asset." 
+ ), + inputs=[ + IO.Image.Input("image", tooltip="Image to register as a personal asset."), + IO.String.Input( + "group_id", + default="", + tooltip="Reuse an existing Seedance asset group ID to skip repeated human verification for the " + "same person. Leave empty to run real-person authentication in the browser and create a new group.", + ), + # IO.String.Input( + # "name", + # default="", + # tooltip="Asset name (up to 64 characters).", + # ), + ], + outputs=[ + IO.String.Output(display_name="asset_id"), + IO.String.Output(display_name="group_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + # is_api_node=True, + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + group_id: str = "", + # name: str = "", + ) -> IO.NodeOutput: + # if len(name) > 64: + # raise ValueError("Name of asset can not be greater then 64 symbols") + validate_image_dimensions(image, min_width=300, max_width=6000, min_height=300, max_height=6000) + validate_image_aspect_ratio(image, min_ratio=(0.4, 1), max_ratio=(2.5, 1)) + resolved_group = await _resolve_group_id(cls, group_id) + asset_id = await _create_seedance_asset( + cls, + group_id=resolved_group, + url=await upload_image_to_comfyapi(cls, image), + name="", + asset_type="Image", + ) + await _wait_for_asset_active(cls, asset_id, resolved_group) + PromptServer.instance.send_progress_text( + f"Please save the asset_id and group_id for reuse.\n\nasset_id: {asset_id}\n\n" + f"group_id: {resolved_group}", + cls.hidden.unique_id, + ) + return IO.NodeOutput(asset_id, resolved_group) + + +class ByteDanceCreateVideoAsset(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ByteDanceCreateVideoAsset", + display_name="ByteDance Create Video Asset", + category="api node/video/ByteDance", + description=( + "Create a Seedance 2.0 personal video asset. 
Uploads the input video and " + "registers it in the given asset group. If group_id is empty, runs a real-person " + "H5 authentication flow to create a new group before adding the asset." + ), + inputs=[ + IO.Video.Input("video", tooltip="Video to register as a personal asset."), + IO.String.Input( + "group_id", + default="", + tooltip="Reuse an existing Seedance asset group ID to skip repeated human verification for the " + "same person. Leave empty to run real-person authentication in the browser and create a new group.", + ), + # IO.String.Input( + # "name", + # default="", + # tooltip="Asset name (up to 64 characters).", + # ), + ], + outputs=[ + IO.String.Output(display_name="asset_id"), + IO.String.Output(display_name="group_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + # is_api_node=True, + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + group_id: str = "", + # name: str = "", + ) -> IO.NodeOutput: + # if len(name) > 64: + # raise ValueError("Name of asset can not be greater then 64 symbols") + validate_video_duration(video, min_duration=2, max_duration=15) + validate_video_dimensions(video, min_width=300, max_width=6000, min_height=300, max_height=6000) + + w, h = video.get_dimensions() + if h > 0: + ratio = w / h + if not (0.4 <= ratio <= 2.5): + raise ValueError(f"Asset video aspect ratio (W/H) must be in [0.4, 2.5], got {ratio:.3f} ({w}x{h}).") + pixels = w * h + if not (409_600 <= pixels <= 927_408): + raise ValueError( + f"Asset video total pixels (W×H) must be in [409600, 927408], " f"got {pixels:,} ({w}x{h})." 
+ ) + + fps = float(video.get_frame_rate()) + if not (24 <= fps <= 60): + raise ValueError(f"Asset video FPS must be in [24, 60], got {fps:.2f}.") + + resolved_group = await _resolve_group_id(cls, group_id) + asset_id = await _create_seedance_asset( + cls, + group_id=resolved_group, + url=await upload_video_to_comfyapi(cls, video), + name="", + asset_type="Video", + ) + await _wait_for_asset_active(cls, asset_id, resolved_group) + PromptServer.instance.send_progress_text( + f"Please save the asset_id and group_id for reuse.\n\nasset_id: {asset_id}\n\n" + f"group_id: {resolved_group}", + cls.hidden.unique_id, + ) + return IO.NodeOutput(asset_id, resolved_group) + + class ByteDanceExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ ByteDanceImageNode, ByteDanceSeedreamNode, + ByteDanceSeedreamNodeV2, ByteDanceTextToVideoNode, ByteDanceImageToVideoNode, ByteDanceFirstLastFrameNode, ByteDanceImageReferenceNode, + ByteDance2TextToVideoNode, + ByteDance2FirstLastFrameNode, + ByteDance2ReferenceNode, + ByteDanceCreateImageAsset, + ByteDanceCreateVideoAsset, ] diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py index 2b77a022e..d18c958a8 100644 --- a/comfy_api_nodes/nodes_gemini.py +++ b/comfy_api_nodes/nodes_gemini.py @@ -83,13 +83,16 @@ class GeminiImageModel(str, Enum): async def create_image_parts( cls: type[IO.ComfyNode], - images: Input.Image, + images: Input.Image | list[Input.Image], image_limit: int = 0, ) -> list[GeminiPart]: image_parts: list[GeminiPart] = [] if image_limit < 0: raise ValueError("image_limit must be greater than or equal to 0 when creating Gemini image parts.") - total_images = get_number_of_images(images) + + # Accept either a single (possibly-batched) tensor or a list of them; share URL budget across all. 
+ images_list: list[Input.Image] = images if isinstance(images, list) else [images] + total_images = sum(get_number_of_images(img) for img in images_list) if total_images <= 0: raise ValueError("No images provided to create_image_parts; at least one image is required.") @@ -98,10 +101,18 @@ async def create_image_parts( # Number of images we'll send as URLs (fileData) num_url_images = min(effective_max, 10) # Vertex API max number of image links + upload_kwargs: dict = {"wait_label": "Uploading reference images"} + if effective_max > num_url_images: + # Split path (e.g. 11+ images): suppress per-image counter to avoid a confusing dual-fraction label. + upload_kwargs = { + "wait_label": f"Uploading reference images ({num_url_images}+)", + "show_batch_index": False, + } reference_images_urls = await upload_images_to_comfyapi( cls, - images, + images_list, max_images=num_url_images, + **upload_kwargs, ) for reference_image_url in reference_images_urls: image_parts.append( @@ -112,15 +123,22 @@ async def create_image_parts( ) ) ) - for idx in range(num_url_images, effective_max): - image_parts.append( - GeminiPart( - inlineData=GeminiInlineData( - mimeType=GeminiMimeType.image_png, - data=tensor_to_base64_string(images[idx]), + if effective_max > num_url_images: + flat: list[torch.Tensor] = [] + for tensor in images_list: + if len(tensor.shape) == 4: + flat.extend(tensor[i] for i in range(tensor.shape[0])) + else: + flat.append(tensor) + for idx in range(num_url_images, effective_max): + image_parts.append( + GeminiPart( + inlineData=GeminiInlineData( + mimeType=GeminiMimeType.image_png, + data=tensor_to_base64_string(flat[idx]), + ) ) ) - ) return image_parts @@ -891,10 +909,6 @@ class GeminiNanoBanana2(IO.ComfyNode): "9:16", "16:9", "21:9", - # "1:4", - # "4:1", - # "8:1", - # "1:8", ], default="auto", tooltip="If set to 'auto', matches your input image's aspect ratio; " @@ -902,12 +916,7 @@ class GeminiNanoBanana2(IO.ComfyNode): ), IO.Combo.Input( "resolution", - 
options=[ - # "512px", - "1K", - "2K", - "4K", - ], + options=["1K", "2K", "4K"], tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.", ), IO.Combo.Input( @@ -956,6 +965,7 @@ class GeminiNanoBanana2(IO.ComfyNode): ], is_api_node=True, price_badge=GEMINI_IMAGE_2_PRICE_BADGE, + is_deprecated=True, ) @classmethod @@ -1016,6 +1026,197 @@ class GeminiNanoBanana2(IO.ComfyNode): ) +def _nano_banana_2_v2_model_inputs(): + return [ + IO.Combo.Input( + "aspect_ratio", + options=[ + "auto", + "1:1", + "2:3", + "3:2", + "3:4", + "4:3", + "4:5", + "5:4", + "9:16", + "16:9", + "21:9", + "1:4", + "4:1", + "8:1", + "1:8", + ], + default="auto", + tooltip="If set to 'auto', matches your input image's aspect ratio; " + "if no image is provided, a 16:9 square is usually generated.", + ), + IO.Combo.Input( + "resolution", + options=["1K", "2K", "4K"], + tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.", + ), + IO.Combo.Input( + "thinking_level", + options=["MINIMAL", "HIGH"], + ), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 15)], + min=0, + ), + tooltip="Optional reference image(s). Up to 14 images total.", + ), + IO.Custom("GEMINI_INPUT_FILES").Input( + "files", + optional=True, + tooltip="Optional file(s) to use as context for the model. " + "Accepts inputs from the Gemini Generate Content Input Files node.", + ), + ] + + +class GeminiNanoBanana2V2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="GeminiNanoBanana2V2", + display_name="Nano Banana 2", + category="api node/image/Gemini", + description="Generate or edit images synchronously via Google Vertex API.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + tooltip="Text prompt describing the image to generate or the edits to apply. 
" + "Include any constraints, styles, or details the model should follow.", + default="", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "Nano Banana 2 (Gemini 3.1 Flash Image)", + _nano_banana_2_v2_model_inputs(), + ), + ], + ), + IO.Int.Input( + "seed", + default=42, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide " + "the same response for repeated requests. Deterministic output isn't guaranteed. " + "Also, changing the model or parameter settings, such as the temperature, " + "can cause variations in the response even when you use the same seed value. " + "By default, a random seed value is used.", + ), + IO.Combo.Input( + "response_modalities", + options=["IMAGE", "IMAGE+TEXT"], + advanced=True, + ), + IO.String.Input( + "system_prompt", + multiline=True, + default=GEMINI_IMAGE_SYS_PROMPT, + optional=True, + tooltip="Foundational instructions that dictate an AI's behavior.", + advanced=True, + ), + ], + outputs=[ + IO.Image.Output(), + IO.String.Output(), + IO.Image.Output( + display_name="thought_image", + tooltip="First image from the model's thinking process. 
" + "Only available with thinking_level HIGH and IMAGE+TEXT modality.", + ), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]), + expr=""" + ( + $r := $lookup(widgets, "model.resolution"); + $prices := {"1k": 0.0696, "2k": 0.1014, "4k": 0.154}; + {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + response_modalities: str, + system_prompt: str = "", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + model_choice = model["model"] + if model_choice == "Nano Banana 2 (Gemini 3.1 Flash Image)": + model_id = "gemini-3.1-flash-image-preview" + else: + model_id = model_choice + + images = model.get("images") or {} + parts: list[GeminiPart] = [GeminiPart(text=prompt)] + if images: + image_tensors: list[Input.Image] = [t for t in images.values() if t is not None] + if image_tensors: + if sum(get_number_of_images(t) for t in image_tensors) > 14: + raise ValueError("The current maximum number of supported images is 14.") + parts.extend(await create_image_parts(cls, image_tensors)) + files = model.get("files") + if files is not None: + parts.extend(files) + + image_config = GeminiImageConfig(imageSize=model["resolution"]) + if model["aspect_ratio"] != "auto": + image_config.aspectRatio = model["aspect_ratio"] + + gemini_system_prompt = None + if system_prompt: + gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None) + + response = await sync_op( + cls, + ApiEndpoint(path=f"/proxy/vertexai/gemini/{model_id}", method="POST"), + data=GeminiImageGenerateContentRequest( + contents=[ + GeminiContent(role=GeminiRole.user, parts=parts), + ], + 
generationConfig=GeminiImageGenerationConfig( + responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]), + imageConfig=image_config, + thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]), + ), + systemInstruction=gemini_system_prompt, + ), + response_model=GeminiGenerateContentResponse, + price_extractor=calculate_tokens_price, + ) + return IO.NodeOutput( + await get_image_from_response(response), + get_text_from_response(response), + await get_image_from_response(response, thought=True), + ) + + class GeminiExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1024,6 +1225,7 @@ class GeminiExtension(ComfyExtension): GeminiImage, GeminiImage2, GeminiNanoBanana2, + GeminiNanoBanana2V2, GeminiInputFiles, ] diff --git a/comfy_api_nodes/nodes_grok.py b/comfy_api_nodes/nodes_grok.py index dabc899d6..a103f24ee 100644 --- a/comfy_api_nodes/nodes_grok.py +++ b/comfy_api_nodes/nodes_grok.py @@ -54,7 +54,12 @@ class GrokImageNode(IO.ComfyNode): inputs=[ IO.Combo.Input( "model", - options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"], + options=[ + "grok-imagine-image-quality", + "grok-imagine-image-pro", + "grok-imagine-image", + "grok-imagine-image-beta", + ], ), IO.String.Input( "prompt", @@ -111,10 +116,12 @@ class GrokImageNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]), expr=""" ( - $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02; + $rate := widgets.model = "grok-imagine-image-quality" + ? (widgets.resolution = "1k" ? 0.05 : 0.07) + : ($contains(widgets.model, "pro") ? 
0.07 : 0.02); {"type":"usd","usd": $rate * widgets.number_of_images} ) """, @@ -155,6 +162,61 @@ class GrokImageNode(IO.ComfyNode): ) +_GROK_IMAGE_EDIT_ASPECT_RATIO_OPTIONS = [ + "auto", + "1:1", + "2:3", + "3:2", + "3:4", + "4:3", + "9:16", + "16:9", + "9:19.5", + "19.5:9", + "9:20", + "20:9", + "1:2", + "2:1", +] + + +def _grok_image_edit_model_inputs(*, max_ref_images: int, with_aspect_ratio: bool): + inputs = [ + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, max_ref_images + 1)], + min=1, + ), + tooltip=( + "Reference image to edit." + if max_ref_images == 1 + else f"Reference image(s) to edit. Up to {max_ref_images} images." + ), + ), + IO.Combo.Input("resolution", options=["1K", "2K"]), + IO.Int.Input( + "number_of_images", + default=1, + min=1, + max=10, + step=1, + tooltip="Number of edited images to generate", + display_mode=IO.NumberDisplay.number, + ), + ] + if with_aspect_ratio: + inputs.append( + IO.Combo.Input( + "aspect_ratio", + options=_GROK_IMAGE_EDIT_ASPECT_RATIO_OPTIONS, + tooltip="Only allowed when multiple images are connected.", + ) + ) + return inputs + + class GrokImageEditNode(IO.ComfyNode): @classmethod @@ -167,7 +229,12 @@ class GrokImageEditNode(IO.ComfyNode): inputs=[ IO.Combo.Input( "model", - options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"], + options=[ + "grok-imagine-image-quality", + "grok-imagine-image-pro", + "grok-imagine-image", + "grok-imagine-image-beta", + ], ), IO.Image.Input("image", display_name="images"), IO.String.Input( @@ -228,14 +295,23 @@ class GrokImageEditNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]), expr=""" ( - $rate := $contains(widgets.model, "pro") ? 
0.07 : 0.02; - {"type":"usd","usd": 0.002 + $rate * widgets.number_of_images} + $isQualityModel := widgets.model = "grok-imagine-image-quality"; + $isPro := $contains(widgets.model, "pro"); + $rate := $isQualityModel + ? (widgets.resolution = "1k" ? 0.05 : 0.07) + : ($isPro ? 0.07 : 0.02); + $base := $isQualityModel ? 0.01 : 0.002; + $output := $rate * widgets.number_of_images; + $isPro + ? {"type":"usd","usd": $base + $output} + : {"type":"range_usd","min_usd": $base + $output, "max_usd": 3 * $base + $output} ) """, ), + is_deprecated=True, ) @classmethod @@ -283,6 +359,143 @@ class GrokImageEditNode(IO.ComfyNode): ) +class GrokImageEditNodeV2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="GrokImageEditNodeV2", + display_name="Grok Image Edit", + category="api node/image/Grok", + description="Modify an existing image based on a text prompt", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="The text prompt used to generate the image", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "grok-imagine-image-quality", + _grok_image_edit_model_inputs(max_ref_images=3, with_aspect_ratio=True), + ), + IO.DynamicCombo.Option( + "grok-imagine-image-pro", + _grok_image_edit_model_inputs(max_ref_images=1, with_aspect_ratio=False), + ), + IO.DynamicCombo.Option( + "grok-imagine-image", + _grok_image_edit_model_inputs(max_ref_images=3, with_aspect_ratio=True), + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to determine if node should re-run; " + "actual results are nondeterministic regardless of seed.", + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + 
widgets=["model", "model.resolution", "model.number_of_images"], + ), + expr=""" + ( + $isQualityModel := widgets.model = "grok-imagine-image-quality"; + $isPro := $contains(widgets.model, "pro"); + $res := $lookup(widgets, "model.resolution"); + $n := $lookup(widgets, "model.number_of_images"); + $rate := $isQualityModel + ? ($res = "1k" ? 0.05 : 0.07) + : ($isPro ? 0.07 : 0.02); + $base := $isQualityModel ? 0.01 : 0.002; + $output := $rate * $n; + $isPro + ? {"type":"usd","usd": $base + $output} + : {"type":"range_usd","min_usd": $base + $output, "max_usd": 3 * $base + $output} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + model_id = model["model"] + resolution = model["resolution"] + number_of_images = model["number_of_images"] + images_dict = model.get("images") or {} + aspect_ratio = model.get("aspect_ratio", "auto") + + image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None] + n_images = sum(get_number_of_images(t) for t in image_tensors) + if n_images < 1: + raise ValueError("At least one image is required for editing.") + if model_id == "grok-imagine-image-pro" and n_images > 1: + raise ValueError("The pro model supports only 1 input image.") + if model_id != "grok-imagine-image-pro" and n_images > 3: + raise ValueError("A maximum of 3 input images is supported.") + if aspect_ratio != "auto" and n_images == 1: + raise ValueError( + "Custom aspect ratio is only allowed when multiple images are connected to the image input." 
+ ) + + flat_tensors: list[torch.Tensor] = [] + for tensor in image_tensors: + if len(tensor.shape) == 4: + flat_tensors.extend(tensor[i] for i in range(tensor.shape[0])) + else: + flat_tensors.append(tensor) + + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/xai/v1/images/edits", method="POST"), + data=ImageEditRequest( + model=model_id, + images=[ + InputUrlObject(url=f"data:image/png;base64,{tensor_to_base64_string(i)}") for i in flat_tensors + ], + prompt=prompt, + resolution=resolution.lower(), + n=number_of_images, + seed=seed, + aspect_ratio=None if aspect_ratio == "auto" else aspect_ratio, + ), + response_model=ImageGenerationResponse, + price_extractor=_extract_grok_price, + ) + if len(response.data) == 1: + return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url)) + return IO.NodeOutput( + torch.cat( + [await download_url_to_image_tensor(i) for i in [str(d.url) for d in response.data if d.url]], + ) + ) + + class GrokVideoNode(IO.ComfyNode): @classmethod @@ -558,7 +771,7 @@ class GrokVideoReferenceNode(IO.ComfyNode): ( $res := $lookup(widgets, "model.resolution"); $dur := $lookup(widgets, "model.duration"); - $refs := inputGroups["model.reference_images"]; + $refs := $lookup(inputGroups, "model.reference_images"); $rate := $res = "720p" ? 
0.07 : 0.05; $price := ($rate * $dur + 0.002 * $refs) * 1.43; {"type":"usd","usd": $price} @@ -717,6 +930,7 @@ class GrokExtension(ComfyExtension): return [ GrokImageNode, GrokImageEditNode, + GrokImageEditNodeV2, GrokVideoNode, GrokVideoReferenceNode, GrokVideoEditNode, diff --git a/comfy_api_nodes/nodes_hitpaw.py b/comfy_api_nodes/nodes_hitpaw.py index 488080a74..bca5170e4 100644 --- a/comfy_api_nodes/nodes_hitpaw.py +++ b/comfy_api_nodes/nodes_hitpaw.py @@ -178,7 +178,6 @@ class HitPawGeneralImageEnhance(IO.ComfyNode): status_extractor=lambda x: x.data.status, price_extractor=lambda x: request_price, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.res_url)) @@ -324,7 +323,6 @@ class HitPawVideoEnhance(IO.ComfyNode): status_extractor=lambda x: x.data.status, price_extractor=lambda x: request_price, poll_interval=10.0, - max_poll_attempts=320, ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.res_url)) diff --git a/comfy_api_nodes/nodes_hunyuan3d.py b/comfy_api_nodes/nodes_hunyuan3d.py index 44c94a98e..5fc31bccd 100644 --- a/comfy_api_nodes/nodes_hunyuan3d.py +++ b/comfy_api_nodes/nodes_hunyuan3d.py @@ -221,14 +221,17 @@ class TencentTextToModelNode(IO.ComfyNode): response_model=To3DProTaskResultResponse, status_extractor=lambda r: r.Status, ) - obj_result = await download_and_extract_obj_zip(get_file_from_response(result.ResultFile3Ds, "obj").Url) + obj_file_response = get_file_from_response(result.ResultFile3Ds, "obj", raise_if_not_found=False) + obj_result = None + if obj_file_response: + obj_result = await download_and_extract_obj_zip(obj_file_response.Url) return IO.NodeOutput( f"{task_id}.glb", await download_url_to_file_3d( get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id ), - obj_result.obj, - obj_result.texture, + obj_result.obj if obj_result else None, + obj_result.texture if obj_result else None, ) @@ -378,17 +381,30 @@ 
class TencentImageToModelNode(IO.ComfyNode): response_model=To3DProTaskResultResponse, status_extractor=lambda r: r.Status, ) - obj_result = await download_and_extract_obj_zip(get_file_from_response(result.ResultFile3Ds, "obj").Url) + obj_file_response = get_file_from_response(result.ResultFile3Ds, "obj", raise_if_not_found=False) + if obj_file_response: + obj_result = await download_and_extract_obj_zip(obj_file_response.Url) + return IO.NodeOutput( + f"{task_id}.glb", + await download_url_to_file_3d( + get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id + ), + obj_result.obj, + obj_result.texture, + obj_result.metallic if obj_result.metallic is not None else torch.zeros(1, 1, 1, 3), + obj_result.normal if obj_result.normal is not None else torch.zeros(1, 1, 1, 3), + obj_result.roughness if obj_result.roughness is not None else torch.zeros(1, 1, 1, 3), + ) return IO.NodeOutput( f"{task_id}.glb", await download_url_to_file_3d( get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id ), - obj_result.obj, - obj_result.texture, - obj_result.metallic if obj_result.metallic is not None else torch.zeros(1, 1, 1, 3), - obj_result.normal if obj_result.normal is not None else torch.zeros(1, 1, 1, 3), - obj_result.roughness if obj_result.roughness is not None else torch.zeros(1, 1, 1, 3), + None, + None, + None, + None, + None, ) diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 9a37ccc53..7586f1816 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -862,7 +862,7 @@ class OmniProTextToVideoNode(IO.ComfyNode): ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( 
"storyboards", options=[ @@ -904,12 +904,13 @@ class OmniProTextToVideoNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? {"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -934,6 +935,8 @@ class OmniProTextToVideoNode(IO.ComfyNode): raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -963,6 +966,12 @@ class OmniProTextToVideoNode(IO.ComfyNode): f"must equal the global duration ({duration}s)." 
) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -972,7 +981,7 @@ class OmniProTextToVideoNode(IO.ComfyNode): prompt=prompt, aspect_ratio=aspect_ratio, duration=str(duration), - mode="pro" if resolution == "1080p" else "std", + mode=mode, multi_shot=multi_shot, multi_prompt=multi_prompt_list, shot_type="customize" if multi_shot else None, @@ -1014,7 +1023,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): optional=True, tooltip="Up to 6 additional reference images.", ), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( "storyboards", options=[ @@ -1061,12 +1070,13 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? 
{"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1093,6 +1103,8 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -1161,6 +1173,12 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1)) for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference frame(s)"): image_list.append(OmniParamImage(image_url=i)) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -1170,7 +1188,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): prompt=prompt, duration=str(duration), image_list=image_list, - mode="pro" if resolution == "1080p" else "std", + mode=mode, sound="on" if generate_audio else "off", multi_shot=multi_shot, multi_prompt=multi_prompt_list, @@ -1204,7 +1222,7 @@ class OmniProImageToVideoNode(IO.ComfyNode): "reference_images", tooltip="Up to 7 reference images.", ), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( "storyboards", options=[ @@ -1251,12 +1269,13 @@ class OmniProImageToVideoNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), 
expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? {"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1282,6 +1301,8 @@ class OmniProImageToVideoNode(IO.ComfyNode): raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -1320,6 +1341,12 @@ class OmniProImageToVideoNode(IO.ComfyNode): image_list: list[OmniParamImage] = [] for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"): image_list.append(OmniParamImage(image_url=i)) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -1330,7 +1357,7 @@ class OmniProImageToVideoNode(IO.ComfyNode): aspect_ratio=aspect_ratio, duration=str(duration), image_list=image_list, - mode="pro" if resolution == "1080p" else "std", + mode=mode, sound="on" if generate_audio else "off", multi_shot=multi_shot, multi_prompt=multi_prompt_list, @@ -2760,11 +2787,15 @@ class MotionControl(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["mode"]), + 
depends_on=IO.PriceBadgeDepends(widgets=["mode", "model"]), expr=""" ( - $prices := {"std": 0.07, "pro": 0.112}; - {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}} + $prices := { + "kling-v3": {"std": 0.126, "pro": 0.168}, + "kling-v2-6": {"std": 0.07, "pro": 0.112} + }; + $modelPrices := $lookup($prices, widgets.model); + {"type":"usd","usd": $lookup($modelPrices, widgets.mode), "format":{"suffix":"/second"}} ) """, ), @@ -2860,7 +2891,7 @@ class KlingVideoNode(IO.ComfyNode): IO.DynamicCombo.Option( "kling-v3", [ - IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"), IO.Combo.Input( "aspect_ratio", options=["16:9", "9:16", "1:1"], @@ -2913,7 +2944,11 @@ class KlingVideoNode(IO.ComfyNode): ), expr=""" ( - $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $rates := { + "4k": {"off": 0.42, "on": 0.42}, + "1080p": {"off": 0.112, "on": 0.168}, + "720p": {"off": 0.084, "on": 0.126} + }; $res := $lookup(widgets, "model.resolution"); $audio := widgets.generate_audio ? 
"on" : "off"; $rate := $lookup($lookup($rates, $res), $audio); @@ -2943,7 +2978,12 @@ class KlingVideoNode(IO.ComfyNode): start_frame: Input.Image | None = None, ) -> IO.NodeOutput: _ = seed - mode = "pro" if model["resolution"] == "1080p" else "std" + if model["resolution"] == "4k": + mode = "4k" + elif model["resolution"] == "1080p": + mode = "pro" + else: + mode = "std" custom_multi_shot = False if multi_shot["multi_shot"] == "disabled": shot_type = None @@ -3057,7 +3097,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode): IO.DynamicCombo.Option( "kling-v3", [ - IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"), ], ), ], @@ -3089,7 +3129,11 @@ class KlingFirstLastFrameNode(IO.ComfyNode): ), expr=""" ( - $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $rates := { + "4k": {"off": 0.42, "on": 0.42}, + "1080p": {"off": 0.112, "on": 0.168}, + "720p": {"off": 0.084, "on": 0.126} + }; $res := $lookup(widgets, "model.resolution"); $audio := widgets.generate_audio ? 
"on" : "off"; $rate := $lookup($lookup($rates, $res), $audio); @@ -3118,6 +3162,12 @@ class KlingFirstLastFrameNode(IO.ComfyNode): validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1)) image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame") image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame") + if model["resolution"] == "4k": + mode = "4k" + elif model["resolution"] == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), @@ -3127,7 +3177,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode): image=image_url, image_tail=image_tail_url, prompt=prompt, - mode="pro" if model["resolution"] == "1080p" else "std", + mode=mode, duration=str(duration), sound="on" if generate_audio else "off", ), diff --git a/comfy_api_nodes/nodes_luma.py b/comfy_api_nodes/nodes_luma.py index 9ed6cd299..d92a7c382 100644 --- a/comfy_api_nodes/nodes_luma.py +++ b/comfy_api_nodes/nodes_luma.py @@ -1,10 +1,11 @@ -from typing import Optional - import torch from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension +from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.luma import ( + Luma2Generation, + Luma2GenerationRequest, + Luma2ImageRef, LumaAspectRatio, LumaCharacterRef, LumaConceptChain, @@ -30,6 +31,7 @@ from comfy_api_nodes.util import ( download_url_to_video_output, poll_op, sync_op, + upload_image_to_comfyapi, upload_images_to_comfyapi, validate_string, ) @@ -212,9 +214,9 @@ class LumaImageGenerationNode(IO.ComfyNode): aspect_ratio: str, seed, style_image_weight: float, - image_luma_ref: Optional[LumaReferenceChain] = None, - style_image: Optional[torch.Tensor] = None, - character_image: Optional[torch.Tensor] = None, + image_luma_ref: LumaReferenceChain | None = None, + style_image: torch.Tensor | None = None, + character_image: torch.Tensor | None = 
None, ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=True, min_length=3) # handle image_luma_ref @@ -434,7 +436,7 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode): duration: str, loop: bool, seed, - luma_concepts: Optional[LumaConceptChain] = None, + luma_concepts: LumaConceptChain | None = None, ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=False, min_length=3) duration = duration if model != LumaVideoModel.ray_1_6 else None @@ -533,7 +535,6 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode): ], is_api_node=True, price_badge=PRICE_BADGE_VIDEO, - ) @classmethod @@ -644,6 +645,293 @@ PRICE_BADGE_VIDEO = IO.PriceBadge( ) +def _luma2_uni1_common_inputs(max_image_refs: int) -> list: + return [ + IO.Combo.Input( + "style", + options=["auto", "manga"], + default="auto", + tooltip="Style preset. 'auto' picks based on the prompt; " + "'manga' applies a manga/anime aesthetic and requires a portrait " + "aspect ratio (2:3, 9:16, 1:2, 1:3).", + ), + IO.Boolean.Input( + "web_search", + default=False, + tooltip="Search the web for visual references before generating.", + ), + IO.Autogrow.Input( + "image_ref", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, max_image_refs + 1)], + min=0, + ), + optional=True, + tooltip=f"Up to {max_image_refs} reference images for style/content guidance.", + ), + ] + + +async def _luma2_upload_image_refs( + cls: type[IO.ComfyNode], + refs: dict | None, + max_count: int, +) -> list[Luma2ImageRef] | None: + if not refs: + return None + out: list[Luma2ImageRef] = [] + for key in refs: + url = await upload_image_to_comfyapi(cls, refs[key]) + out.append(Luma2ImageRef(url=url)) + if len(out) > max_count: + raise ValueError(f"Maximum {max_count} reference images are allowed.") + return out or None + + +async def _luma2_submit_and_poll( + cls: type[IO.ComfyNode], + request: Luma2GenerationRequest, +) -> Input.Image: + initial = await sync_op( + cls, + 
ApiEndpoint(path="/proxy/luma_2/generations", method="POST"), + response_model=Luma2Generation, + data=request, + ) + if not initial.id: + raise RuntimeError("Luma 2 API did not return a generation id.") + final = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/luma_2/generations/{initial.id}", method="GET"), + response_model=Luma2Generation, + status_extractor=lambda r: r.state, + progress_extractor=lambda r: None, + ) + if not final.output: + msg = final.failure_reason or "no output returned" + raise RuntimeError(f"Luma 2 generation failed: {msg}") + url = final.output[0].url + if not url: + raise RuntimeError("Luma 2 generation completed without an output URL.") + return await download_url_to_image_tensor(url) + + +class LumaImageNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="LumaImageNode2", + display_name="Luma UNI-1 Image", + category="api node/image/Luma", + description="Generate images from text using the Luma UNI-1 model.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text description of the desired image. 1–6000 characters.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "uni-1", + [ + IO.Combo.Input( + "aspect_ratio", + options=[ + "auto", + "3:1", + "2:1", + "16:9", + "3:2", + "1:1", + "2:3", + "9:16", + "1:2", + "1:3", + ], + default="auto", + tooltip="Output image aspect ratio. 'auto' lets " + "the model pick based on the prompt.", + ), + *_luma2_uni1_common_inputs(max_image_refs=9), + ], + ), + IO.DynamicCombo.Option( + "uni-1-max", + [ + IO.Combo.Input( + "aspect_ratio", + options=[ + "auto", + "3:1", + "2:1", + "16:9", + "3:2", + "1:1", + "2:3", + "9:16", + "1:2", + "1:3", + ], + default="auto", + tooltip="Output image aspect ratio. 
'auto' lets " + "the model pick based on the prompt.", + ), + *_luma2_uni1_common_inputs(max_image_refs=9), + ], + ), + ], + tooltip="Model to use for generation.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Image.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"], input_groups=["model.image_ref"]), + expr=""" + ( + $m := widgets.model; + $refs := $lookup(inputGroups, "model.image_ref"); + $base := $m = "uni-1-max" ? 0.1 : 0.0404; + {"type":"usd","usd": $round($base + 0.003 * $refs, 4)} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=6000) + aspect_ratio = model["aspect_ratio"] + style = model["style"] + allowed_manga_ratios = {"2:3", "9:16", "1:2", "1:3"} + if style == "manga" and aspect_ratio != "auto" and aspect_ratio not in allowed_manga_ratios: + raise ValueError( + f"'manga' style requires a portrait aspect ratio " + f"({', '.join(sorted(allowed_manga_ratios))}) or 'auto'; got '{aspect_ratio}'." 
+ ) + request = Luma2GenerationRequest( + prompt=prompt, + model=model["model"], + type="image", + aspect_ratio=aspect_ratio if aspect_ratio != "auto" else None, + style=style if style != "auto" else None, + output_format="png", + web_search=model["web_search"], + image_ref=await _luma2_upload_image_refs(cls, model.get("image_ref"), max_count=9), + ) + return IO.NodeOutput(await _luma2_submit_and_poll(cls, request)) + + +class LumaImageEditNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="LumaImageEditNode2", + display_name="Luma UNI-1 Image Edit", + category="api node/image/Luma", + description="Edit an existing image with a text prompt using the Luma UNI-1 model.", + inputs=[ + IO.Image.Input( + "source", + tooltip="Source image to edit.", + ), + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Description of the desired edit. 1–6000 characters.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "uni-1", + _luma2_uni1_common_inputs(max_image_refs=8), + ), + IO.DynamicCombo.Option( + "uni-1-max", + _luma2_uni1_common_inputs(max_image_refs=8), + ), + ], + tooltip="Model to use for editing.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Image.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"], input_groups=["model.image_ref"]), + expr=""" + ( + $m := widgets.model; + $refs := $lookup(inputGroups, "model.image_ref"); + $base := $m = "uni-1-max" ? 
0.103 : 0.0434; + {"type":"usd","usd": $round($base + 0.003 * $refs, 4)} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + source: Input.Image, + prompt: str, + model: dict, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=6000) + request = Luma2GenerationRequest( + prompt=prompt, + model=model["model"], + type="image_edit", + source=Luma2ImageRef(url=await upload_image_to_comfyapi(cls, source)), + style=model["style"] if model["style"] != "auto" else None, + output_format="png", + web_search=model["web_search"], + image_ref=await _luma2_upload_image_refs(cls, model.get("image_ref"), max_count=8), + ) + return IO.NodeOutput(await _luma2_submit_and_poll(cls, request)) + + class LumaExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -654,6 +942,8 @@ class LumaExtension(ComfyExtension): LumaImageToVideoGenerationNode, LumaReferenceNode, LumaConceptsNode, + LumaImageNode, + LumaImageEditNode, ] diff --git a/comfy_api_nodes/nodes_magnific.py b/comfy_api_nodes/nodes_magnific.py index 0f53208d4..38b881fea 100644 --- a/comfy_api_nodes/nodes_magnific.py +++ b/comfy_api_nodes/nodes_magnific.py @@ -230,7 +230,6 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): status_extractor=lambda x: x.status, price_extractor=lambda _: price_usd, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -391,7 +390,6 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): status_extractor=lambda x: x.status, price_extractor=lambda _: price_usd, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -541,7 +539,6 @@ class MagnificImageStyleTransferNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await 
download_url_to_image_tensor(final_response.generated[0])) @@ -782,7 +779,6 @@ class MagnificImageRelightNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -924,7 +920,6 @@ class MagnificImageSkinEnhancerNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py deleted file mode 100644 index 78a230529..000000000 --- a/comfy_api_nodes/nodes_moonvalley.py +++ /dev/null @@ -1,534 +0,0 @@ -import logging - -from typing_extensions import override - -from comfy_api.latest import IO, ComfyExtension, Input -from comfy_api_nodes.apis.moonvalley import ( - MoonvalleyPromptResponse, - MoonvalleyTextToVideoInferenceParams, - MoonvalleyTextToVideoRequest, - MoonvalleyVideoToVideoInferenceParams, - MoonvalleyVideoToVideoRequest, -) -from comfy_api_nodes.util import ( - ApiEndpoint, - download_url_to_video_output, - poll_op, - sync_op, - trim_video, - upload_images_to_comfyapi, - upload_video_to_comfyapi, - validate_container_format_is_mp4, - validate_image_dimensions, - validate_string, -) - -API_UPLOADS_ENDPOINT = "/proxy/moonvalley/uploads" -API_PROMPTS_ENDPOINT = "/proxy/moonvalley/prompts" -API_VIDEO2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/video-to-video" -API_TXT2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/text-to-video" -API_IMG2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/image-to-video" - -MIN_WIDTH = 300 -MIN_HEIGHT = 300 - -MAX_WIDTH = 10000 -MAX_HEIGHT = 10000 - -MIN_VID_WIDTH = 300 -MIN_VID_HEIGHT = 300 - -MAX_VID_WIDTH = 10000 -MAX_VID_HEIGHT = 10000 - -MAX_VIDEO_SIZE = 1024 * 1024 * 1024 # 1 GB max for in-memory video processing - 
-MOONVALLEY_MAREY_MAX_PROMPT_LENGTH = 5000 - - -def is_valid_task_creation_response(response: MoonvalleyPromptResponse) -> bool: - """Verifies that the initial response contains a task ID.""" - return bool(response.id) - - -def validate_task_creation_response(response) -> None: - if not is_valid_task_creation_response(response): - error_msg = f"Moonvalley Marey API: Initial request failed. Code: {response.code}, Message: {response.message}, Data: {response}" - logging.error(error_msg) - raise RuntimeError(error_msg) - - -def validate_video_to_video_input(video: Input.Video) -> Input.Video: - """ - Validates and processes video input for Moonvalley Video-to-Video generation. - - Args: - video: Input video to validate - - Returns: - Validated and potentially trimmed video - - Raises: - ValueError: If video doesn't meet requirements - MoonvalleyApiError: If video duration is too short - """ - width, height = _get_video_dimensions(video) - _validate_video_dimensions(width, height) - validate_container_format_is_mp4(video) - - return _validate_and_trim_duration(video) - - -def _get_video_dimensions(video: Input.Video) -> tuple[int, int]: - """Extracts video dimensions with error handling.""" - try: - return video.get_dimensions() - except Exception as e: - logging.error("Error getting dimensions of video: %s", e) - raise ValueError(f"Cannot get video dimensions: {e}") from e - - -def _validate_video_dimensions(width: int, height: int) -> None: - """Validates video dimensions meet Moonvalley V2V requirements.""" - supported_resolutions = { - (1920, 1080), - (1080, 1920), - (1152, 1152), - (1536, 1152), - (1152, 1536), - } - - if (width, height) not in supported_resolutions: - supported_list = ", ".join([f"{w}x{h}" for w, h in sorted(supported_resolutions)]) - raise ValueError(f"Resolution {width}x{height} not supported. 
Supported: {supported_list}") - - -def _validate_and_trim_duration(video: Input.Video) -> Input.Video: - """Validates video duration and trims to 5 seconds if needed.""" - duration = video.get_duration() - _validate_minimum_duration(duration) - return _trim_if_too_long(video, duration) - - -def _validate_minimum_duration(duration: float) -> None: - """Ensures video is at least 5 seconds long.""" - if duration < 5: - raise ValueError("Input video must be at least 5 seconds long.") - - -def _trim_if_too_long(video: Input.Video, duration: float) -> Input.Video: - """Trims video to 5 seconds if longer.""" - if duration > 5: - return trim_video(video, 5) - return video - - -def parse_width_height_from_res(resolution: str): - # Accepts a string like "16:9 (1920 x 1080)" and returns width, height as a dict - res_map = { - "16:9 (1920 x 1080)": {"width": 1920, "height": 1080}, - "9:16 (1080 x 1920)": {"width": 1080, "height": 1920}, - "1:1 (1152 x 1152)": {"width": 1152, "height": 1152}, - "4:3 (1536 x 1152)": {"width": 1536, "height": 1152}, - "3:4 (1152 x 1536)": {"width": 1152, "height": 1536}, - # "21:9 (2560 x 1080)": {"width": 2560, "height": 1080}, - } - return res_map.get(resolution, {"width": 1920, "height": 1080}) - - -def parse_control_parameter(value): - control_map = { - "Motion Transfer": "motion_control", - "Canny": "canny_control", - "Pose Transfer": "pose_control", - "Depth": "depth_control", - } - return control_map.get(value, control_map["Motion Transfer"]) - - -async def get_response(cls: type[IO.ComfyNode], task_id: str) -> MoonvalleyPromptResponse: - return await poll_op( - cls, - ApiEndpoint(path=f"{API_PROMPTS_ENDPOINT}/{task_id}"), - response_model=MoonvalleyPromptResponse, - status_extractor=lambda r: (r.status if r and r.status else None), - poll_interval=16.0, - max_poll_attempts=240, - ) - - -class MoonvalleyImg2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - 
node_id="MoonvalleyImg2VideoNode", - display_name="Moonvalley Marey Image to Video", - category="api node/video/Moonvalley Marey", - description="Moonvalley Marey Image to Video Node", - inputs=[ - IO.Image.Input( - "image", - tooltip="The reference image used to generate the video", - ), - IO.String.Input( - "prompt", - multiline=True, - ), - IO.String.Input( - "negative_prompt", - multiline=True, - default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Combo.Input( - "resolution", - options=[ - "16:9 (1920 x 1080)", - "9:16 (1080 x 1920)", - "1:1 (1152 x 1152)", - "4:3 (1536 x 1152)", - "3:4 (1152 x 1536)", - # "21:9 (2560 x 1080)", - ], - default="16:9 (1920 x 1080)", - tooltip="Resolution of the output video", - ), - IO.Float.Input( - "prompt_adherence", - default=4.5, - min=1.0, - max=20.0, - step=1.0, - tooltip="Guidance scale for generation control", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Random seed value", - control_after_generate=True, - ), - IO.Int.Input( - "steps", - default=80, - min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) - max=100, - step=1, - tooltip="Number of denoising steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 
1.5}""", - ), - ) - - @classmethod - async def execute( - cls, - image: Input.Image, - prompt: str, - negative_prompt: str, - resolution: str, - prompt_adherence: float, - seed: int, - steps: int, - ) -> IO.NodeOutput: - validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH) - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - width_height = parse_width_height_from_res(resolution) - - inference_params = MoonvalleyTextToVideoInferenceParams( - negative_prompt=negative_prompt, - steps=steps, - seed=seed, - guidance_scale=prompt_adherence, - width=width_height["width"], - height=width_height["height"], - use_negative_prompts=True, - ) - - # Get MIME type from tensor - assuming PNG format for image tensors - mime_type = "image/png" - image_url = (await upload_images_to_comfyapi(cls, image, max_images=1, mime_type=mime_type))[0] - task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_IMG2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyTextToVideoRequest( - image_url=image_url, prompt_text=prompt, inference_params=inference_params - ), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - video = await download_url_to_video_output(final_response.output_url) - return IO.NodeOutput(video) - - -class MoonvalleyVideo2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="MoonvalleyVideo2VideoNode", - display_name="Moonvalley Marey Video to Video", - category="api node/video/Moonvalley Marey", - description="", - inputs=[ - IO.String.Input( - "prompt", - multiline=True, - tooltip="Describes the video to generate", - ), - IO.String.Input( - "negative_prompt", - multiline=True, 
- default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Random seed value", - control_after_generate=False, - ), - IO.Video.Input( - "video", - tooltip="The reference video used to generate the output video. Must be at least 5 seconds long. " - "Videos longer than 5s will be automatically trimmed. Only MP4 format supported.", - ), - IO.Combo.Input( - "control_type", - options=["Motion Transfer", "Pose Transfer"], - default="Motion Transfer", - optional=True, - ), - IO.Int.Input( - "motion_intensity", - default=100, - min=0, - max=100, - step=1, - tooltip="Only used if control_type is 'Motion Transfer'", - optional=True, - ), - IO.Int.Input( - "steps", - default=60, - min=60, # steps should be greater or equal to cooldown_steps(36) + warmup_steps(24) - max=100, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Number of inference steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 2.25}""", - ), - ) - - @classmethod - async def execute( - cls, - prompt: str, - negative_prompt: str, - seed: int, - video: Input.Video | None = None, - control_type: str = "Motion Transfer", - motion_intensity: int | None = 100, - steps=60, - prompt_adherence=4.5, 
- ) -> IO.NodeOutput: - validated_video = validate_video_to_video_input(video) - video_url = await upload_video_to_comfyapi(cls, validated_video) - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - - # Only include motion_intensity for Motion Transfer - control_params = {} - if control_type == "Motion Transfer" and motion_intensity is not None: - control_params["motion_intensity"] = motion_intensity - - inference_params = MoonvalleyVideoToVideoInferenceParams( - negative_prompt=negative_prompt, - seed=seed, - control_params=control_params, - steps=steps, - guidance_scale=prompt_adherence, - ) - - task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_VIDEO2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyVideoToVideoRequest( - control_type=parse_control_parameter(control_type), - video_url=video_url, - prompt_text=prompt, - inference_params=inference_params, - ), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - return IO.NodeOutput(await download_url_to_video_output(final_response.output_url)) - - -class MoonvalleyTxt2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="MoonvalleyTxt2VideoNode", - display_name="Moonvalley Marey Text to Video", - category="api node/video/Moonvalley Marey", - description="", - inputs=[ - IO.String.Input( - "prompt", - multiline=True, - ), - IO.String.Input( - "negative_prompt", - multiline=True, - default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated 
hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Combo.Input( - "resolution", - options=[ - "16:9 (1920 x 1080)", - "9:16 (1080 x 1920)", - "1:1 (1152 x 1152)", - "4:3 (1536 x 1152)", - "3:4 (1152 x 1536)", - "21:9 (2560 x 1080)", - ], - default="16:9 (1920 x 1080)", - tooltip="Resolution of the output video", - ), - IO.Float.Input( - "prompt_adherence", - default=4.0, - min=1.0, - max=20.0, - step=1.0, - tooltip="Guidance scale for generation control", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - control_after_generate=True, - tooltip="Random seed value", - ), - IO.Int.Input( - "steps", - default=80, - min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) - max=100, - step=1, - tooltip="Inference steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 1.5}""", - ), - ) - - @classmethod - async def execute( - cls, - prompt: str, - negative_prompt: str, - resolution: str, - prompt_adherence: float, - seed: int, - steps: int, - ) -> IO.NodeOutput: - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - width_height = parse_width_height_from_res(resolution) - - inference_params = MoonvalleyTextToVideoInferenceParams( - negative_prompt=negative_prompt, - steps=steps, - seed=seed, - guidance_scale=prompt_adherence, - num_frames=128, - width=width_height["width"], - height=width_height["height"], - ) - - 
task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_TXT2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyTextToVideoRequest(prompt_text=prompt, inference_params=inference_params), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - return IO.NodeOutput(await download_url_to_video_output(final_response.output_url)) - - -class MoonvalleyExtension(ComfyExtension): - @override - async def get_node_list(self) -> list[type[IO.ComfyNode]]: - return [ - MoonvalleyImg2VideoNode, - MoonvalleyTxt2VideoNode, - MoonvalleyVideo2VideoNode, - ] - - -async def comfy_entrypoint() -> MoonvalleyExtension: - return MoonvalleyExtension() diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 4ee896fa8..a5a188634 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -27,6 +27,7 @@ from comfy_api_nodes.util import ( ApiEndpoint, download_url_to_bytesio, downscale_image_tensor, + get_number_of_images, poll_op, sync_op, tensor_to_base64_string, @@ -39,16 +40,18 @@ STARTING_POINT_ID_PATTERN = r"" class SupportedOpenAIModel(str, Enum): - o4_mini = "o4-mini" - o1 = "o1" - o3 = "o3" - o1_pro = "o1-pro" - gpt_4_1 = "gpt-4.1" - gpt_4_1_mini = "gpt-4.1-mini" - gpt_4_1_nano = "gpt-4.1-nano" + gpt_5_5_pro = "gpt-5.5-pro" + gpt_5_5 = "gpt-5.5" gpt_5 = "gpt-5" gpt_5_mini = "gpt-5-mini" gpt_5_nano = "gpt-5-nano" + gpt_4_1 = "gpt-4.1" + gpt_4_1_mini = "gpt-4.1-mini" + gpt_4_1_nano = "gpt-4.1-nano" + o4_mini = "o4-mini" + o3 = "o3" + o1_pro = "o1-pro" + o1 = "o1" async def validate_and_cast_response(response, timeout: int = None) -> torch.Tensor: @@ -357,15 +360,20 @@ def calculate_tokens_price_image_1_5(response: OpenAIImageGenerationResponse) -> return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 32.0)) / 1_000_000.0 +def calculate_tokens_price_image_2_0(response: 
OpenAIImageGenerationResponse) -> float | None: + return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 30.0)) / 1_000_000.0 + + class OpenAIGPTImage1(IO.ComfyNode): @classmethod def define_schema(cls): return IO.Schema( node_id="OpenAIGPTImage1", - display_name="OpenAI GPT Image 1.5", + display_name="OpenAI GPT Image 2", category="api node/image/OpenAI", description="Generates images synchronously via OpenAI's GPT Image endpoint.", + is_deprecated=True, inputs=[ IO.String.Input( "prompt", @@ -401,8 +409,19 @@ class OpenAIGPTImage1(IO.ComfyNode): IO.Combo.Input( "size", default="auto", - options=["auto", "1024x1024", "1024x1536", "1536x1024"], - tooltip="Image size", + options=[ + "auto", + "1024x1024", + "1024x1536", + "1536x1024", + "2048x2048", + "2048x1152", + "1152x2048", + "3840x2160", + "2160x3840", + "Custom", + ], + tooltip="Image size. Select 'Custom' to use the custom width and height (GPT Image 2 only).", optional=True, ), IO.Int.Input( @@ -427,8 +446,26 @@ class OpenAIGPTImage1(IO.ComfyNode): ), IO.Combo.Input( "model", - options=["gpt-image-1", "gpt-image-1.5"], - default="gpt-image-1.5", + options=["gpt-image-1", "gpt-image-1.5", "gpt-image-2"], + default="gpt-image-2", + optional=True, + ), + IO.Int.Input( + "custom_width", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).", + optional=True, + ), + IO.Int.Input( + "custom_height", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. 
Must be a multiple of 16 (GPT Image 2 only).", optional=True, ), ], @@ -442,23 +479,36 @@ class OpenAIGPTImage1(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["quality", "n"]), + depends_on=IO.PriceBadgeDepends(widgets=["quality", "n", "model"]), expr=""" ( $ranges := { - "low": [0.011, 0.02], - "medium": [0.046, 0.07], - "high": [0.167, 0.3] + "gpt-image-1": { + "low": [0.011, 0.02], + "medium": [0.042, 0.07], + "high": [0.167, 0.25] + }, + "gpt-image-1.5": { + "low": [0.009, 0.02], + "medium": [0.034, 0.062], + "high": [0.133, 0.22] + }, + "gpt-image-2": { + "low": [0.0048, 0.019], + "medium": [0.041, 0.168], + "high": [0.165, 0.67] + } }; - $range := $lookup($ranges, widgets.quality); - $n := widgets.n; + $range := $lookup($lookup($ranges, widgets.model), widgets.quality); + $nRaw := widgets.n; + $n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1; ($n = 1) - ? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1]} + ? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1], "format": {"approximate": true}} : { "type":"range_usd", - "min_usd": $range[0], - "max_usd": $range[1], - "format": { "suffix": " x " & $string($n) & "/Run" } + "min_usd": $range[0] * $n, + "max_usd": $range[1] * $n, + "format": { "suffix": "/Run", "approximate": true } } ) """, @@ -476,6 +526,8 @@ class OpenAIGPTImage1(IO.ComfyNode): mask: Input.Image | None = None, n: int = 1, size: str = "1024x1024", + custom_width: int = 1024, + custom_height: int = 1024, model: str = "gpt-image-1", ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=False) @@ -483,10 +535,36 @@ class OpenAIGPTImage1(IO.ComfyNode): if mask is not None and image is None: raise ValueError("Cannot use a mask without an input image") + if size == "Custom": + if model != "gpt-image-2": + raise ValueError("Custom resolution is only supported by GPT Image 2 model") + if custom_width % 16 != 0 or custom_height % 16 != 0: + raise ValueError(f"Custom 
width and height must be multiples of 16, got {custom_width}x{custom_height}") + if max(custom_width, custom_height) > 3840: + raise ValueError(f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}") + ratio = max(custom_width, custom_height) / min(custom_width, custom_height) + if ratio > 3: + raise ValueError( + f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}" + ) + total_pixels = custom_width * custom_height + if not 655_360 <= total_pixels <= 8_294_400: + raise ValueError( + f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}" + ) + size = f"{custom_width}x{custom_height}" + elif model in ("gpt-image-1", "gpt-image-1.5"): + if size not in ("auto", "1024x1024", "1024x1536", "1536x1024"): + raise ValueError(f"Resolution {size} is only supported by GPT Image 2 model") + if model == "gpt-image-1": price_extractor = calculate_tokens_price_image_1 elif model == "gpt-image-1.5": price_extractor = calculate_tokens_price_image_1_5 + elif model == "gpt-image-2": + price_extractor = calculate_tokens_price_image_2_0 + if background == "transparent": + raise ValueError("Transparent background is not supported for GPT Image 2 model") else: raise ValueError(f"Unknown model: {model}") @@ -564,6 +642,316 @@ class OpenAIGPTImage1(IO.ComfyNode): return IO.NodeOutput(await validate_and_cast_response(response)) +def _gpt_image_shared_inputs(): + """Inputs shared by all GPT Image models (quality + reference images + mask).""" + return [ + IO.Combo.Input( + "quality", + default="low", + options=["low", "medium", "high"], + tooltip="Image quality, affects cost and generation time.", + ), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 17)], + min=0, + ), + tooltip="Optional reference image(s) for image editing. 
Up to 16 images.", + ), + IO.Mask.Input( + "mask", + optional=True, + tooltip="Optional mask for inpainting (white areas will be replaced). " + "Requires exactly one reference image.", + ), + ] + + +def _gpt_image_legacy_model_inputs(): + """Per-model widget set for legacy gpt-image-1 / gpt-image-1.5 (4 base sizes, transparent bg allowed).""" + return [ + IO.Combo.Input( + "size", + default="auto", + options=["auto", "1024x1024", "1024x1536", "1536x1024"], + tooltip="Image size.", + ), + IO.Combo.Input( + "background", + default="auto", + options=["auto", "opaque", "transparent"], + tooltip="Return image with or without background.", + ), + *_gpt_image_shared_inputs(), + ] + + +class OpenAIGPTImageNodeV2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="OpenAIGPTImageNodeV2", + display_name="OpenAI GPT Image 2", + category="api node/image/OpenAI", + description="Generates images via OpenAI's GPT Image endpoint.", + inputs=[ + IO.String.Input( + "prompt", + default="", + multiline=True, + tooltip="Text prompt for GPT Image", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "gpt-image-2", + [ + IO.Combo.Input( + "size", + default="auto", + options=[ + "auto", + "1024x1024", + "1024x1536", + "1536x1024", + "2048x2048", + "2048x1152", + "1152x2048", + "3840x2160", + "2160x3840", + "Custom", + ], + tooltip="Image size. Select 'Custom' to use the custom width and height.", + ), + IO.Int.Input( + "custom_width", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16.", + ), + IO.Int.Input( + "custom_height", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. 
Must be a multiple of 16.", + ), + IO.Combo.Input( + "background", + default="auto", + options=["auto", "opaque"], + tooltip="Return image with or without background.", + ), + *_gpt_image_shared_inputs(), + ], + ), + IO.DynamicCombo.Option("gpt-image-1.5", _gpt_image_legacy_model_inputs()), + IO.DynamicCombo.Option("gpt-image-1", _gpt_image_legacy_model_inputs()), + ], + ), + IO.Int.Input( + "n", + default=1, + min=1, + max=8, + step=1, + tooltip="How many images to generate", + display_mode=IO.NumberDisplay.number, + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="not implemented yet in backend", + ), + ], + outputs=[IO.Image.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.quality", "n"]), + expr=""" + ( + $ranges := { + "gpt-image-1": { + "low": [0.011, 0.02], + "medium": [0.042, 0.07], + "high": [0.167, 0.25] + }, + "gpt-image-1.5": { + "low": [0.009, 0.02], + "medium": [0.034, 0.062], + "high": [0.133, 0.22] + }, + "gpt-image-2": { + "low": [0.0048, 0.019], + "medium": [0.041, 0.168], + "high": [0.165, 0.67] + } + }; + $range := $lookup($lookup($ranges, widgets.model), $lookup(widgets, "model.quality")); + $nRaw := widgets.n; + $n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1; + ($n = 1) + ? 
{"type":"range_usd","min_usd": $range[0], "max_usd": $range[1], "format": {"approximate": true}} + : { + "type":"range_usd", + "min_usd": $range[0] * $n, + "max_usd": $range[1] * $n, + "format": { "suffix": "/Run", "approximate": true } + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + n: int, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=False) + + model_id = model["model"] + size = model["size"] + background = model["background"] + quality = model["quality"] + custom_width = model.get("custom_width", 1024) + custom_height = model.get("custom_height", 1024) + + images_dict = model.get("images") or {} + image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None] + n_images = sum(get_number_of_images(t) for t in image_tensors) + mask = model.get("mask") + + if mask is not None and n_images == 0: + raise ValueError("Cannot use a mask without an input image") + + if size == "Custom": + if custom_width % 16 != 0 or custom_height % 16 != 0: + raise ValueError( + f"Custom width and height must be multiples of 16, got {custom_width}x{custom_height}" + ) + if max(custom_width, custom_height) > 3840: + raise ValueError( + f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}" + ) + ratio = max(custom_width, custom_height) / min(custom_width, custom_height) + if ratio > 3: + raise ValueError( + f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}" + ) + total_pixels = custom_width * custom_height + if not 655_360 <= total_pixels <= 8_294_400: + raise ValueError( + f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}" + ) + size = f"{custom_width}x{custom_height}" + + if model_id == "gpt-image-1": + price_extractor = calculate_tokens_price_image_1 + elif model_id == "gpt-image-1.5": + price_extractor = calculate_tokens_price_image_1_5 + elif model_id == "gpt-image-2": + 
price_extractor = calculate_tokens_price_image_2_0 + else: + raise ValueError(f"Unknown model: {model_id}") + + if image_tensors: + flat: list[torch.Tensor] = [] + for tensor in image_tensors: + if len(tensor.shape) == 4: + flat.extend(tensor[i : i + 1] for i in range(tensor.shape[0])) + else: + flat.append(tensor.unsqueeze(0)) + + files = [] + for i, single_image in enumerate(flat): + scaled_image = downscale_image_tensor(single_image, total_pixels=2048 * 2048).squeeze() + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = BytesIO() + img.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + + if len(flat) == 1: + files.append(("image", (f"image_{i}.png", img_byte_arr, "image/png"))) + else: + files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png"))) + + if mask is not None: + if len(flat) != 1: + raise Exception("Cannot use a mask with multiple image") + ref_image = flat[0] + if mask.shape[1:] != ref_image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + _, height, width = mask.shape + rgba_mask = torch.zeros(height, width, 4, device="cpu") + rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu() + scaled_mask = downscale_image_tensor( + rgba_mask.unsqueeze(0), total_pixels=2048 * 2048 + ).squeeze() + mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_img_byte_arr = BytesIO() + mask_img.save(mask_img_byte_arr, format="PNG") + mask_img_byte_arr.seek(0) + files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png"))) + + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/edits", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageEditRequest( + model=model_id, + prompt=prompt, + quality=quality, + background=background, + n=n, + size=size, + moderation="low", + ), + content_type="multipart/form-data", + files=files, + price_extractor=price_extractor, + ) + else: + 
response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/generations", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageGenerationRequest( + model=model_id, + prompt=prompt, + quality=quality, + background=background, + n=n, + size=size, + moderation="low", + ), + price_extractor=price_extractor, + ) + return IO.NodeOutput(await validate_and_cast_response(response)) + + class OpenAIChatNode(IO.ComfyNode): """ Node to generate text responses from an OpenAI model. @@ -665,6 +1053,16 @@ class OpenAIChatNode(IO.ComfyNode): "usd": [0.002, 0.008], "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } } + : $contains($m, "gpt-5.5-pro") ? { + "type": "list_usd", + "usd": [0.03, 0.18], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-5.5") ? { + "type": "list_usd", + "usd": [0.005, 0.03], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } : $contains($m, "gpt-5-nano") ? { "type": "list_usd", "usd": [0.00005, 0.0004], @@ -913,6 +1311,7 @@ class OpenAIExtension(ComfyExtension): OpenAIDalle2, OpenAIDalle3, OpenAIGPTImage1, + OpenAIGPTImageNodeV2, OpenAIChatNode, OpenAIInputFiles, OpenAIChatConfig, diff --git a/comfy_api_nodes/nodes_quiver.py b/comfy_api_nodes/nodes_quiver.py index 61533263f..3269c0afe 100644 --- a/comfy_api_nodes/nodes_quiver.py +++ b/comfy_api_nodes/nodes_quiver.py @@ -17,6 +17,44 @@ from comfy_api_nodes.util import ( ) from comfy_extras.nodes_images import SVG +_ARROW_MODELS = ["arrow-1.1", "arrow-1.1-max", "arrow-preview"] + + +def _arrow_sampling_inputs(): + """Shared sampling inputs for all Arrow model variants.""" + return [ + IO.Float.Input( + "temperature", + default=1.0, + min=0.0, + max=2.0, + step=0.1, + display_mode=IO.NumberDisplay.slider, + tooltip="Randomness control. 
Higher values increase randomness.", + advanced=True, + ), + IO.Float.Input( + "top_p", + default=1.0, + min=0.05, + max=1.0, + step=0.05, + display_mode=IO.NumberDisplay.slider, + tooltip="Nucleus sampling parameter.", + advanced=True, + ), + IO.Float.Input( + "presence_penalty", + default=0.0, + min=-2.0, + max=2.0, + step=0.1, + display_mode=IO.NumberDisplay.slider, + tooltip="Token presence penalty.", + advanced=True, + ), + ] + class QuiverTextToSVGNode(IO.ComfyNode): @classmethod @@ -39,6 +77,7 @@ class QuiverTextToSVGNode(IO.ComfyNode): default="", tooltip="Additional style or formatting guidance.", optional=True, + advanced=True, ), IO.Autogrow.Input( "reference_images", @@ -53,43 +92,7 @@ class QuiverTextToSVGNode(IO.ComfyNode): ), IO.DynamicCombo.Input( "model", - options=[ - IO.DynamicCombo.Option( - "arrow-preview", - [ - IO.Float.Input( - "temperature", - default=1.0, - min=0.0, - max=2.0, - step=0.1, - display_mode=IO.NumberDisplay.slider, - tooltip="Randomness control. Higher values increase randomness.", - advanced=True, - ), - IO.Float.Input( - "top_p", - default=1.0, - min=0.05, - max=1.0, - step=0.05, - display_mode=IO.NumberDisplay.slider, - tooltip="Nucleus sampling parameter.", - advanced=True, - ), - IO.Float.Input( - "presence_penalty", - default=0.0, - min=-2.0, - max=2.0, - step=0.1, - display_mode=IO.NumberDisplay.slider, - tooltip="Token presence penalty.", - advanced=True, - ), - ], - ), - ], + options=[IO.DynamicCombo.Option(m, _arrow_sampling_inputs()) for m in _ARROW_MODELS], tooltip="Model to use for SVG generation.", ), IO.Int.Input( @@ -112,7 +115,16 @@ class QuiverTextToSVGNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.429}""", + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $contains(widgets.model, "max") + ? {"type":"usd","usd":0.3575} + : $contains(widgets.model, "preview") + ? 
{"type":"usd","usd":0.429} + : {"type":"usd","usd":0.286} + ) + """, ), ) @@ -131,7 +143,7 @@ class QuiverTextToSVGNode(IO.ComfyNode): if reference_images: references = [] for key in reference_images: - url = await upload_image_to_comfyapi(cls, reference_images[key]) + url = await upload_image_to_comfyapi(cls, reference_images[key], mime_type="image/png") references.append(QuiverImageObject(url=url)) if len(references) > 4: raise ValueError("Maximum 4 reference images are allowed.") @@ -176,12 +188,13 @@ class QuiverImageToSVGNode(IO.ComfyNode): "auto_crop", default=False, tooltip="Automatically crop to the dominant subject.", + advanced=True, ), IO.DynamicCombo.Input( "model", options=[ IO.DynamicCombo.Option( - "arrow-preview", + m, [ IO.Int.Input( "target_size", @@ -189,39 +202,12 @@ class QuiverImageToSVGNode(IO.ComfyNode): min=128, max=4096, tooltip="Square resize target in pixels.", - ), - IO.Float.Input( - "temperature", - default=1.0, - min=0.0, - max=2.0, - step=0.1, - display_mode=IO.NumberDisplay.slider, - tooltip="Randomness control. Higher values increase randomness.", - advanced=True, - ), - IO.Float.Input( - "top_p", - default=1.0, - min=0.05, - max=1.0, - step=0.05, - display_mode=IO.NumberDisplay.slider, - tooltip="Nucleus sampling parameter.", - advanced=True, - ), - IO.Float.Input( - "presence_penalty", - default=0.0, - min=-2.0, - max=2.0, - step=0.1, - display_mode=IO.NumberDisplay.slider, - tooltip="Token presence penalty.", advanced=True, ), + *_arrow_sampling_inputs(), ], - ), + ) + for m in _ARROW_MODELS ], tooltip="Model to use for SVG vectorization.", ), @@ -245,7 +231,16 @@ class QuiverImageToSVGNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.429}""", + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $contains(widgets.model, "max") + ? {"type":"usd","usd":0.3575} + : $contains(widgets.model, "preview") + ? 
{"type":"usd","usd":0.429} + : {"type":"usd","usd":0.286} + ) + """, ), ) @@ -257,7 +252,7 @@ class QuiverImageToSVGNode(IO.ComfyNode): model: dict, seed: int, ) -> IO.NodeOutput: - image_url = await upload_image_to_comfyapi(cls, image) + image_url = await upload_image_to_comfyapi(cls, image, mime_type="image/png") response = await sync_op( cls, diff --git a/comfy_api_nodes/nodes_sonilo.py b/comfy_api_nodes/nodes_sonilo.py new file mode 100644 index 000000000..5518f5902 --- /dev/null +++ b/comfy_api_nodes/nodes_sonilo.py @@ -0,0 +1,287 @@ +import base64 +import json +import logging +import time +from urllib.parse import urljoin + +import aiohttp +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.util import ( + ApiEndpoint, + audio_bytes_to_audio_input, + upload_video_to_comfyapi, + validate_string, +) +from comfy_api_nodes.util._helpers import ( + default_base_url, + get_auth_header, + get_node_id, + is_processing_interrupted, +) +from comfy_api_nodes.util.common_exceptions import ProcessingInterrupted +from server import PromptServer + +logger = logging.getLogger(__name__) + + +class SoniloVideoToMusic(IO.ComfyNode): + """Generate music from video using Sonilo's AI model.""" + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="SoniloVideoToMusic", + display_name="Sonilo Video to Music", + category="api node/audio/Sonilo", + description="Generate music from video content using Sonilo's AI model. " + "Analyzes the video and creates matching music.", + inputs=[ + IO.Video.Input( + "video", + tooltip="Input video to generate music from. Maximum duration: 6 minutes.", + ), + IO.String.Input( + "prompt", + default="", + multiline=True, + tooltip="Optional text prompt to guide music generation. 
" + "Leave empty for best quality - the model will fully analyze the video content.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed for reproducibility. Currently ignored by the Sonilo " + "service but kept for graph consistency.", + ), + ], + outputs=[IO.Audio.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr='{"type":"usd","usd":0.009,"format":{"suffix":"/second"}}', + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + prompt: str = "", + seed: int = 0, + ) -> IO.NodeOutput: + video_url = await upload_video_to_comfyapi(cls, video, max_duration=360) + form = aiohttp.FormData() + form.add_field("video_url", video_url) + if prompt.strip(): + form.add_field("prompt", prompt.strip()) + audio_bytes = await _stream_sonilo_music( + cls, + ApiEndpoint(path="/proxy/sonilo/v2m/generate", method="POST"), + form, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(audio_bytes)) + + +class SoniloTextToMusic(IO.ComfyNode): + """Generate music from a text prompt using Sonilo's AI model.""" + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="SoniloTextToMusic", + display_name="Sonilo Text to Music", + category="api node/audio/Sonilo", + description="Generate music from a text prompt using Sonilo's AI model. " + "Leave duration at 0 to let the model infer it from the prompt.", + inputs=[ + IO.String.Input( + "prompt", + default="", + multiline=True, + tooltip="Text prompt describing the music to generate.", + ), + IO.Int.Input( + "duration", + default=0, + min=0, + max=360, + tooltip="Target duration in seconds. Set to 0 to let the model " + "infer the duration from the prompt. 
Maximum: 6 minutes.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed for reproducibility. Currently ignored by the Sonilo " + "service but kept for graph consistency.", + ), + ], + outputs=[IO.Audio.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration"]), + expr=""" + ( + widgets.duration > 0 + ? {"type":"usd","usd": 0.005 * widgets.duration} + : {"type":"usd","usd": 0.005, "format":{"suffix":"/second"}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + duration: int = 0, + seed: int = 0, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + form = aiohttp.FormData() + form.add_field("prompt", prompt) + if duration > 0: + form.add_field("duration", str(duration)) + audio_bytes = await _stream_sonilo_music( + cls, + ApiEndpoint(path="/proxy/sonilo/t2m/generate", method="POST"), + form, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(audio_bytes)) + + +async def _stream_sonilo_music( + cls: type[IO.ComfyNode], + endpoint: ApiEndpoint, + form: aiohttp.FormData, +) -> bytes: + """POST ``form`` to Sonilo, read the NDJSON stream, and return the first stream's audio bytes.""" + url = urljoin(default_base_url().rstrip("/") + "/", endpoint.path.lstrip("/")) + + headers: dict[str, str] = {} + headers.update(get_auth_header(cls)) + headers.update(endpoint.headers) + + node_id = get_node_id(cls) + start_ts = time.monotonic() + last_chunk_status_ts = 0.0 + audio_streams: dict[int, list[bytes]] = {} + title: str | None = None + + timeout = aiohttp.ClientTimeout(total=1200.0, sock_read=300.0) + async with aiohttp.ClientSession(timeout=timeout) as session: + PromptServer.instance.send_progress_text("Status: Queued", node_id) + async with session.post(url, data=form, 
headers=headers) as resp: + if resp.status >= 400: + msg = await _extract_error_message(resp) + raise Exception(f"Sonilo API error ({resp.status}): {msg}") + + while True: + if is_processing_interrupted(): + raise ProcessingInterrupted("Task cancelled") + + raw_line = await resp.content.readline() + if not raw_line: + break + + line = raw_line.decode("utf-8").strip() + if not line: + continue + + try: + evt = json.loads(line) + except json.JSONDecodeError: + logger.warning("Sonilo: skipping malformed NDJSON line") + continue + + evt_type = evt.get("type") + if evt_type == "error": + code = evt.get("code", "UNKNOWN") + message = evt.get("message", "Unknown error") + raise Exception(f"Sonilo generation error ({code}): {message}") + if evt_type == "duration": + duration_sec = evt.get("duration_sec") + if duration_sec is not None: + PromptServer.instance.send_progress_text( + f"Status: Generating\nVideo duration: {duration_sec:.1f}s", + node_id, + ) + elif evt_type in ("titles", "title"): + # v2m sends a "titles" list, t2m sends a scalar "title" + if evt_type == "titles": + titles = evt.get("titles", []) + if titles: + title = titles[0] + else: + title = evt.get("title") or title + if title: + PromptServer.instance.send_progress_text( + f"Status: Generating\nTitle: {title}", + node_id, + ) + elif evt_type == "audio_chunk": + stream_idx = evt.get("stream_index", 0) + chunk_data = base64.b64decode(evt["data"]) + + if stream_idx not in audio_streams: + audio_streams[stream_idx] = [] + audio_streams[stream_idx].append(chunk_data) + + now = time.monotonic() + if now - last_chunk_status_ts >= 1.0: + total_chunks = sum(len(chunks) for chunks in audio_streams.values()) + elapsed = int(now - start_ts) + status_lines = ["Status: Receiving audio"] + if title: + status_lines.append(f"Title: {title}") + status_lines.append(f"Chunks received: {total_chunks}") + status_lines.append(f"Time elapsed: {elapsed}s") + PromptServer.instance.send_progress_text("\n".join(status_lines), 
node_id) + last_chunk_status_ts = now + elif evt_type == "complete": + break + + if not audio_streams: + raise Exception("Sonilo API returned no audio data.") + + PromptServer.instance.send_progress_text("Status: Completed", node_id) + selected_stream = 0 if 0 in audio_streams else min(audio_streams) + return b"".join(audio_streams[selected_stream]) + + +async def _extract_error_message(resp: aiohttp.ClientResponse) -> str: + """Extract a human-readable error message from an HTTP error response.""" + try: + error_body = await resp.json() + detail = error_body.get("detail", {}) + if isinstance(detail, dict): + return detail.get("message", str(detail)) + return str(detail) + except Exception: + return await resp.text() + + +class SoniloExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [SoniloVideoToMusic, SoniloTextToMusic] + + +async def comfy_entrypoint() -> SoniloExtension: + return SoniloExtension() diff --git a/comfy_api_nodes/nodes_sora.py b/comfy_api_nodes/nodes_sora.py index afc18bb25..c1d485188 100644 --- a/comfy_api_nodes/nodes_sora.py +++ b/comfy_api_nodes/nodes_sora.py @@ -33,9 +33,13 @@ class OpenAIVideoSora2(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="OpenAIVideoSora2", - display_name="OpenAI Sora - Video", + display_name="OpenAI Sora - Video (DEPRECATED)", category="api node/video/Sora", - description="OpenAI video and audio generation.", + description=( + "OpenAI video and audio generation.\n\n" + "DEPRECATION NOTICE: OpenAI will stop serving the Sora v2 API in September 2026. " + "This node will be removed from ComfyUI at that time." 
+ ), inputs=[ IO.Combo.Input( "model", diff --git a/comfy_api_nodes/nodes_stability.py b/comfy_api_nodes/nodes_stability.py index 9ef13c83b..906d8ff35 100644 --- a/comfy_api_nodes/nodes_stability.py +++ b/comfy_api_nodes/nodes_stability.py @@ -401,7 +401,7 @@ class StabilityUpscaleConservativeNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.25}""", + expr="""{"type":"usd","usd":0.4}""", ), ) @@ -510,7 +510,7 @@ class StabilityUpscaleCreativeNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.25}""", + expr="""{"type":"usd","usd":0.6}""", ), ) @@ -593,7 +593,7 @@ class StabilityUpscaleFastNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.01}""", + expr="""{"type":"usd","usd":0.02}""", ), ) diff --git a/comfy_api_nodes/nodes_topaz.py b/comfy_api_nodes/nodes_topaz.py index b18b31af1..e79c16d3c 100644 --- a/comfy_api_nodes/nodes_topaz.py +++ b/comfy_api_nodes/nodes_topaz.py @@ -36,11 +36,15 @@ from comfy_api_nodes.util import ( ) UPSCALER_MODELS_MAP = { + "Astra 2": "ast-2", "Starlight (Astra) Fast": "slf-1", "Starlight (Astra) Creative": "slc-1", "Starlight Precise 2.5": "slp-2.5", } +AST2_MAX_FRAMES = 9000 +AST2_MAX_FRAMES_WITH_PROMPT = 450 + class TopazImageEnhance(IO.ComfyNode): @classmethod @@ -230,13 +234,20 @@ class TopazVideoEnhance(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TopazVideoEnhance", - display_name="Topaz Video Enhance", + display_name="Topaz Video Enhance (Legacy)", category="api node/video/Topaz", description="Breathe new life into video with powerful upscaling and recovery technology.", inputs=[ IO.Video.Input("video"), IO.Boolean.Input("upscaler_enabled", default=True), - IO.Combo.Input("upscaler_model", options=list(UPSCALER_MODELS_MAP.keys())), + IO.Combo.Input( + "upscaler_model", + options=[ + "Starlight (Astra) Fast", + "Starlight (Astra) Creative", + "Starlight Precise 
2.5", + ], + ), IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), IO.Combo.Input( "upscaler_creativity", @@ -304,6 +315,7 @@ class TopazVideoEnhance(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + is_deprecated=True, ) @classmethod @@ -453,7 +465,350 @@ class TopazVideoEnhance(IO.ComfyNode): progress_extractor=lambda x: getattr(x, "progress", 0), price_extractor=lambda x: (x.estimates.cost[0] * 0.08 if x.estimates and x.estimates.cost[0] else None), poll_interval=10.0, - max_poll_attempts=320, + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.download.url)) + + +class TopazVideoEnhanceV2(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TopazVideoEnhanceV2", + display_name="Topaz Video Enhance", + category="api node/video/Topaz", + description="Breathe new life into video with powerful upscaling and recovery technology.", + inputs=[ + IO.Video.Input("video"), + IO.DynamicCombo.Input( + "upscaler_model", + options=[ + IO.DynamicCombo.Option( + "Astra 2", + [ + IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), + IO.Float.Input( + "creativity", + default=0.5, + min=0.0, + max=1.0, + step=0.1, + display_mode=IO.NumberDisplay.slider, + tooltip="Creative strength of the upscale.", + ), + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Optional descriptive (not instructive) scene prompt." + f"Capping input at {AST2_MAX_FRAMES_WITH_PROMPT} frames (~15s @ 30fps) when set.", + ), + IO.Float.Input( + "sharp", + default=0.5, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Pre-enhance sharpness: " + "0.0=Gaussian blur, 0.5=passthrough (default), 1.0=USM sharpening.", + advanced=True, + ), + IO.Float.Input( + "realism", + default=0.0, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Pulls output toward photographic realism." 
+ "Leave at 0 for the model default.", + advanced=True, + ), + ], + ), + IO.DynamicCombo.Option( + "Starlight (Astra) Fast", + [IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]),], + ), + IO.DynamicCombo.Option( + "Starlight (Astra) Creative", + [ + IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), + IO.Combo.Input( + "creativity", + options=["low", "middle", "high"], + default="low", + tooltip="Creative strength of the upscale.", + ), + ], + ), + IO.DynamicCombo.Option( + "Starlight Precise 2.5", + [IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"])], + ), + IO.DynamicCombo.Option("Disabled", []), + ], + ), + IO.DynamicCombo.Input( + "interpolation_model", + options=[ + IO.DynamicCombo.Option("Disabled", []), + IO.DynamicCombo.Option( + "apo-8", + [ + IO.Int.Input( + "interpolation_frame_rate", + default=60, + min=15, + max=240, + display_mode=IO.NumberDisplay.number, + tooltip="Output frame rate.", + ), + IO.Int.Input( + "interpolation_slowmo", + default=1, + min=1, + max=16, + display_mode=IO.NumberDisplay.number, + tooltip="Slow-motion factor applied to the input video. 
" + "For example, 2 makes the output twice as slow and doubles the duration.", + advanced=True, + ), + IO.Boolean.Input( + "interpolation_duplicate", + default=False, + tooltip="Analyze the input for duplicate frames and remove them.", + advanced=True, + ), + IO.Float.Input( + "interpolation_duplicate_threshold", + default=0.01, + min=0.001, + max=0.1, + step=0.001, + display_mode=IO.NumberDisplay.number, + tooltip="Detection sensitivity for duplicate frames.", + advanced=True, + ), + ], + ), + ], + ), + IO.Combo.Input( + "dynamic_compression_level", + options=["Low", "Mid", "High"], + default="Low", + tooltip="CQP level.", + optional=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=[ + "upscaler_model", + "upscaler_model.upscaler_resolution", + "interpolation_model", + ]), + expr=""" + ( + $model := $lookup(widgets, "upscaler_model"); + $res := $lookup(widgets, "upscaler_model.upscaler_resolution"); + $interp := $lookup(widgets, "interpolation_model"); + $is4k := $contains($res, "4k"); + $hasInterp := $interp != "disabled"; + $rates := { + "starlight (astra) fast": {"hd": 0.43, "uhd": 0.85}, + "starlight precise 2.5": {"hd": 0.70, "uhd": 1.54}, + "astra 2": {"hd": 1.72, "uhd": 2.85}, + "starlight (astra) creative": {"hd": 2.25, "uhd": 3.99} + }; + $surcharge := $is4k ? 0.28 : 0.14; + $entry := $lookup($rates, $model); + $base := $is4k ? $entry.uhd : $entry.hd; + $hi := $base + ($hasInterp ? $surcharge : 0); + $model = "disabled" + ? {"type":"text","text":"Interpolation only"} + : ($hasInterp + ? 
{"type":"text","text":"~" & $string($base) & "–" & $string($hi) & " credits/src frame"} + : {"type":"text","text":"~" & $string($base) & " credits/src frame"}) + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + upscaler_model: dict, + interpolation_model: dict, + dynamic_compression_level: str = "Low", + ) -> IO.NodeOutput: + upscaler_choice = upscaler_model["upscaler_model"] + interpolation_choice = interpolation_model["interpolation_model"] + if upscaler_choice == "Disabled" and interpolation_choice == "Disabled": + raise ValueError("There is nothing to do: both upscaling and interpolation are disabled.") + validate_container_format_is_mp4(video) + src_width, src_height = video.get_dimensions() + src_frame_rate = int(video.get_frame_rate()) + duration_sec = video.get_duration() + src_video_stream = video.get_stream_source() + target_width = src_width + target_height = src_height + target_frame_rate = src_frame_rate + filters = [] + if upscaler_choice != "Disabled": + if "1080p" in upscaler_model["upscaler_resolution"]: + target_pixel_p = 1080 + max_long_side = 1920 + else: + target_pixel_p = 2160 + max_long_side = 3840 + ar = src_width / src_height + if src_width >= src_height: + # Landscape or Square; Attempt to set height to target (e.g., 2160), calculate width + target_height = target_pixel_p + target_width = int(target_height * ar) + # Check if width exceeds standard bounds (for ultra-wide e.g., 21:9 ARs) + if target_width > max_long_side: + target_width = max_long_side + target_height = int(target_width / ar) + else: + # Portrait; Attempt to set width to target (e.g., 2160), calculate height + target_width = target_pixel_p + target_height = int(target_width / ar) + # Check if height exceeds standard bounds + if target_height > max_long_side: + target_height = max_long_side + target_width = int(target_height * ar) + if target_width % 2 != 0: + target_width += 1 + if target_height % 2 != 0: + target_height += 1 + model_id 
= UPSCALER_MODELS_MAP[upscaler_choice] + if model_id == "slc-1": + filters.append( + VideoEnhancementFilter( + model=model_id, + creativity=upscaler_model["creativity"], + isOptimizedMode=True, + ) + ) + elif model_id == "ast-2": + n_frames = video.get_frame_count() + ast2_prompt = (upscaler_model["prompt"] or "").strip() + if ast2_prompt and n_frames > AST2_MAX_FRAMES_WITH_PROMPT: + raise ValueError( + f"Astra 2 with a prompt is limited to {AST2_MAX_FRAMES_WITH_PROMPT} input frames " + f"(~15s @ 30fps); video has {n_frames}. Clear the prompt or shorten the clip." + ) + if n_frames > AST2_MAX_FRAMES: + raise ValueError(f"Astra 2 is limited to {AST2_MAX_FRAMES} input frames; video has {n_frames}.") + realism = upscaler_model["realism"] + filters.append( + VideoEnhancementFilter( + model=model_id, + creativity=upscaler_model["creativity"], + prompt=(ast2_prompt or None), + sharp=upscaler_model["sharp"], + realism=(realism if realism > 0 else None), + ) + ) + else: + filters.append(VideoEnhancementFilter(model=model_id)) + if interpolation_choice != "Disabled": + target_frame_rate = interpolation_model["interpolation_frame_rate"] + filters.append( + VideoFrameInterpolationFilter( + model=interpolation_choice, + slowmo=interpolation_model["interpolation_slowmo"], + fps=interpolation_model["interpolation_frame_rate"], + duplicate=interpolation_model["interpolation_duplicate"], + duplicate_threshold=interpolation_model["interpolation_duplicate_threshold"], + ), + ) + initial_res = await sync_op( + cls, + ApiEndpoint(path="/proxy/topaz/video/", method="POST"), + response_model=CreateVideoResponse, + data=CreateVideoRequest( + source=CreateVideoRequestSource( + container="mp4", + size=get_fs_object_size(src_video_stream), + duration=int(duration_sec), + frameCount=video.get_frame_count(), + frameRate=src_frame_rate, + resolution=Resolution(width=src_width, height=src_height), + ), + filters=filters, + output=OutputInformationVideo( + 
resolution=Resolution(width=target_width, height=target_height), + frameRate=target_frame_rate, + audioCodec="AAC", + audioTransfer="Copy", + dynamicCompressionLevel=dynamic_compression_level, + ), + ), + wait_label="Creating task", + final_label_on_success="Task created", + ) + upload_res = await sync_op( + cls, + ApiEndpoint( + path=f"/proxy/topaz/video/{initial_res.requestId}/accept", + method="PATCH", + ), + response_model=VideoAcceptResponse, + wait_label="Preparing upload", + final_label_on_success="Upload started", + ) + if len(upload_res.urls) > 1: + raise NotImplementedError( + "Large files are not currently supported. Please open an issue in the ComfyUI repository." + ) + async with aiohttp.ClientSession(headers={"Content-Type": "video/mp4"}) as session: + if isinstance(src_video_stream, BytesIO): + src_video_stream.seek(0) + async with session.put(upload_res.urls[0], data=src_video_stream, raise_for_status=True) as res: + upload_etag = res.headers["Etag"] + else: + with builtins.open(src_video_stream, "rb") as video_file: + async with session.put(upload_res.urls[0], data=video_file, raise_for_status=True) as res: + upload_etag = res.headers["Etag"] + await sync_op( + cls, + ApiEndpoint( + path=f"/proxy/topaz/video/{initial_res.requestId}/complete-upload", + method="PATCH", + ), + response_model=VideoCompleteUploadResponse, + data=VideoCompleteUploadRequest( + uploadResults=[ + VideoCompleteUploadRequestPart( + partNum=1, + eTag=upload_etag, + ), + ], + ), + wait_label="Finalizing upload", + final_label_on_success="Upload completed", + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/topaz/video/{initial_res.requestId}/status"), + response_model=VideoStatusResponse, + status_extractor=lambda x: x.status, + progress_extractor=lambda x: getattr(x, "progress", 0), + price_extractor=lambda x: (x.estimates.cost[0] * 0.08 if x.estimates and x.estimates.cost[0] else None), + poll_interval=10.0, ) return IO.NodeOutput(await 
download_url_to_video_output(final_response.download.url)) @@ -464,6 +819,7 @@ class TopazExtension(ComfyExtension): return [ TopazImageEnhance, TopazVideoEnhance, + TopazVideoEnhanceV2, ] diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py index 9f4298dce..d6501dee4 100644 --- a/comfy_api_nodes/nodes_tripo.py +++ b/comfy_api_nodes/nodes_tripo.py @@ -60,6 +60,7 @@ async def poll_until_finished( ], status_extractor=lambda x: x.data.status, progress_extractor=lambda x: x.data.progress, + price_extractor=lambda x: x.data.consumed_credit * 0.01 if x.data.consumed_credit else None, estimated_duration=average_duration, ) if response_poll.data.status == TripoTaskStatus.SUCCESS: @@ -113,7 +114,6 @@ class TripoTextToModelNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends( widgets=[ "model_version", - "style", "texture", "pbr", "quad", @@ -124,20 +124,17 @@ class TripoTextToModelNode(IO.ComfyNode): expr=""" ( $isV14 := $contains(widgets.model_version,"v1.4"); - $style := widgets.style; - $hasStyle := ($style != "" and $style != "none"); + $isV3OrLater := $contains(widgets.model_version,"v3."); $withTexture := widgets.texture or widgets.pbr; $isHdTexture := (widgets.texture_quality = "detailed"); $isDetailedGeometry := (widgets.geometry_quality = "detailed"); - $baseCredits := - $isV14 ? 20 : ($withTexture ? 20 : 10); - $credits := - $baseCredits - + ($hasStyle ? 5 : 0) + $credits := $isV14 ? 20 : ( + ($withTexture ? 20 : 10) + (widgets.quad ? 5 : 0) + ($isHdTexture ? 10 : 0) - + ($isDetailedGeometry ? 20 : 0); - {"type":"usd","usd": $round($credits * 0.01, 2)} + + (($isDetailedGeometry and $isV3OrLater) ? 
20 : 0) + ); + {"type":"usd","usd": $round($credits * 0.01, 2), "format": {"approximate": true}} ) """, ), @@ -239,7 +236,6 @@ class TripoImageToModelNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends( widgets=[ "model_version", - "style", "texture", "pbr", "quad", @@ -250,20 +246,17 @@ class TripoImageToModelNode(IO.ComfyNode): expr=""" ( $isV14 := $contains(widgets.model_version,"v1.4"); - $style := widgets.style; - $hasStyle := ($style != "" and $style != "none"); + $isV3OrLater := $contains(widgets.model_version,"v3."); $withTexture := widgets.texture or widgets.pbr; $isHdTexture := (widgets.texture_quality = "detailed"); $isDetailedGeometry := (widgets.geometry_quality = "detailed"); - $baseCredits := - $isV14 ? 30 : ($withTexture ? 30 : 20); - $credits := - $baseCredits - + ($hasStyle ? 5 : 0) + $credits := $isV14 ? 30 : ( + ($withTexture ? 30 : 20) + (widgets.quad ? 5 : 0) + ($isHdTexture ? 10 : 0) - + ($isDetailedGeometry ? 20 : 0); - {"type":"usd","usd": $round($credits * 0.01, 2)} + + (($isDetailedGeometry and $isV3OrLater) ? 
20 : 0) + ); + {"type":"usd","usd": $round($credits * 0.01, 2), "format": {"approximate": true}} ) """, ), @@ -358,7 +351,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode): "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True ), IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True, advanced=True), - IO.Boolean.Input("quad", default=False, optional=True, advanced=True), + IO.Boolean.Input("quad", default=False, optional=True, advanced=True, tooltip="This parameter is deprecated and does nothing."), IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), ], outputs=[ @@ -379,7 +372,6 @@ class TripoMultiviewToModelNode(IO.ComfyNode): "model_version", "texture", "pbr", - "quad", "texture_quality", "geometry_quality", ], @@ -387,17 +379,16 @@ class TripoMultiviewToModelNode(IO.ComfyNode): expr=""" ( $isV14 := $contains(widgets.model_version,"v1.4"); + $isV3OrLater := $contains(widgets.model_version,"v3."); $withTexture := widgets.texture or widgets.pbr; $isHdTexture := (widgets.texture_quality = "detailed"); $isDetailedGeometry := (widgets.geometry_quality = "detailed"); - $baseCredits := - $isV14 ? 30 : ($withTexture ? 30 : 20); - $credits := - $baseCredits - + (widgets.quad ? 5 : 0) + $credits := $isV14 ? 30 : ( + ($withTexture ? 30 : 20) + ($isHdTexture ? 10 : 0) - + ($isDetailedGeometry ? 20 : 0); - {"type":"usd","usd": $round($credits * 0.01, 2)} + + (($isDetailedGeometry and $isV3OrLater) ? 
20 : 0) + ); + {"type":"usd","usd": $round($credits * 0.01, 2), "format": {"approximate": true}} ) """, ), @@ -457,7 +448,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode): geometry_quality=geometry_quality, texture_alignment=texture_alignment, face_limit=face_limit if face_limit != -1 else None, - quad=quad, + quad=None, ), ) return await poll_until_finished(cls, response, average_duration=80) @@ -498,7 +489,7 @@ class TripoTextureNode(IO.ComfyNode): expr=""" ( $tq := widgets.texture_quality; - {"type":"usd","usd": ($contains($tq,"detailed") ? 0.2 : 0.1)} + {"type":"usd","usd": ($contains($tq,"detailed") ? 0.2 : 0.1), "format": {"approximate": true}} ) """, ), @@ -555,7 +546,7 @@ class TripoRefineNode(IO.ComfyNode): is_api_node=True, is_output_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.3}""", + expr="""{"type":"usd","usd":0.3, "format": {"approximate": true}}""", ), ) @@ -592,7 +583,7 @@ class TripoRigNode(IO.ComfyNode): is_api_node=True, is_output_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.25}""", + expr="""{"type":"usd","usd":0.25, "format": {"approximate": true}}""", ), ) @@ -652,7 +643,7 @@ class TripoRetargetNode(IO.ComfyNode): is_api_node=True, is_output_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.1}""", + expr="""{"type":"usd","usd":0.1, "format": {"approximate": true}}""", ), ) @@ -761,19 +752,10 @@ class TripoConversionNode(IO.ComfyNode): "face_limit", "texture_size", "texture_format", - "force_symmetry", "flatten_bottom", "flatten_bottom_threshold", "pivot_to_center_bottom", "scale_factor", - "with_animation", - "pack_uv", - "bake", - "part_names", - "fbx_preset", - "export_vertex_colors", - "export_orientation", - "animate_in_place", ], ), expr=""" @@ -783,28 +765,16 @@ class TripoConversionNode(IO.ComfyNode): $flatThresh := (widgets.flatten_bottom_threshold != null) ? widgets.flatten_bottom_threshold : 0; $scale := (widgets.scale_factor != null) ? 
widgets.scale_factor : 1; $texFmt := (widgets.texture_format != "" ? widgets.texture_format : "jpeg"); - $part := widgets.part_names; - $fbx := (widgets.fbx_preset != "" ? widgets.fbx_preset : "blender"); - $orient := (widgets.export_orientation != "" ? widgets.export_orientation : "default"); $advanced := widgets.quad or - widgets.force_symmetry or widgets.flatten_bottom or widgets.pivot_to_center_bottom or - widgets.with_animation or - widgets.pack_uv or - widgets.bake or - widgets.export_vertex_colors or - widgets.animate_in_place or ($face != -1) or ($texSize != 4096) or ($flatThresh != 0) or ($scale != 1) or - ($texFmt != "jpeg") or - ($part != "") or - ($fbx != "blender") or - ($orient != "default"); - {"type":"usd","usd": ($advanced ? 0.1 : 0.05)} + ($texFmt != "jpeg"); + {"type":"usd","usd": ($advanced ? 0.1 : 0.05), "format": {"approximate": true}} ) """, ), diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py index 13fc1cc36..2ff75d9b2 100644 --- a/comfy_api_nodes/nodes_veo2.py +++ b/comfy_api_nodes/nodes_veo2.py @@ -24,8 +24,9 @@ from comfy_api_nodes.util import ( AVERAGE_DURATION_VIDEO_GEN = 32 MODELS_MAP = { "veo-2.0-generate-001": "veo-2.0-generate-001", - "veo-3.1-generate": "veo-3.1-generate-preview", - "veo-3.1-fast-generate": "veo-3.1-fast-generate-preview", + "veo-3.1-generate": "veo-3.1-generate-001", + "veo-3.1-fast-generate": "veo-3.1-fast-generate-001", + "veo-3.1-lite": "veo-3.1-lite-generate-001", "veo-3.0-generate-001": "veo-3.0-generate-001", "veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001", } @@ -247,17 +248,8 @@ class VeoVideoGenerationNode(IO.ComfyNode): raise Exception("Video generation completed but no video was returned") -class Veo3VideoGenerationNode(VeoVideoGenerationNode): - """ - Generates videos from text prompts using Google's Veo 3 API. 
- - Supported models: - - veo-3.0-generate-001 - - veo-3.0-fast-generate-001 - - This node extends the base Veo node with Veo 3 specific features including - audio generation and fixed 8-second duration. - """ +class Veo3VideoGenerationNode(IO.ComfyNode): + """Generates videos from text prompts using Google's Veo 3 API.""" @classmethod def define_schema(cls): @@ -279,6 +271,13 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): default="16:9", tooltip="Aspect ratio of the output video", ), + IO.Combo.Input( + "resolution", + options=["720p", "1080p", "4k"], + default="720p", + tooltip="Output video resolution. 4K is not available for veo-3.1-lite and veo-3.0 models.", + optional=True, + ), IO.String.Input( "negative_prompt", multiline=True, @@ -289,11 +288,11 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): IO.Int.Input( "duration_seconds", default=8, - min=8, + min=4, max=8, - step=1, + step=2, display_mode=IO.NumberDisplay.number, - tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)", + tooltip="Duration of the output video in seconds", optional=True, ), IO.Boolean.Input( @@ -332,10 +331,10 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): options=[ "veo-3.1-generate", "veo-3.1-fast-generate", + "veo-3.1-lite", "veo-3.0-generate-001", "veo-3.0-fast-generate-001", ], - default="veo-3.0-generate-001", tooltip="Veo 3 model to use for video generation", optional=True, ), @@ -356,21 +355,111 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "resolution", "duration_seconds"]), expr=""" ( $m := widgets.model; + $r := widgets.resolution; $a := widgets.generate_audio; - ($contains($m,"veo-3.0-fast-generate-001") or $contains($m,"veo-3.1-fast-generate")) - ? {"type":"usd","usd": ($a ? 
1.2 : 0.8)} - : ($contains($m,"veo-3.0-generate-001") or $contains($m,"veo-3.1-generate")) - ? {"type":"usd","usd": ($a ? 3.2 : 1.6)} - : {"type":"range_usd","min_usd":0.8,"max_usd":3.2} + $seconds := widgets.duration_seconds; + $pps := + $contains($m, "lite") + ? ($r = "1080p" ? ($a ? 0.08 : 0.05) : ($a ? 0.05 : 0.03)) + : $contains($m, "3.1-fast") + ? ($r = "4k" ? ($a ? 0.30 : 0.25) : $r = "1080p" ? ($a ? 0.12 : 0.10) : ($a ? 0.10 : 0.08)) + : $contains($m, "3.1-generate") + ? ($r = "4k" ? ($a ? 0.60 : 0.40) : ($a ? 0.40 : 0.20)) + : $contains($m, "3.0-fast") + ? ($a ? 0.15 : 0.10) + : ($a ? 0.40 : 0.20); + {"type":"usd","usd": $pps * $seconds} ) """, ), ) + @classmethod + async def execute( + cls, + prompt, + aspect_ratio="16:9", + resolution="720p", + negative_prompt="", + duration_seconds=8, + enhance_prompt=True, + person_generation="ALLOW", + seed=0, + image=None, + model="veo-3.0-generate-001", + generate_audio=False, + ): + if resolution == "4k" and ("lite" in model or "3.0" in model): + raise Exception("4K resolution is not supported by the veo-3.1-lite or veo-3.0 models.") + + model = MODELS_MAP[model] + + instances = [{"prompt": prompt}] + if image is not None: + image_base64 = tensor_to_base64_string(image) + if image_base64: + instances[0]["image"] = {"bytesBase64Encoded": image_base64, "mimeType": "image/png"} + + parameters = { + "aspectRatio": aspect_ratio, + "personGeneration": person_generation, + "durationSeconds": duration_seconds, + "enhancePrompt": True, + "generateAudio": generate_audio, + } + if negative_prompt: + parameters["negativePrompt"] = negative_prompt + if seed > 0: + parameters["seed"] = seed + if "veo-3.1" in model: + parameters["resolution"] = resolution + + initial_response = await sync_op( + cls, + ApiEndpoint(path=f"/proxy/veo/{model}/generate", method="POST"), + response_model=VeoGenVidResponse, + data=VeoGenVidRequest( + instances=instances, + parameters=parameters, + ), + ) + + poll_response = await poll_op( + cls, + 
ApiEndpoint(path=f"/proxy/veo/{model}/poll", method="POST"), + response_model=VeoGenVidPollResponse, + status_extractor=lambda r: "completed" if r.done else "pending", + data=VeoGenVidPollRequest(operationName=initial_response.name), + poll_interval=9.0, + estimated_duration=AVERAGE_DURATION_VIDEO_GEN, + ) + + if poll_response.error: + raise Exception(f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})") + + response = poll_response.response + filtered_count = response.raiMediaFilteredCount + if filtered_count: + reasons = response.raiMediaFilteredReasons or [] + reason_part = f": {reasons[0]}" if reasons else "" + raise Exception( + f"Content blocked by Google's Responsible AI filters{reason_part} " + f"({filtered_count} video{'s' if filtered_count != 1 else ''} filtered)." + ) + + if response.videos: + video = response.videos[0] + if video.bytesBase64Encoded: + return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(base64.b64decode(video.bytesBase64Encoded)))) + if video.gcsUri: + return IO.NodeOutput(await download_url_to_video_output(video.gcsUri)) + raise Exception("Video returned but no data or URL was provided") + raise Exception("Video generation completed but no video was returned") + class Veo3FirstLastFrameNode(IO.ComfyNode): @@ -394,7 +483,7 @@ class Veo3FirstLastFrameNode(IO.ComfyNode): default="", tooltip="Negative text prompt to guide what to avoid in the video", ), - IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input("resolution", options=["720p", "1080p", "4k"]), IO.Combo.Input( "aspect_ratio", options=["16:9", "9:16"], @@ -424,8 +513,7 @@ class Veo3FirstLastFrameNode(IO.ComfyNode): IO.Image.Input("last_frame", tooltip="End frame"), IO.Combo.Input( "model", - options=["veo-3.1-generate", "veo-3.1-fast-generate"], - default="veo-3.1-fast-generate", + options=["veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.1-lite"], ), IO.Boolean.Input( "generate_audio", @@ -443,26 +531,20 @@ class 
Veo3FirstLastFrameNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "duration"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "duration", "resolution"]), expr=""" ( - $prices := { - "veo-3.1-fast-generate": { "audio": 0.15, "no_audio": 0.10 }, - "veo-3.1-generate": { "audio": 0.40, "no_audio": 0.20 } - }; $m := widgets.model; - $ga := (widgets.generate_audio = "true"); + $r := widgets.resolution; + $ga := widgets.generate_audio; $seconds := widgets.duration; - $modelKey := - $contains($m, "veo-3.1-fast-generate") ? "veo-3.1-fast-generate" : - $contains($m, "veo-3.1-generate") ? "veo-3.1-generate" : - ""; - $audioKey := $ga ? "audio" : "no_audio"; - $modelPrices := $lookup($prices, $modelKey); - $pps := $lookup($modelPrices, $audioKey); - ($pps != null) - ? {"type":"usd","usd": $pps * $seconds} - : {"type":"range_usd","min_usd": 0.4, "max_usd": 3.2} + $pps := + $contains($m, "lite") + ? ($r = "1080p" ? ($ga ? 0.08 : 0.05) : ($ga ? 0.05 : 0.03)) + : $contains($m, "fast") + ? ($r = "4k" ? ($ga ? 0.30 : 0.25) : $r = "1080p" ? ($ga ? 0.12 : 0.10) : ($ga ? 0.10 : 0.08)) + : ($r = "4k" ? ($ga ? 0.60 : 0.40) : ($ga ? 
0.40 : 0.20)); + {"type":"usd","usd": $pps * $seconds} ) """, ), @@ -482,6 +564,9 @@ class Veo3FirstLastFrameNode(IO.ComfyNode): model: str, generate_audio: bool, ): + if "lite" in model and resolution == "4k": + raise Exception("4K resolution is not supported by the veo-3.1-lite model.") + model = MODELS_MAP[model] initial_response = await sync_op( cls, @@ -519,7 +604,7 @@ class Veo3FirstLastFrameNode(IO.ComfyNode): data=VeoGenVidPollRequest( operationName=initial_response.name, ), - poll_interval=5.0, + poll_interval=9.0, estimated_duration=AVERAGE_DURATION_VIDEO_GEN, ) diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py index f04407eb5..8d90cefeb 100644 --- a/comfy_api_nodes/nodes_vidu.py +++ b/comfy_api_nodes/nodes_vidu.py @@ -38,7 +38,7 @@ async def execute_task( cls: type[IO.ComfyNode], vidu_endpoint: str, payload: TaskCreationRequest | TaskExtendCreationRequest | TaskMultiFrameCreationRequest, - max_poll_attempts: int = 320, + max_poll_attempts: int = 480, ) -> list[TaskResult]: task_creation_response = await sync_op( cls, @@ -1097,7 +1097,6 @@ class ViduExtendVideoNode(IO.ComfyNode): video_url=await upload_video_to_comfyapi(cls, video, wait_label="Uploading video"), images=[image_url] if image_url else None, ), - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_video_output(results[0].url)) diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index e2afe7f9c..68061bb5c 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -1,9 +1,40 @@ import re -from pydantic import BaseModel, Field from typing_extensions import override from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.wan import ( + Image2ImageInputField, + Image2ImageParametersField, + Image2ImageTaskCreationRequest, + Image2VideoInputField, + Image2VideoParametersField, + Image2VideoTaskCreationRequest, + ImageTaskStatusResponse, + Reference2VideoInputField, + 
Reference2VideoParametersField, + Reference2VideoTaskCreationRequest, + TaskCreationResponse, + Text2ImageInputField, + Text2ImageTaskCreationRequest, + Text2VideoInputField, + Text2VideoParametersField, + Text2VideoTaskCreationRequest, + Txt2ImageParametersField, + VideoTaskStatusResponse, + Wan27ImageToVideoInputField, + Wan27ImageToVideoParametersField, + Wan27ImageToVideoTaskCreationRequest, + Wan27MediaItem, + Wan27ReferenceVideoInputField, + Wan27ReferenceVideoParametersField, + Wan27ReferenceVideoTaskCreationRequest, + Wan27Text2VideoParametersField, + Wan27Text2VideoTaskCreationRequest, + Wan27VideoEditInputField, + Wan27VideoEditParametersField, + Wan27VideoEditTaskCreationRequest, +) from comfy_api_nodes.util import ( ApiEndpoint, audio_to_base64_string, @@ -13,157 +44,14 @@ from comfy_api_nodes.util import ( poll_op, sync_op, tensor_to_base64_string, + upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_video_to_comfyapi, validate_audio_duration, + validate_string, validate_video_duration, ) - -class Text2ImageInputField(BaseModel): - prompt: str = Field(...) - negative_prompt: str | None = Field(None) - - -class Image2ImageInputField(BaseModel): - prompt: str = Field(...) - negative_prompt: str | None = Field(None) - images: list[str] = Field(..., min_length=1, max_length=2) - - -class Text2VideoInputField(BaseModel): - prompt: str = Field(...) - negative_prompt: str | None = Field(None) - audio_url: str | None = Field(None) - - -class Image2VideoInputField(BaseModel): - prompt: str = Field(...) - negative_prompt: str | None = Field(None) - img_url: str = Field(...) - audio_url: str | None = Field(None) - - -class Reference2VideoInputField(BaseModel): - prompt: str = Field(...) - negative_prompt: str | None = Field(None) - reference_video_urls: list[str] = Field(...) - - -class Txt2ImageParametersField(BaseModel): - size: str = Field(...) 
- n: int = Field(1, description="Number of images to generate.") # we support only value=1 - seed: int = Field(..., ge=0, le=2147483647) - prompt_extend: bool = Field(True) - watermark: bool = Field(False) - - -class Image2ImageParametersField(BaseModel): - size: str | None = Field(None) - n: int = Field(1, description="Number of images to generate.") # we support only value=1 - seed: int = Field(..., ge=0, le=2147483647) - watermark: bool = Field(False) - - -class Text2VideoParametersField(BaseModel): - size: str = Field(...) - seed: int = Field(..., ge=0, le=2147483647) - duration: int = Field(5, ge=5, le=15) - prompt_extend: bool = Field(True) - watermark: bool = Field(False) - audio: bool = Field(False, description="Whether to generate audio automatically.") - shot_type: str = Field("single") - - -class Image2VideoParametersField(BaseModel): - resolution: str = Field(...) - seed: int = Field(..., ge=0, le=2147483647) - duration: int = Field(5, ge=5, le=15) - prompt_extend: bool = Field(True) - watermark: bool = Field(False) - audio: bool = Field(False, description="Whether to generate audio automatically.") - shot_type: str = Field("single") - - -class Reference2VideoParametersField(BaseModel): - size: str = Field(...) - duration: int = Field(5, ge=5, le=15) - shot_type: str = Field("single") - seed: int = Field(..., ge=0, le=2147483647) - watermark: bool = Field(False) - - -class Text2ImageTaskCreationRequest(BaseModel): - model: str = Field(...) - input: Text2ImageInputField = Field(...) - parameters: Txt2ImageParametersField = Field(...) - - -class Image2ImageTaskCreationRequest(BaseModel): - model: str = Field(...) - input: Image2ImageInputField = Field(...) - parameters: Image2ImageParametersField = Field(...) - - -class Text2VideoTaskCreationRequest(BaseModel): - model: str = Field(...) - input: Text2VideoInputField = Field(...) - parameters: Text2VideoParametersField = Field(...) 
- - -class Image2VideoTaskCreationRequest(BaseModel): - model: str = Field(...) - input: Image2VideoInputField = Field(...) - parameters: Image2VideoParametersField = Field(...) - - -class Reference2VideoTaskCreationRequest(BaseModel): - model: str = Field(...) - input: Reference2VideoInputField = Field(...) - parameters: Reference2VideoParametersField = Field(...) - - -class TaskCreationOutputField(BaseModel): - task_id: str = Field(...) - task_status: str = Field(...) - - -class TaskCreationResponse(BaseModel): - output: TaskCreationOutputField | None = Field(None) - request_id: str = Field(...) - code: str | None = Field(None, description="Error code for the failed request.") - message: str | None = Field(None, description="Details about the failed request.") - - -class TaskResult(BaseModel): - url: str | None = Field(None) - code: str | None = Field(None) - message: str | None = Field(None) - - -class ImageTaskStatusOutputField(TaskCreationOutputField): - task_id: str = Field(...) - task_status: str = Field(...) - results: list[TaskResult] | None = Field(None) - - -class VideoTaskStatusOutputField(TaskCreationOutputField): - task_id: str = Field(...) - task_status: str = Field(...) - video_url: str | None = Field(None) - code: str | None = Field(None) - message: str | None = Field(None) - - -class ImageTaskStatusResponse(BaseModel): - output: ImageTaskStatusOutputField | None = Field(None) - request_id: str = Field(...) - - -class VideoTaskStatusResponse(BaseModel): - output: VideoTaskStatusOutputField | None = Field(None) - request_id: str = Field(...) 
- - RES_IN_PARENS = re.compile(r"\((\d+)\s*[x×]\s*(\d+)\)") @@ -179,7 +67,6 @@ class WanTextToImageApi(IO.ComfyNode): IO.Combo.Input( "model", options=["wan2.5-t2i-preview"], - default="wan2.5-t2i-preview", tooltip="Model to use.", ), IO.String.Input( @@ -931,7 +818,1380 @@ class WanReferenceVideoApi(IO.ComfyNode): response_model=VideoTaskStatusResponse, status_extractor=lambda x: x.output.task_status, poll_interval=6, - max_poll_attempts=280, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class Wan2TextToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Wan2TextToVideoApi", + display_name="Wan 2.7 Text to Video", + category="api node/video/Wan", + description="Generates a video based on a text prompt using the Wan 2.7 model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "wan2.7-t2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + default="", + tooltip="Negative prompt describing what to avoid.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Audio.Input( + "audio", + optional=True, + tooltip="Audio for driving video generation (e.g., lip sync, beat-matched motion). " + "Duration: 3s-30s. 
If not provided, the model automatically generates matching " + "background music or sound effects.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "prompt_extend", + default=True, + tooltip="Whether to enhance the prompt with AI assistance.", + advanced=True, + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + prompt_extend: bool, + watermark: bool, + audio: Input.Audio | None = None, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + audio_url = None + if audio is not None: + validate_audio_duration(audio, 1.5, 60.0) + audio_url = await upload_audio_to_comfyapi( + cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg" + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27Text2VideoTaskCreationRequest( + model=model["model"], + input=Text2VideoInputField( + prompt=model["prompt"], + negative_prompt=model["negative_prompt"] or None, + audio_url=audio_url, + ), + 
parameters=Wan27Text2VideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + prompt_extend=prompt_extend, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class Wan2ImageToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Wan2ImageToVideoApi", + display_name="Wan 2.7 Image to Video", + category="api node/video/Wan", + description="Generate a video from a first-frame image, with optional last-frame image and audio.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "wan2.7-i2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + default="", + tooltip="Negative prompt describing what to avoid.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Image.Input( + "first_frame", + tooltip="First frame image. The output aspect ratio is derived from this image.", + ), + IO.Image.Input( + "last_frame", + optional=True, + tooltip="Last frame image. 
The model generates a video transitioning from first to last frame.", + ), + IO.Audio.Input( + "audio", + optional=True, + tooltip="Audio for driving video generation (e.g., lip sync, beat-matched motion). " + "Duration: 2s-30s. If not provided, the model automatically generates matching " + "background music or sound effects.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "prompt_extend", + default=True, + tooltip="Whether to enhance the prompt with AI assistance.", + advanced=True, + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + seed: int, + prompt_extend: bool, + watermark: bool, + last_frame: Input.Image | None = None, + audio: Input.Audio | None = None, + ): + media = [ + Wan27MediaItem( + type="first_frame", + url=await upload_image_to_comfyapi(cls, image=first_frame), + ) + ] + if last_frame is not None: + media.append( + Wan27MediaItem( + type="last_frame", + url=await upload_image_to_comfyapi(cls, image=last_frame), + ) + ) + if audio is not None: + validate_audio_duration(audio, 2.0, 30.0) + audio_url = await upload_audio_to_comfyapi( + cls, audio, 
container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg" + ) + media.append(Wan27MediaItem(type="driving_audio", url=audio_url)) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ImageToVideoTaskCreationRequest( + model=model["model"], + input=Wan27ImageToVideoInputField( + prompt=model["prompt"] or None, + negative_prompt=model["negative_prompt"] or None, + media=media, + ), + parameters=Wan27ImageToVideoParametersField( + resolution=model["resolution"], + duration=model["duration"], + seed=seed, + prompt_extend=prompt_extend, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class Wan2VideoContinuationApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Wan2VideoContinuationApi", + display_name="Wan 2.7 Video Continuation", + category="api node/video/Wan", + description="Continue a video from where it left off, with optional last-frame control.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "wan2.7-i2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. 
Supports English and Chinese.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + default="", + tooltip="Negative prompt describing what to avoid.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + tooltip="Total output duration in seconds. The model generates continuation " + "to fill the remaining time after the input clip.", + ), + ], + ), + ], + ), + IO.Video.Input( + "first_clip", + tooltip="Input video to continue from. Duration: 2s-10s. " + "The output aspect ratio is derived from this video.", + ), + IO.Image.Input( + "last_frame", + optional=True, + tooltip="Last frame image. The continuation will transition towards this frame.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "prompt_extend", + default=True, + tooltip="Whether to enhance the prompt with AI assistance.", + advanced=True, + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, $res); + $outputPrice := $pps * $dur; + { + "type": "range_usd", + "min_usd": 2 * $pps + $outputPrice, + "max_usd": 5 * $pps + $outputPrice + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_clip: 
Input.Video, + prompt: str = "", + negative_prompt: str = "", + last_frame: Input.Image | None = None, + seed: int = 0, + prompt_extend: bool = True, + watermark: bool = False, + ): + validate_video_duration(first_clip, min_duration=2, max_duration=10) + media = [ + Wan27MediaItem( + type="first_clip", + url=await upload_video_to_comfyapi(cls, first_clip), + ) + ] + if last_frame is not None: + media.append( + Wan27MediaItem( + type="last_frame", + url=await upload_image_to_comfyapi(cls, image=last_frame), + ) + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ImageToVideoTaskCreationRequest( + model=model["model"], + input=Wan27ImageToVideoInputField( + prompt=model["prompt"] or None, + negative_prompt=model["negative_prompt"] or None, + media=media, + ), + parameters=Wan27ImageToVideoParametersField( + resolution=model["resolution"], + duration=model["duration"], + seed=seed, + prompt_extend=prompt_extend, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class Wan2VideoEditApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Wan2VideoEditApi", + display_name="Wan 2.7 Video Edit", + category="api node/video/Wan", + description="Edit a video using text instructions, reference images, or style transfer.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "wan2.7-videoedit", + [ + IO.String.Input( 
+ "prompt", + multiline=True, + default="", + tooltip="Editing instructions or style transfer requirements.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + tooltip="Aspect ratio. If not changed, approximates the input video ratio.", + ), + IO.Combo.Input( + "duration", + options=["auto", "2", "3", "4", "5", "6", "7", "8", "9", "10"], + default="auto", + tooltip="Output duration in seconds. 'auto' matches the input video duration. " + "A specific value truncates from the start of the video.", + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + ], + min=0, + ), + ), + ], + ), + ], + ), + IO.Video.Input( + "video", + tooltip="The video to edit.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Combo.Input( + "audio_setting", + options=["auto", "origin"], + default="auto", + tooltip="'auto': model decides whether to regenerate audio based on the prompt. 
" + "'origin': preserve the original audio from the input video.", + advanced=True, + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps, "format": { "suffix": "/second", "note": "(input + output)" } } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + video: Input.Video, + seed: int, + audio_setting: str, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + validate_video_duration(video, min_duration=2, max_duration=10) + duration = 0 if model["duration"] == "auto" else int(model["duration"]) + media = [Wan27MediaItem(type="video", url=await upload_video_to_comfyapi(cls, video))] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", url=await upload_image_to_comfyapi(cls, image=reference_images[key]) + ) + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27VideoEditTaskCreationRequest( + model=model["model"], + input=Wan27VideoEditInputField(prompt=model["prompt"], media=media), + parameters=Wan27VideoEditParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=duration, + audio_setting=audio_setting, + 
watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class Wan2ReferenceVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Wan2ReferenceVideoApi", + display_name="Wan 2.7 Reference to Video", + category="api node/video/Wan", + description="Generate a video featuring a person or object from reference materials. " + "Supports single-character performances and multi-character interactions.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "wan2.7-r2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the video. 
Use identifiers such as 'character1' and " + "'character2' to refer to the reference characters.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + default="", + tooltip="Negative prompt describing what to avoid.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=10, + step=1, + display_mode=IO.NumberDisplay.number, + ), + IO.Autogrow.Input( + "reference_videos", + template=IO.Autogrow.TemplateNames( + IO.Video.Input("reference_video"), + names=["video1", "video2", "video3"], + min=0, + ), + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=["image1", "image2", "image3", "image4", "image5"], + min=0, + ), + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, $res); + $outputPrice := $pps * $dur; + { + "type": "range_usd", + "min_usd": $outputPrice, + "max_usd": 5 * $pps + $outputPrice + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], 
strip_whitespace=False, min_length=1) + media = [] + reference_videos = model.get("reference_videos", {}) + for key in reference_videos: + media.append( + Wan27MediaItem(type="reference_video", url=await upload_video_to_comfyapi(cls, reference_videos[key])) + ) + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", + url=await upload_image_to_comfyapi(cls, image=reference_images[key]), + ) + ) + if not media: + raise ValueError("At least one reference video or reference image must be provided.") + if len(media) > 5: + raise ValueError( + f"Too many references ({len(media)}). The maximum total of reference videos and images is 5." + ) + + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ReferenceVideoTaskCreationRequest( + model=model["model"], + input=Wan27ReferenceVideoInputField( + prompt=model["prompt"], + negative_prompt=model["negative_prompt"] or None, + media=media, + ), + parameters=Wan27ReferenceVideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseTextToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseTextToVideoApi", + display_name="HappyHorse Text to Video", + category="api 
node/video/Wan", + description="Generates a video based on a text prompt using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-t2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27Text2VideoTaskCreationRequest( + model=model["model"], + 
input=Text2VideoInputField( + prompt=model["prompt"], + negative_prompt=None, + ), + parameters=Wan27Text2VideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseImageToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseImageToVideoApi", + display_name="HappyHorse Image to Video", + category="api node/video/Wan", + description="Generate a video from a first-frame image using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-i2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Image.Input( + "first_frame", + tooltip="First frame image. 
The output aspect ratio is derived from this image.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + seed: int, + watermark: bool, + ): + media = [ + Wan27MediaItem( + type="first_frame", + url=await upload_image_to_comfyapi(cls, image=first_frame), + ) + ] + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ImageToVideoTaskCreationRequest( + model=model["model"], + input=Wan27ImageToVideoInputField( + prompt=model["prompt"] or None, + negative_prompt=None, + media=media, + ), + parameters=Wan27ImageToVideoParametersField( + resolution=model["resolution"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + 
response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseVideoEditApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseVideoEditApi", + display_name="HappyHorse Video Edit", + category="api node/video/Wan", + description="Edit a video using text instructions or reference images with the HappyHorse model. " + "Output duration is 3-15s and matches the input video; inputs longer than 15s are truncated.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-video-edit", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Editing instructions or style transfer requirements.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + tooltip="Aspect ratio. 
If not changed, approximates the input video ratio.", + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + "image5", + ], + min=0, + ), + ), + ], + ), + ], + ), + IO.Video.Input( + "video", + tooltip="The video to edit.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps, "format": { "suffix": "/second" } } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + video: Input.Video, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + validate_video_duration(video, min_duration=3, max_duration=60) + media = [Wan27MediaItem(type="video", url=await upload_video_to_comfyapi(cls, video))] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", url=await upload_image_to_comfyapi(cls, image=reference_images[key]) + ) + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + 
data=Wan27VideoEditTaskCreationRequest( + model=model["model"], + input=Wan27VideoEditInputField(prompt=model["prompt"], media=media), + parameters=Wan27VideoEditParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=None, + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseReferenceVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseReferenceVideoApi", + display_name="HappyHorse Reference to Video", + category="api node/video/Wan", + description="Generate a video featuring a person or object from reference materials with the HappyHorse " + "model. Supports single-character performances and multi-character interactions.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-r2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the video. 
Use identifiers such as 'character1' and " + "'character2' to refer to the reference characters.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + "image5", + "image6", + "image7", + "image8", + "image9", + ], + min=1, + ), + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + media = [] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", + url=await upload_image_to_comfyapi(cls, image=reference_images[key]), + ) + ) + if not media: + raise ValueError("At least one reference 
reference image must be provided.") + + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ReferenceVideoTaskCreationRequest( + model=model["model"], + input=Wan27ReferenceVideoInputField( + prompt=model["prompt"], + negative_prompt=None, + media=media, + ), + parameters=Wan27ReferenceVideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, ) return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) @@ -945,6 +2205,15 @@ class WanApiExtension(ComfyExtension): WanTextToVideoApi, WanImageToVideoApi, WanReferenceVideoApi, + Wan2TextToVideoApi, + Wan2ImageToVideoApi, + Wan2VideoContinuationApi, + Wan2VideoEditApi, + Wan2ReferenceVideoApi, + HappyHorseTextToVideoApi, + HappyHorseImageToVideoApi, + HappyHorseVideoEditApi, + HappyHorseReferenceVideoApi, ] diff --git a/comfy_api_nodes/nodes_wavespeed.py b/comfy_api_nodes/nodes_wavespeed.py index c59fafd3b..65e45f60a 100644 --- a/comfy_api_nodes/nodes_wavespeed.py +++ b/comfy_api_nodes/nodes_wavespeed.py @@ -84,7 +84,6 @@ class WavespeedFlashVSRNode(IO.ComfyNode): response_model=TaskResultResponse, status_extractor=lambda x: "failed" if x.data is None else x.data.status, poll_interval=10.0, - max_poll_attempts=480, ) if final_response.code != 200: raise ValueError( @@ -156,7 +155,6 @@ class WavespeedImageUpscaleNode(IO.ComfyNode): response_model=TaskResultResponse, status_extractor=lambda 
x: "failed" if x.data is None else x.data.status, poll_interval=10.0, - max_poll_attempts=480, ) if final_response.code != 200: raise ValueError( diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py index 0cb9a47c7..f3584aba9 100644 --- a/comfy_api_nodes/util/__init__.py +++ b/comfy_api_nodes/util/__init__.py @@ -19,6 +19,7 @@ from .conversions import ( image_tensor_pair_to_batch, pil_to_bytesio, resize_mask_to_image, + resize_video_to_pixel_budget, tensor_to_base64_string, tensor_to_bytesio, tensor_to_pil, @@ -90,6 +91,7 @@ __all__ = [ "image_tensor_pair_to_batch", "pil_to_bytesio", "resize_mask_to_image", + "resize_video_to_pixel_budget", "tensor_to_base64_string", "tensor_to_bytesio", "tensor_to_pil", diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py index 9d730b81a..052301c33 100644 --- a/comfy_api_nodes/util/client.py +++ b/comfy_api_nodes/util/client.py @@ -19,6 +19,8 @@ from comfy import utils from comfy_api.latest import IO from server import PromptServer +from comfy.deploy_environment import get_deploy_environment + from . 
import request_logger from ._helpers import ( default_base_url, @@ -148,7 +150,7 @@ async def poll_op( queued_statuses: list[str | int] | None = None, data: BaseModel | None = None, poll_interval: float = 5.0, - max_poll_attempts: int = 160, + max_poll_attempts: int = 480, timeout_per_poll: float = 120.0, max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, @@ -156,6 +158,7 @@ async def poll_op( estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, + extra_text: str | None = None, ) -> M: raw = await poll_op_raw( cls, @@ -176,6 +179,7 @@ async def poll_op( estimated_duration=estimated_duration, cancel_endpoint=cancel_endpoint, cancel_timeout=cancel_timeout, + extra_text=extra_text, ) if not isinstance(raw, dict): raise Exception("Expected JSON response to validate into a Pydantic model, got non-JSON (binary or text).") @@ -252,7 +256,7 @@ async def poll_op_raw( queued_statuses: list[str | int] | None = None, data: dict[str, Any] | BaseModel | None = None, poll_interval: float = 5.0, - max_poll_attempts: int = 160, + max_poll_attempts: int = 480, timeout_per_poll: float = 120.0, max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, @@ -260,6 +264,7 @@ async def poll_op_raw( estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, + extra_text: str | None = None, ) -> dict[str, Any]: """ Polls an endpoint until the task reaches a terminal state. 
Displays time while queued/processing, @@ -299,6 +304,7 @@ async def poll_op_raw( price=state.price, is_queued=state.is_queued, processing_elapsed_seconds=int(proc_elapsed), + extra_text=extra_text, ) await asyncio.sleep(1.0) except Exception as exc: @@ -389,6 +395,7 @@ async def poll_op_raw( price=state.price, is_queued=False, processing_elapsed_seconds=int(state.base_processing_elapsed), + extra_text=extra_text, ) return resp_json @@ -462,6 +469,7 @@ def _display_time_progress( price: float | None = None, is_queued: bool | None = None, processing_elapsed_seconds: int | None = None, + extra_text: str | None = None, ) -> None: if estimated_total is not None and estimated_total > 0 and is_queued is False: pe = processing_elapsed_seconds if processing_elapsed_seconds is not None else elapsed_seconds @@ -469,7 +477,8 @@ def _display_time_progress( time_line = f"Time elapsed: {int(elapsed_seconds)}s (~{remaining}s remaining)" else: time_line = f"Time elapsed: {int(elapsed_seconds)}s" - _display_text(node_cls, time_line, status=status, price=price) + text = f"{time_line}\n\n{extra_text}" if extra_text else time_line + _display_text(node_cls, text, status=status, price=price) async def _diagnose_connectivity() -> dict[str, bool]: @@ -479,10 +488,30 @@ async def _diagnose_connectivity() -> dict[str, bool]: "api_accessible": False, } timeout = aiohttp.ClientTimeout(total=5.0) + + # Probe Google and Baidu in parallel: Google is blocked by the GFW in mainland China, so a Baidu probe is required + # to correctly detect that Chinese users with working internet do have working internet. 
+ internet_probe_urls = ("https://www.google.com", "https://www.baidu.com") + async with aiohttp.ClientSession(timeout=timeout) as session: - with contextlib.suppress(ClientError, OSError): - async with session.get("https://www.google.com") as resp: - results["internet_accessible"] = resp.status < 500 + async def _probe(url: str) -> bool: + try: + async with session.get(url) as resp: + return resp.status < 500 + except (ClientError, OSError, asyncio.TimeoutError): + return False + + probe_tasks = [asyncio.create_task(_probe(u)) for u in internet_probe_urls] + try: + for fut in asyncio.as_completed(probe_tasks): + if await fut: + results["internet_accessible"] = True + break + finally: + for t in probe_tasks: + if not t.done(): + t.cancel() + await asyncio.gather(*probe_tasks, return_exceptions=True) if not results["internet_accessible"]: return results @@ -617,6 +646,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): payload_headers = {"Accept": "*/*"} if expect_binary else {"Accept": "application/json"} if not parsed_url.scheme and not parsed_url.netloc: # is URL relative? payload_headers.update(get_auth_header(cfg.node_cls)) + payload_headers["Comfy-Env"] = get_deploy_environment() if cfg.endpoint.headers: payload_headers.update(cfg.endpoint.headers) diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index 82b6d22a5..be5d5719b 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -129,22 +129,38 @@ def pil_to_bytesio(img: Image.Image, mime_type: str = "image/png") -> BytesIO: return img_byte_arr +def _compute_downscale_dims(src_w: int, src_h: int, total_pixels: int) -> tuple[int, int] | None: + """Return downscaled (w, h) with even dims fitting ``total_pixels``, or None if already fits. + + Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions + are rounded down to even values (many codecs require divisible-by-2). 
+ """ + pixels = src_w * src_h + if pixels <= total_pixels: + return None + scale = math.sqrt(total_pixels / pixels) + new_w = max(2, int(src_w * scale)) + new_h = max(2, int(src_h * scale)) + new_w -= new_w % 2 + new_h -= new_h % 2 + return new_w, new_h + + def downscale_image_tensor(image: torch.Tensor, total_pixels: int = 1536 * 1024) -> torch.Tensor: - """Downscale input image tensor to roughly the specified total pixels.""" + """Downscale input image tensor to roughly the specified total pixels. + + Output dimensions are rounded down to even values so that the result is guaranteed to fit within ``total_pixels`` + and is compatible with codecs that require even dimensions (e.g. yuv420p). + """ samples = image.movedim(-1, 1) - total = int(total_pixels) - scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) - if scale_by >= 1: + dims = _compute_downscale_dims(samples.shape[3], samples.shape[2], int(total_pixels)) + if dims is None: return image - width = round(samples.shape[3] * scale_by) - height = round(samples.shape[2] * scale_by) - - s = common_upscale(samples, width, height, "lanczos", "disabled") - s = s.movedim(1, -1) - return s + new_w, new_h = dims + return common_upscale(samples, new_w, new_h, "lanczos", "disabled").movedim(1, -1) -def downscale_image_tensor_by_max_side(image: torch.Tensor, *, max_side: int) -> torch.Tensor: +def downscale_image_tensor_by_max_side(image: torch.Tensor, *, max_side: int) -> torch.Tensor: """Downscale input image tensor so the largest dimension is at most max_side pixels.""" samples = image.movedim(-1, 1) height, width = samples.shape[2], samples.shape[3] @@ -399,6 +415,72 @@ def trim_video(video: Input.Video, duration_sec: float) -> Input.Video: raise RuntimeError(f"Failed to trim video: {str(e)}") from e +def resize_video_to_pixel_budget(video: Input.Video, total_pixels: int) -> Input.Video: + """Downscale a video to fit within ``total_pixels`` (w * h), preserving aspect ratio. 
+ + Returns the original video object untouched when it already fits. Preserves frame rate, duration, and audio. + Aspect ratio is preserved up to a fraction of a percent (even-dim rounding). + """ + src_w, src_h = video.get_dimensions() + scale_dims = _compute_downscale_dims(src_w, src_h, total_pixels) + if scale_dims is None: + return video + return _apply_video_scale(video, scale_dims) + + +def _apply_video_scale(video: Input.Video, scale_dims: tuple[int, int]) -> Input.Video: + """Re-encode ``video`` scaled to ``scale_dims`` with a single decode/encode pass.""" + out_w, out_h = scale_dims + output_buffer = BytesIO() + input_container = None + output_container = None + + try: + input_source = video.get_stream_source() + input_container = av.open(input_source, mode="r") + output_container = av.open(output_buffer, mode="w", format="mp4") + + video_stream = output_container.add_stream("h264", rate=video.get_frame_rate()) + video_stream.width = out_w + video_stream.height = out_h + video_stream.pix_fmt = "yuv420p" + + audio_stream = None + for stream in input_container.streams: + if isinstance(stream, av.AudioStream): + audio_stream = output_container.add_stream("aac", rate=stream.sample_rate) + audio_stream.sample_rate = stream.sample_rate + audio_stream.layout = stream.layout + break + + for frame in input_container.decode(video=0): + frame = frame.reformat(width=out_w, height=out_h, format="yuv420p") + for packet in video_stream.encode(frame): + output_container.mux(packet) + for packet in video_stream.encode(): + output_container.mux(packet) + + if audio_stream is not None: + input_container.seek(0) + for audio_frame in input_container.decode(audio=0): + for packet in audio_stream.encode(audio_frame): + output_container.mux(packet) + for packet in audio_stream.encode(): + output_container.mux(packet) + + output_container.close() + input_container.close() + output_buffer.seek(0) + return InputImpl.VideoFromFile(output_buffer) + + except Exception as e: + if 
input_container is not None: + input_container.close() + if output_container is not None: + output_container.close() + raise RuntimeError(f"Failed to resize video: {str(e)}") from e + + def _f32_pcm(wav: torch.Tensor) -> torch.Tensor: """Convert audio to float 32 bits PCM format. Copy-paste from nodes_audio.py file.""" if wav.dtype.is_floating_point: diff --git a/comfy_execution/caching.py b/comfy_execution/caching.py index f9c913bdb..ba1e8bc84 100644 --- a/comfy_execution/caching.py +++ b/comfy_execution/caching.py @@ -5,6 +5,7 @@ import psutil import time import torch from typing import Sequence, Mapping, Dict +from comfy.model_patcher import ModelPatcher from comfy_execution.graph import DynamicPrompt from abc import ABC, abstractmethod @@ -523,13 +524,15 @@ class RAMPressureCache(LRUCache): self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time() super().set_local(node_id, value) - def ram_release(self, target): + def ram_release(self, target, free_active=False): if psutil.virtual_memory().available >= target: return clean_list = [] for key, cache_entry in self.cache.items(): + if not free_active and self.used_generation[key] == self.generation: + continue oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key]) ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE @@ -542,6 +545,9 @@ class RAMPressureCache(LRUCache): scan_list_for_ram_usage(output) elif isinstance(output, torch.Tensor) and output.device.type == 'cpu': ram_usage += output.numel() * output.element_size() + elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation: + #old ModelPatchers are the first to go + ram_usage = 1e30 scan_list_for_ram_usage(cache_entry.outputs) oom_score *= ram_usage diff --git a/comfy_extras/frame_interpolation_models/film_net.py b/comfy_extras/frame_interpolation_models/film_net.py new file mode 100644 index 000000000..36bc79dc3 --- /dev/null +++ b/comfy_extras/frame_interpolation_models/film_net.py 
@@ -0,0 +1,261 @@ +"""FILM: Frame Interpolation for Large Motion (ECCV 2022).""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops + +ops = comfy.ops.disable_weight_init + + +class FilmConv2d(nn.Module): + """Conv2d with optional LeakyReLU and FILM-style padding.""" + + def __init__(self, in_channels, out_channels, size, activation=True, device=None, dtype=None, operations=ops): + super().__init__() + self.even_pad = not size % 2 + self.conv = operations.Conv2d(in_channels, out_channels, kernel_size=size, padding=size // 2 if size % 2 else 0, device=device, dtype=dtype) + self.activation = nn.LeakyReLU(0.2) if activation else None + + def forward(self, x): + if self.even_pad: + x = F.pad(x, (0, 1, 0, 1)) + x = self.conv(x) + if self.activation is not None: + x = self.activation(x) + return x + + +def _warp_core(image, flow, grid_x, grid_y): + dtype = image.dtype + H, W = flow.shape[2], flow.shape[3] + dx = flow[:, 0].float() / (W * 0.5) + dy = flow[:, 1].float() / (H * 0.5) + grid = torch.stack([grid_x[None, None, :] + dx, grid_y[None, :, None] + dy], dim=3) + return F.grid_sample(image.float(), grid, mode="bilinear", padding_mode="border", align_corners=False).to(dtype) + + +def build_image_pyramid(image, pyramid_levels): + pyramid = [image] + for _ in range(1, pyramid_levels): + image = F.avg_pool2d(image, 2, 2) + pyramid.append(image) + return pyramid + + +def flow_pyramid_synthesis(residual_pyramid): + flow = residual_pyramid[-1] + flow_pyramid = [flow] + for residual_flow in residual_pyramid[:-1][::-1]: + flow = F.interpolate(flow, size=residual_flow.shape[2:4], mode="bilinear", scale_factor=None).mul_(2).add_(residual_flow) + flow_pyramid.append(flow) + flow_pyramid.reverse() + return flow_pyramid + + +def multiply_pyramid(pyramid, scalar): + return [image * scalar[:, None, None, None] for image in pyramid] + + +def pyramid_warp(feature_pyramid, flow_pyramid, warp_fn): + return [warp_fn(features, flow) for features, 
flow in zip(feature_pyramid, flow_pyramid)] + + +def concatenate_pyramids(pyramid1, pyramid2): + return [torch.cat([f1, f2], dim=1) for f1, f2 in zip(pyramid1, pyramid2)] + + +class SubTreeExtractor(nn.Module): + def __init__(self, in_channels=3, channels=64, n_layers=4, device=None, dtype=None, operations=ops): + super().__init__() + convs = [] + for i in range(n_layers): + out_ch = channels << i + convs.append(nn.Sequential( + FilmConv2d(in_channels, out_ch, 3, device=device, dtype=dtype, operations=operations), + FilmConv2d(out_ch, out_ch, 3, device=device, dtype=dtype, operations=operations))) + in_channels = out_ch + self.convs = nn.ModuleList(convs) + + def forward(self, image, n): + head = image + pyramid = [] + for i, layer in enumerate(self.convs): + head = layer(head) + pyramid.append(head) + if i < n - 1: + head = F.avg_pool2d(head, 2, 2) + return pyramid + + +class FeatureExtractor(nn.Module): + def __init__(self, in_channels=3, channels=64, sub_levels=4, device=None, dtype=None, operations=ops): + super().__init__() + self.extract_sublevels = SubTreeExtractor(in_channels, channels, sub_levels, device=device, dtype=dtype, operations=operations) + self.sub_levels = sub_levels + + def forward(self, image_pyramid): + sub_pyramids = [self.extract_sublevels(image_pyramid[i], min(len(image_pyramid) - i, self.sub_levels)) + for i in range(len(image_pyramid))] + feature_pyramid = [] + for i in range(len(image_pyramid)): + features = sub_pyramids[i][0] + for j in range(1, self.sub_levels): + if j <= i: + features = torch.cat([features, sub_pyramids[i - j][j]], dim=1) + feature_pyramid.append(features) + # Free sub-pyramids no longer needed by future levels + if i >= self.sub_levels - 1: + sub_pyramids[i - self.sub_levels + 1] = None + return feature_pyramid + + +class FlowEstimator(nn.Module): + def __init__(self, in_channels, num_convs, num_filters, device=None, dtype=None, operations=ops): + super().__init__() + self._convs = nn.ModuleList() + for _ in 
range(num_convs): + self._convs.append(FilmConv2d(in_channels, num_filters, 3, device=device, dtype=dtype, operations=operations)) + in_channels = num_filters + self._convs.append(FilmConv2d(in_channels, num_filters // 2, 1, device=device, dtype=dtype, operations=operations)) + self._convs.append(FilmConv2d(num_filters // 2, 2, 1, activation=False, device=device, dtype=dtype, operations=operations)) + + def forward(self, features_a, features_b): + net = torch.cat([features_a, features_b], dim=1) + for conv in self._convs: + net = conv(net) + return net + + +class PyramidFlowEstimator(nn.Module): + def __init__(self, filters=64, flow_convs=(3, 3, 3, 3), flow_filters=(32, 64, 128, 256), device=None, dtype=None, operations=ops): + super().__init__() + in_channels = filters << 1 + predictors = [] + for i in range(len(flow_convs)): + predictors.append(FlowEstimator(in_channels, flow_convs[i], flow_filters[i], device=device, dtype=dtype, operations=operations)) + in_channels += filters << (i + 2) + self._predictor = predictors[-1] + self._predictors = nn.ModuleList(predictors[:-1][::-1]) + + def forward(self, feature_pyramid_a, feature_pyramid_b, warp_fn): + levels = len(feature_pyramid_a) + v = self._predictor(feature_pyramid_a[-1], feature_pyramid_b[-1]) + residuals = [v] + # Coarse-to-fine: shared predictor for deep levels, then specialized predictors for fine levels + steps = [(i, self._predictor) for i in range(levels - 2, len(self._predictors) - 1, -1)] + steps += [(len(self._predictors) - 1 - k, p) for k, p in enumerate(self._predictors)] + for i, predictor in steps: + v = F.interpolate(v, size=feature_pyramid_a[i].shape[2:4], mode="bilinear").mul_(2) + v_residual = predictor(feature_pyramid_a[i], warp_fn(feature_pyramid_b[i], v)) + residuals.append(v_residual) + v = v.add_(v_residual) + residuals.reverse() + return residuals + + +def _get_fusion_channels(level, filters): + # Per direction: multi-scale features + RGB image (3ch) + flow (2ch), doubled for both 
directions + return (sum(filters << i for i in range(level)) + 3 + 2) * 2 + + +class Fusion(nn.Module): + def __init__(self, n_layers=4, specialized_layers=3, filters=64, device=None, dtype=None, operations=ops): + super().__init__() + self.output_conv = operations.Conv2d(filters, 3, kernel_size=1, device=device, dtype=dtype) + self.convs = nn.ModuleList() + in_channels = _get_fusion_channels(n_layers, filters) + increase = 0 + for i in range(n_layers)[::-1]: + num_filters = (filters << i) if i < specialized_layers else (filters << specialized_layers) + self.convs.append(nn.ModuleList([ + FilmConv2d(in_channels, num_filters, 2, activation=False, device=device, dtype=dtype, operations=operations), + FilmConv2d(in_channels + (increase or num_filters), num_filters, 3, device=device, dtype=dtype, operations=operations), + FilmConv2d(num_filters, num_filters, 3, device=device, dtype=dtype, operations=operations)])) + in_channels = num_filters + increase = _get_fusion_channels(i, filters) - num_filters // 2 + + def forward(self, pyramid): + net = pyramid[-1] + for k, layers in enumerate(self.convs): + i = len(self.convs) - 1 - k + net = layers[0](F.interpolate(net, size=pyramid[i].shape[2:4], mode="nearest")) + net = layers[2](layers[1](torch.cat([pyramid[i], net], dim=1))) + return self.output_conv(net) + + +class FILMNet(nn.Module): + def __init__(self, pyramid_levels=7, fusion_pyramid_levels=5, specialized_levels=3, sub_levels=4, + filters=64, flow_convs=(3, 3, 3, 3), flow_filters=(32, 64, 128, 256), device=None, dtype=None, operations=ops): + super().__init__() + self.pyramid_levels = pyramid_levels + self.fusion_pyramid_levels = fusion_pyramid_levels + self.extract = FeatureExtractor(3, filters, sub_levels, device=device, dtype=dtype, operations=operations) + self.predict_flow = PyramidFlowEstimator(filters, flow_convs, flow_filters, device=device, dtype=dtype, operations=operations) + self.fuse = Fusion(sub_levels, specialized_levels, filters, device=device, 
dtype=dtype, operations=operations) + self._warp_grids = {} + + def get_dtype(self): + return self.extract.extract_sublevels.convs[0][0].conv.weight.dtype + + def memory_used_forward(self, shape, dtype): + return 1700 * shape[1] * shape[2] * dtype.itemsize + + def _build_warp_grids(self, H, W, device): + """Pre-compute warp grids for all pyramid levels.""" + if (H, W) in self._warp_grids: + return + self._warp_grids = {} # clear old resolution grids to prevent memory leaks + for _ in range(self.pyramid_levels): + self._warp_grids[(H, W)] = ( + torch.linspace(-(1 - 1 / W), 1 - 1 / W, W, dtype=torch.float32, device=device), + torch.linspace(-(1 - 1 / H), 1 - 1 / H, H, dtype=torch.float32, device=device), + ) + H, W = H // 2, W // 2 + + def warp(self, image, flow): + grid_x, grid_y = self._warp_grids[(flow.shape[2], flow.shape[3])] + return _warp_core(image, flow, grid_x, grid_y) + + def extract_features(self, img): + """Extract image and feature pyramids for a single frame. Can be cached across pairs.""" + image_pyramid = build_image_pyramid(img, self.pyramid_levels) + feature_pyramid = self.extract(image_pyramid) + return image_pyramid, feature_pyramid + + def forward(self, img0, img1, timestep=0.5, cache=None): + # FILM uses a scalar timestep per batch element (spatially-varying timesteps not supported) + t = timestep.mean(dim=(1, 2, 3)).item() if isinstance(timestep, torch.Tensor) else timestep + return self.forward_multi_timestep(img0, img1, [t], cache=cache) + + def forward_multi_timestep(self, img0, img1, timesteps, cache=None): + """Compute flow once, synthesize at multiple timesteps. 
Expects batch=1 inputs.""" + self._build_warp_grids(img0.shape[2], img0.shape[3], img0.device) + + image_pyr0, feat_pyr0 = cache["img0"] if cache and "img0" in cache else self.extract_features(img0) + image_pyr1, feat_pyr1 = cache["img1"] if cache and "img1" in cache else self.extract_features(img1) + + fwd_flow = flow_pyramid_synthesis(self.predict_flow(feat_pyr0, feat_pyr1, self.warp))[:self.fusion_pyramid_levels] + bwd_flow = flow_pyramid_synthesis(self.predict_flow(feat_pyr1, feat_pyr0, self.warp))[:self.fusion_pyramid_levels] + + # Build warp targets and free full pyramids (only first fpl levels needed from here) + fpl = self.fusion_pyramid_levels + p2w = [concatenate_pyramids(image_pyr0[:fpl], feat_pyr0[:fpl]), + concatenate_pyramids(image_pyr1[:fpl], feat_pyr1[:fpl])] + del image_pyr0, image_pyr1, feat_pyr0, feat_pyr1 + + results = [] + dt_tensors = torch.tensor(timesteps, device=img0.device, dtype=img0.dtype) + for idx in range(len(timesteps)): + batch_dt = dt_tensors[idx:idx + 1] + bwd_scaled = multiply_pyramid(bwd_flow, batch_dt) + fwd_scaled = multiply_pyramid(fwd_flow, 1 - batch_dt) + fwd_warped = pyramid_warp(p2w[0], bwd_scaled, self.warp) + bwd_warped = pyramid_warp(p2w[1], fwd_scaled, self.warp) + aligned = [torch.cat([fw, bw, bf, ff], dim=1) + for fw, bw, bf, ff in zip(fwd_warped, bwd_warped, bwd_scaled, fwd_scaled)] + del fwd_warped, bwd_warped, bwd_scaled, fwd_scaled + results.append(self.fuse(aligned)) + del aligned + return torch.cat(results, dim=0) diff --git a/comfy_extras/frame_interpolation_models/ifnet.py b/comfy_extras/frame_interpolation_models/ifnet.py new file mode 100644 index 000000000..ad6edbec9 --- /dev/null +++ b/comfy_extras/frame_interpolation_models/ifnet.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops + +ops = comfy.ops.disable_weight_init + + +def _warp(img, flow, warp_grids): + B, _, H, W = img.shape + base_grid, flow_div = warp_grids[(H, W)] + flow_norm = 
torch.cat([flow[:, 0:1] / flow_div[0], flow[:, 1:2] / flow_div[1]], 1).float() + grid = (base_grid.expand(B, -1, -1, -1) + flow_norm).permute(0, 2, 3, 1) + return F.grid_sample(img.float(), grid, mode="bilinear", padding_mode="border", align_corners=True).to(img.dtype) + + +class Head(nn.Module): + def __init__(self, out_ch=4, device=None, dtype=None, operations=ops): + super().__init__() + self.cnn0 = operations.Conv2d(3, 16, 3, 2, 1, device=device, dtype=dtype) + self.cnn1 = operations.Conv2d(16, 16, 3, 1, 1, device=device, dtype=dtype) + self.cnn2 = operations.Conv2d(16, 16, 3, 1, 1, device=device, dtype=dtype) + self.cnn3 = operations.ConvTranspose2d(16, out_ch, 4, 2, 1, device=device, dtype=dtype) + self.relu = nn.LeakyReLU(0.2, True) + + def forward(self, x): + x = self.relu(self.cnn0(x)) + x = self.relu(self.cnn1(x)) + x = self.relu(self.cnn2(x)) + return self.cnn3(x) + + +class ResConv(nn.Module): + def __init__(self, c, device=None, dtype=None, operations=ops): + super().__init__() + self.conv = operations.Conv2d(c, c, 3, 1, 1, device=device, dtype=dtype) + self.beta = nn.Parameter(torch.ones((1, c, 1, 1), device=device, dtype=dtype)) + self.relu = nn.LeakyReLU(0.2, True) + + def forward(self, x): + return self.relu(torch.addcmul(x, self.conv(x), self.beta)) + + +class IFBlock(nn.Module): + def __init__(self, in_planes, c=64, device=None, dtype=None, operations=ops): + super().__init__() + self.conv0 = nn.Sequential( + nn.Sequential(operations.Conv2d(in_planes, c // 2, 3, 2, 1, device=device, dtype=dtype), nn.LeakyReLU(0.2, True)), + nn.Sequential(operations.Conv2d(c // 2, c, 3, 2, 1, device=device, dtype=dtype), nn.LeakyReLU(0.2, True))) + self.convblock = nn.Sequential(*(ResConv(c, device=device, dtype=dtype, operations=operations) for _ in range(8))) + self.lastconv = nn.Sequential(operations.ConvTranspose2d(c, 4 * 13, 4, 2, 1, device=device, dtype=dtype), nn.PixelShuffle(2)) + + def forward(self, x, flow=None, scale=1): + x = F.interpolate(x, 
scale_factor=1.0 / scale, mode="bilinear") + if flow is not None: + flow = F.interpolate(flow, scale_factor=1.0 / scale, mode="bilinear").div_(scale) + x = torch.cat((x, flow), 1) + feat = self.convblock(self.conv0(x)) + tmp = F.interpolate(self.lastconv(feat), scale_factor=scale, mode="bilinear") + return tmp[:, :4] * scale, tmp[:, 4:5], tmp[:, 5:] + + +class IFNet(nn.Module): + def __init__(self, head_ch=4, channels=(192, 128, 96, 64, 32), device=None, dtype=None, operations=ops): + super().__init__() + self.encode = Head(out_ch=head_ch, device=device, dtype=dtype, operations=operations) + block_in = [7 + 2 * head_ch] + [8 + 4 + 8 + 2 * head_ch] * 4 + self.blocks = nn.ModuleList([IFBlock(block_in[i], channels[i], device=device, dtype=dtype, operations=operations) for i in range(5)]) + self.scale_list = [16, 8, 4, 2, 1] + self.pad_align = 64 + self._warp_grids = {} + + def get_dtype(self): + return self.encode.cnn0.weight.dtype + + def memory_used_forward(self, shape, dtype): + return 300 * shape[1] * shape[2] * dtype.itemsize + + def _build_warp_grids(self, H, W, device): + if (H, W) in self._warp_grids: + return + self._warp_grids = {} # clear old resolution grids to prevent memory leaks + grid_y, grid_x = torch.meshgrid( + torch.linspace(-1.0, 1.0, H, device=device, dtype=torch.float32), + torch.linspace(-1.0, 1.0, W, device=device, dtype=torch.float32), indexing="ij") + self._warp_grids[(H, W)] = ( + torch.stack((grid_x, grid_y), dim=0).unsqueeze(0), + torch.tensor([(W - 1.0) / 2.0, (H - 1.0) / 2.0], dtype=torch.float32, device=device)) + + def warp(self, img, flow): + return _warp(img, flow, self._warp_grids) + + def extract_features(self, img): + """Extract head features for a single frame. 
Can be cached across pairs.""" + return self.encode(img) + + def forward(self, img0, img1, timestep=0.5, cache=None): + if not isinstance(timestep, torch.Tensor): + timestep = torch.full((img0.shape[0], 1, img0.shape[2], img0.shape[3]), timestep, device=img0.device, dtype=img0.dtype) + + self._build_warp_grids(img0.shape[2], img0.shape[3], img0.device) + + B = img0.shape[0] + f0 = cache["img0"].expand(B, -1, -1, -1) if cache and "img0" in cache else self.encode(img0) + f1 = cache["img1"].expand(B, -1, -1, -1) if cache and "img1" in cache else self.encode(img1) + flow = mask = feat = None + warped_img0, warped_img1 = img0, img1 + for i, block in enumerate(self.blocks): + if flow is None: + flow, mask, feat = block(torch.cat((img0, img1, f0, f1, timestep), 1), None, scale=self.scale_list[i]) + else: + fd, mask, feat = block( + torch.cat((warped_img0, warped_img1, self.warp(f0, flow[:, :2]), self.warp(f1, flow[:, 2:4]), timestep, mask, feat), 1), + flow, scale=self.scale_list[i]) + flow = flow.add_(fd) + warped_img0 = self.warp(img0, flow[:, :2]) + warped_img1 = self.warp(img1, flow[:, 2:4]) + return torch.lerp(warped_img1, warped_img0, torch.sigmoid(mask)) + + +def detect_rife_config(state_dict): + head_ch = state_dict["encode.cnn3.weight"].shape[1] # ConvTranspose2d: (in_ch, out_ch, kH, kW) + channels = [] + for i in range(5): + key = f"blocks.{i}.conv0.1.0.weight" + if key in state_dict: + channels.append(state_dict[key].shape[0]) + if len(channels) != 5: + raise ValueError(f"Unsupported RIFE model: expected 5 blocks, found {len(channels)}") + return head_ch, channels diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py index 9cf84ab4d..affcf3b71 100644 --- a/comfy_extras/nodes_ace.py +++ b/comfy_extras/nodes_ace.py @@ -3,136 +3,136 @@ from typing_extensions import override import comfy.model_management import node_helpers -from comfy_api.latest import ComfyExtension, io +from comfy_api.latest import ComfyExtension, IO -class 
TextEncodeAceStepAudio(io.ComfyNode): +class TextEncodeAceStepAudio(IO.ComfyNode): @classmethod def define_schema(cls): - return io.Schema( + return IO.Schema( node_id="TextEncodeAceStepAudio", category="conditioning", inputs=[ - io.Clip.Input("clip"), - io.String.Input("tags", multiline=True, dynamic_prompts=True), - io.String.Input("lyrics", multiline=True, dynamic_prompts=True), - io.Float.Input("lyrics_strength", default=1.0, min=0.0, max=10.0, step=0.01), + IO.Clip.Input("clip"), + IO.String.Input("tags", multiline=True, dynamic_prompts=True), + IO.String.Input("lyrics", multiline=True, dynamic_prompts=True), + IO.Float.Input("lyrics_strength", default=1.0, min=0.0, max=10.0, step=0.01), ], - outputs=[io.Conditioning.Output()], + outputs=[IO.Conditioning.Output()], ) @classmethod - def execute(cls, clip, tags, lyrics, lyrics_strength) -> io.NodeOutput: + def execute(cls, clip, tags, lyrics, lyrics_strength) -> IO.NodeOutput: tokens = clip.tokenize(tags, lyrics=lyrics) conditioning = clip.encode_from_tokens_scheduled(tokens) conditioning = node_helpers.conditioning_set_values(conditioning, {"lyrics_strength": lyrics_strength}) - return io.NodeOutput(conditioning) + return IO.NodeOutput(conditioning) -class TextEncodeAceStepAudio15(io.ComfyNode): +class TextEncodeAceStepAudio15(IO.ComfyNode): @classmethod def define_schema(cls): - return io.Schema( + return IO.Schema( node_id="TextEncodeAceStepAudio1.5", category="conditioning", inputs=[ - io.Clip.Input("clip"), - io.String.Input("tags", multiline=True, dynamic_prompts=True), - io.String.Input("lyrics", multiline=True, dynamic_prompts=True), - io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), - io.Int.Input("bpm", default=120, min=10, max=300), - io.Float.Input("duration", default=120.0, min=0.0, max=2000.0, step=0.1), - io.Combo.Input("timesignature", options=['2', '3', '4', '6']), - io.Combo.Input("language", options=["en", "ja", "zh", "es", "de", "fr", "pt", "ru", 
"it", "nl", "pl", "tr", "vi", "cs", "fa", "id", "ko", "uk", "hu", "ar", "sv", "ro", "el"]), - io.Combo.Input("keyscale", options=[f"{root} {quality}" for quality in ["major", "minor"] for root in ["C", "C#", "Db", "D", "D#", "Eb", "E", "F", "F#", "Gb", "G", "G#", "Ab", "A", "A#", "Bb", "B"]]), - io.Boolean.Input("generate_audio_codes", default=True, tooltip="Enable the LLM that generates audio codes. This can be slow but will increase the quality of the generated audio. Turn this off if you are giving the model an audio reference.", advanced=True), - io.Float.Input("cfg_scale", default=2.0, min=0.0, max=100.0, step=0.1, advanced=True), - io.Float.Input("temperature", default=0.85, min=0.0, max=2.0, step=0.01, advanced=True), - io.Float.Input("top_p", default=0.9, min=0.0, max=2000.0, step=0.01, advanced=True), - io.Int.Input("top_k", default=0, min=0, max=100, advanced=True), - io.Float.Input("min_p", default=0.000, min=0.0, max=1.0, step=0.001, advanced=True), + IO.Clip.Input("clip"), + IO.String.Input("tags", multiline=True, dynamic_prompts=True), + IO.String.Input("lyrics", multiline=True, dynamic_prompts=True), + IO.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), + IO.Int.Input("bpm", default=120, min=10, max=300), + IO.Float.Input("duration", default=120.0, min=0.0, max=2000.0, step=0.1), + IO.Combo.Input("timesignature", options=['2', '3', '4', '6']), + IO.Combo.Input("language", options=['ar', 'az', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'he', 'hi', 'hr', 'ht', 'hu', 'id', 'is', 'it', 'ja', 'ko', 'la', 'lt', 'ms', 'ne', 'nl', 'no', 'pa', 'pl', 'pt', 'ro', 'ru', 'sa', 'sk', 'sr', 'sv', 'sw', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'yue', 'zh', 'unknown'], default='en'), + IO.Combo.Input("keyscale", options=[f"{root} {quality}" for quality in ["major", "minor"] for root in ["C", "C#", "Db", "D", "D#", "Eb", "E", "F", "F#", "Gb", "G", "G#", "Ab", "A", "A#", "Bb", "B"]]), + 
IO.Boolean.Input("generate_audio_codes", default=True, tooltip="Enable the LLM that generates audio codes. This can be slow but will increase the quality of the generated audio. Turn this off if you are giving the model an audio reference.", advanced=True), + IO.Float.Input("cfg_scale", default=2.0, min=0.0, max=100.0, step=0.1, advanced=True), + IO.Float.Input("temperature", default=0.85, min=0.0, max=2.0, step=0.01, advanced=True), + IO.Float.Input("top_p", default=0.9, min=0.0, max=2000.0, step=0.01, advanced=True), + IO.Int.Input("top_k", default=0, min=0, max=100, advanced=True), + IO.Float.Input("min_p", default=0.000, min=0.0, max=1.0, step=0.001, advanced=True), ], - outputs=[io.Conditioning.Output()], + outputs=[IO.Conditioning.Output()], ) @classmethod - def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k, min_p) -> io.NodeOutput: + def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k, min_p) -> IO.NodeOutput: tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p) conditioning = clip.encode_from_tokens_scheduled(tokens) - return io.NodeOutput(conditioning) + return IO.NodeOutput(conditioning) -class EmptyAceStepLatentAudio(io.ComfyNode): +class EmptyAceStepLatentAudio(IO.ComfyNode): @classmethod def define_schema(cls): - return io.Schema( + return IO.Schema( node_id="EmptyAceStepLatentAudio", display_name="Empty Ace Step 1.0 Latent Audio", category="latent/audio", inputs=[ - io.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1), - io.Int.Input( + IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1), 
+ IO.Int.Input( "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." ), ], - outputs=[io.Latent.Output()], + outputs=[IO.Latent.Output()], ) @classmethod - def execute(cls, seconds, batch_size) -> io.NodeOutput: + def execute(cls, seconds, batch_size) -> IO.NodeOutput: length = int(seconds * 44100 / 512 / 8) - latent = torch.zeros([batch_size, 8, 16, length], device=comfy.model_management.intermediate_device()) - return io.NodeOutput({"samples": latent, "type": "audio"}) + latent = torch.zeros([batch_size, 8, 16, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) + return IO.NodeOutput({"samples": latent, "type": "audio"}) -class EmptyAceStep15LatentAudio(io.ComfyNode): +class EmptyAceStep15LatentAudio(IO.ComfyNode): @classmethod def define_schema(cls): - return io.Schema( + return IO.Schema( node_id="EmptyAceStep1.5LatentAudio", display_name="Empty Ace Step 1.5 Latent Audio", category="latent/audio", inputs=[ - io.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01), - io.Int.Input( + IO.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01), + IO.Int.Input( "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." 
), ], - outputs=[io.Latent.Output()], + outputs=[IO.Latent.Output()], ) @classmethod - def execute(cls, seconds, batch_size) -> io.NodeOutput: + def execute(cls, seconds, batch_size) -> IO.NodeOutput: length = round((seconds * 48000 / 1920)) - latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device()) - return io.NodeOutput({"samples": latent, "type": "audio"}) + latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) + return IO.NodeOutput({"samples": latent, "type": "audio"}) -class ReferenceAudio(io.ComfyNode): +class ReferenceAudio(IO.ComfyNode): @classmethod def define_schema(cls): - return io.Schema( + return IO.Schema( node_id="ReferenceTimbreAudio", display_name="Reference Audio", category="advanced/conditioning/audio", is_experimental=True, description="This node sets the reference audio for ace step 1.5", inputs=[ - io.Conditioning.Input("conditioning"), - io.Latent.Input("latent", optional=True), + IO.Conditioning.Input("conditioning"), + IO.Latent.Input("latent", optional=True), ], outputs=[ - io.Conditioning.Output(), + IO.Conditioning.Output(), ] ) @classmethod - def execute(cls, conditioning, latent=None) -> io.NodeOutput: + def execute(cls, conditioning, latent=None) -> IO.NodeOutput: if latent is not None: conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_audio_timbre_latents": [latent["samples"]]}, append=True) - return io.NodeOutput(conditioning) + return IO.NodeOutput(conditioning) class AceExtension(ComfyExtension): @override - async def get_node_list(self) -> list[type[io.ComfyNode]]: + async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ TextEncodeAceStepAudio, EmptyAceStepLatentAudio, diff --git a/comfy_extras/nodes_advanced_samplers.py b/comfy_extras/nodes_advanced_samplers.py index 7f716cd76..567c37be0 100644 --- a/comfy_extras/nodes_advanced_samplers.py +++ 
b/comfy_extras/nodes_advanced_samplers.py @@ -86,13 +86,44 @@ def sample_euler_pp(model, x, sigmas, extra_args=None, callback=None, disable=No return x +class SamplerLCM(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="SamplerLCM", + category="sampling/samplers", + description=("LCM sampler with tunable per-step noise. s_noise is a multiplier on the model's training noise scale"), + inputs=[ + io.Float.Input("s_noise", default=1.0, min=0.0, max=64.0, step=0.01, + tooltip="Per-step noise multiplier at the first step (1.0 = match training)."), + io.Float.Input("s_noise_end", default=1.0, min=0.0, max=64.0, step=0.01, + tooltip="Per-step noise multiplier at the last step. Set equal to s_noise for a constant schedule."), + io.Float.Input("noise_clip_std", default=0.0, min=0.0, max=10.0, step=0.01, + tooltip="Clamp per-step noise to +/- N*std. 0 disables."), + ], + outputs=[io.Sampler.Output()], + ) + + @classmethod + def execute(cls, s_noise, s_noise_end, noise_clip_std) -> io.NodeOutput: + sampler = comfy.samplers.ksampler( + "lcm", + { + "s_noise": float(s_noise), + "s_noise_end": float(s_noise_end), + "noise_clip_std": float(noise_clip_std), + }, + ) + return io.NodeOutput(sampler) + + class SamplerEulerCFGpp(io.ComfyNode): @classmethod def define_schema(cls) -> io.Schema: return io.Schema( node_id="SamplerEulerCFGpp", display_name="SamplerEulerCFG++", - category="_for_testing", # "sampling/custom_sampling/samplers" + category="experimental", # "sampling/custom_sampling/samplers" inputs=[ io.Combo.Input("version", options=["regular", "alternative"], advanced=True), ], @@ -114,6 +145,7 @@ class AdvancedSamplersExtension(ComfyExtension): async def get_node_list(self) -> list[type[io.ComfyNode]]: return [ SamplerLCMUpscale, + SamplerLCM, SamplerEulerCFGpp, ] diff --git a/comfy_extras/nodes_ar_video.py b/comfy_extras/nodes_ar_video.py new file mode 100644 index 000000000..b36588b14 --- /dev/null +++ 
b/comfy_extras/nodes_ar_video.py @@ -0,0 +1,136 @@ +""" +ComfyUI nodes for autoregressive video generation (Causal Forcing, Self-Forcing, etc.). + - EmptyARVideoLatent: create 5D [B, C, T, H, W] video latent tensors + - SamplerARVideo: SAMPLER for the block-by-block autoregressive denoising loop + - ARVideoI2V: image-to-video conditioning for AR models (seeds KV cache with start image) +""" + +import torch +from typing_extensions import override + +import comfy.model_management +import comfy.samplers +import comfy.utils +from comfy_api.latest import ComfyExtension, io + + +class EmptyARVideoLatent(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="EmptyARVideoLatent", + category="latent/video", + inputs=[ + io.Int.Input("width", default=832, min=16, max=8192, step=16), + io.Int.Input("height", default=480, min=16, max=8192, step=16), + io.Int.Input("length", default=81, min=1, max=1024, step=4), + io.Int.Input("batch_size", default=1, min=1, max=64), + ], + outputs=[ + io.Latent.Output(display_name="LATENT"), + ], + ) + + @classmethod + def execute(cls, width, height, length, batch_size) -> io.NodeOutput: + lat_t = ((length - 1) // 4) + 1 + latent = torch.zeros( + [batch_size, 16, lat_t, height // 8, width // 8], + device=comfy.model_management.intermediate_device(), + ) + return io.NodeOutput({"samples": latent}) + + +class SamplerARVideo(io.ComfyNode): + """Sampler for autoregressive video models (Causal Forcing, Self-Forcing). + + All AR-loop parameters are owned by this node so they live in the workflow. + Add new widgets here as the AR sampler grows new options. + """ + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SamplerARVideo", + display_name="Sampler AR Video", + category="sampling/custom_sampling/samplers", + inputs=[ + io.Int.Input( + "num_frame_per_block", + default=1, min=1, max=64, + tooltip="Frames per autoregressive block. 1 = framewise, " + "3 = chunkwise. 
Must match the checkpoint's training mode.", + ), + ], + outputs=[io.Sampler.Output()], + ) + + @classmethod + def execute(cls, num_frame_per_block) -> io.NodeOutput: + extra_options = { + "num_frame_per_block": num_frame_per_block, + } + return io.NodeOutput(comfy.samplers.ksampler("ar_video", extra_options)) + + +class ARVideoI2V(io.ComfyNode): + """Image-to-video setup for AR video models (Causal Forcing, Self-Forcing). + + VAE-encodes the start image and stores it in the model's transformer_options + so that sample_ar_video can seed the KV cache before denoising. + Uses the same T2V model checkpoint -- no separate I2V architecture needed. + """ + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ARVideoI2V", + category="conditioning/video_models", + inputs=[ + io.Model.Input("model"), + io.Vae.Input("vae"), + io.Image.Input("start_image"), + io.Int.Input("width", default=832, min=16, max=8192, step=16), + io.Int.Input("height", default=480, min=16, max=8192, step=16), + io.Int.Input("length", default=81, min=1, max=1024, step=4), + io.Int.Input("batch_size", default=1, min=1, max=64), + ], + outputs=[ + io.Model.Output(display_name="MODEL"), + io.Latent.Output(display_name="LATENT"), + ], + ) + + @classmethod + def execute(cls, model, vae, start_image, width, height, length, batch_size) -> io.NodeOutput: + start_image = comfy.utils.common_upscale( + start_image[:1].movedim(-1, 1), width, height, "bilinear", "center" + ).movedim(1, -1) + + initial_latent = vae.encode(start_image[:, :, :, :3]) + + m = model.clone() + to = m.model_options.setdefault("transformer_options", {}) + ar_cfg = to.setdefault("ar_config", {}) + ar_cfg["initial_latent"] = initial_latent + + lat_t = ((length - 1) // 4) + 1 + latent = torch.zeros( + [batch_size, 16, lat_t, height // 8, width // 8], + device=comfy.model_management.intermediate_device(), + ) + return io.NodeOutput(m, {"samples": latent}) + + +class ARVideoExtension(ComfyExtension): + @override + async def 
get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + EmptyARVideoLatent, + SamplerARVideo, + ARVideoI2V, + ] + + +async def comfy_entrypoint() -> ARVideoExtension: + return ARVideoExtension() diff --git a/comfy_extras/nodes_attention_multiply.py b/comfy_extras/nodes_attention_multiply.py index 060a5c9be..f4ee6a689 100644 --- a/comfy_extras/nodes_attention_multiply.py +++ b/comfy_extras/nodes_attention_multiply.py @@ -25,7 +25,7 @@ class UNetSelfAttentionMultiply(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="UNetSelfAttentionMultiply", - category="_for_testing/attention_experiments", + category="experimental/attention_experiments", inputs=[ io.Model.Input("model"), io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), @@ -48,7 +48,7 @@ class UNetCrossAttentionMultiply(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="UNetCrossAttentionMultiply", - category="_for_testing/attention_experiments", + category="experimental/attention_experiments", inputs=[ io.Model.Input("model"), io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), @@ -72,7 +72,7 @@ class CLIPAttentionMultiply(io.ComfyNode): return io.Schema( node_id="CLIPAttentionMultiply", search_aliases=["clip attention scale", "text encoder attention"], - category="_for_testing/attention_experiments", + category="experimental/attention_experiments", inputs=[ io.Clip.Input("clip"), io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), @@ -106,7 +106,7 @@ class UNetTemporalAttentionMultiply(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="UNetTemporalAttentionMultiply", - category="_for_testing/attention_experiments", + category="experimental/attention_experiments", inputs=[ io.Model.Input("model"), io.Float.Input("self_structural", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), diff --git a/comfy_extras/nodes_audio.py 
b/comfy_extras/nodes_audio.py index a395392d8..fcc1c34d5 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -82,6 +82,8 @@ class VAEEncodeAudio(IO.ComfyNode): @classmethod def execute(cls, vae, audio) -> IO.NodeOutput: + if audio is None: + raise ValueError("VAEEncodeAudio: input audio is None (source video may have no audio track).") sample_rate = audio["sample_rate"] vae_sample_rate = getattr(vae, "audio_sample_rate", 44100) if vae_sample_rate != sample_rate: @@ -104,7 +106,7 @@ def vae_decode_audio(vae, samples, tile=None, overlap=None): std = torch.std(audio, dim=[1, 2], keepdim=True) * 5.0 std[std < 1.0] = 1.0 audio /= std - vae_sample_rate = getattr(vae, "audio_sample_rate", 44100) + vae_sample_rate = getattr(vae, "audio_sample_rate_output", getattr(vae, "audio_sample_rate", 44100)) return {"waveform": audio, "sample_rate": vae_sample_rate if "sample_rate" not in samples else samples["sample_rate"]} @@ -171,6 +173,8 @@ class SaveAudio(IO.ComfyNode): @classmethod def execute(cls, audio, filename_prefix="ComfyUI", format="flac") -> IO.NodeOutput: + if audio is None: + raise ValueError("SaveAudio: input audio is None (source video may have no audio track).") return IO.NodeOutput( ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=format) ) @@ -198,6 +202,8 @@ class SaveAudioMP3(IO.ComfyNode): @classmethod def execute(cls, audio, filename_prefix="ComfyUI", format="mp3", quality="128k") -> IO.NodeOutput: + if audio is None: + raise ValueError("SaveAudioMP3: input audio is None (source video may have no audio track).") return IO.NodeOutput( ui=UI.AudioSaveHelper.get_save_audio_ui( audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality @@ -226,6 +232,8 @@ class SaveAudioOpus(IO.ComfyNode): @classmethod def execute(cls, audio, filename_prefix="ComfyUI", format="opus", quality="V3") -> IO.NodeOutput: + if audio is None: + raise ValueError("SaveAudioOpus: input audio is 
None (source video may have no audio track).") return IO.NodeOutput( ui=UI.AudioSaveHelper.get_save_audio_ui( audio, filename_prefix=filename_prefix, cls=cls, format=format, quality=quality @@ -252,6 +260,8 @@ class PreviewAudio(IO.ComfyNode): @classmethod def execute(cls, audio) -> IO.NodeOutput: + if audio is None: + raise ValueError("PreviewAudio: input audio is None (source video may have no audio track).") return IO.NodeOutput(ui=UI.PreviewAudio(audio, cls=cls)) save_flac = execute # TODO: remove @@ -297,6 +307,7 @@ class LoadAudio(IO.ComfyNode): @classmethod def define_schema(cls): input_dir = folder_paths.get_input_directory() + os.makedirs(input_dir, exist_ok=True) files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"]) return IO.Schema( node_id="LoadAudio", @@ -391,21 +402,26 @@ class TrimAudioDuration(IO.ComfyNode): @classmethod def execute(cls, audio, start_index, duration) -> IO.NodeOutput: + if audio is None: + return IO.NodeOutput(None) waveform = audio["waveform"] sample_rate = audio["sample_rate"] audio_length = waveform.shape[-1] + if audio_length == 0: + return IO.NodeOutput(audio) + if start_index < 0: start_frame = audio_length + int(round(start_index * sample_rate)) else: start_frame = int(round(start_index * sample_rate)) - start_frame = max(0, min(start_frame, audio_length - 1)) + start_frame = max(0, min(start_frame, audio_length)) end_frame = start_frame + int(round(duration * sample_rate)) end_frame = max(0, min(end_frame, audio_length)) if start_frame >= end_frame: - raise ValueError("AudioTrim: Start time must be less than end time and be within the audio length.") + raise ValueError("TrimAudioDuration: Start time must be less than end time and be within the audio length.") return IO.NodeOutput({"waveform": waveform[..., start_frame:end_frame], "sample_rate": sample_rate}) @@ -432,11 +448,13 @@ class SplitAudioChannels(IO.ComfyNode): @classmethod def execute(cls, audio) -> IO.NodeOutput: + if audio is 
None: + return IO.NodeOutput(None, None) waveform = audio["waveform"] sample_rate = audio["sample_rate"] if waveform.shape[1] != 2: - raise ValueError("AudioSplit: Input audio has only one channel.") + raise ValueError(f"AudioSplit: Input audio must be stereo (2 channels), got {waveform.shape[1]} channel(s).") left_channel = waveform[..., 0:1, :] right_channel = waveform[..., 1:2, :] @@ -464,6 +482,12 @@ class JoinAudioChannels(IO.ComfyNode): @classmethod def execute(cls, audio_left, audio_right) -> IO.NodeOutput: + if audio_left is None and audio_right is None: + return IO.NodeOutput(None) + if audio_left is None: + return IO.NodeOutput(audio_right) + if audio_right is None: + return IO.NodeOutput(audio_left) waveform_left = audio_left["waveform"] sample_rate_left = audio_left["sample_rate"] waveform_right = audio_right["waveform"] @@ -537,6 +561,12 @@ class AudioConcat(IO.ComfyNode): @classmethod def execute(cls, audio1, audio2, direction) -> IO.NodeOutput: + if audio1 is None and audio2 is None: + return IO.NodeOutput(None) + if audio1 is None: + return IO.NodeOutput(audio2) + if audio2 is None: + return IO.NodeOutput(audio1) waveform_1 = audio1["waveform"] waveform_2 = audio2["waveform"] sample_rate_1 = audio1["sample_rate"] @@ -584,6 +614,12 @@ class AudioMerge(IO.ComfyNode): @classmethod def execute(cls, audio1, audio2, merge_method) -> IO.NodeOutput: + if audio1 is None and audio2 is None: + return IO.NodeOutput(None) + if audio1 is None: + return IO.NodeOutput(audio2) + if audio2 is None: + return IO.NodeOutput(audio1) waveform_1 = audio1["waveform"] waveform_2 = audio2["waveform"] sample_rate_1 = audio1["sample_rate"] @@ -594,6 +630,9 @@ class AudioMerge(IO.ComfyNode): length_1 = waveform_1.shape[-1] length_2 = waveform_2.shape[-1] + if length_1 == 0 or length_2 == 0: + return IO.NodeOutput({"waveform": waveform_1, "sample_rate": output_sample_rate}) + if length_2 > length_1: logging.info(f"AudioMerge: Trimming audio2 from {length_2} to {length_1} samples 
to match audio1 length.") waveform_2 = waveform_2[..., :length_1] @@ -645,6 +684,8 @@ class AudioAdjustVolume(IO.ComfyNode): @classmethod def execute(cls, audio, volume) -> IO.NodeOutput: + if audio is None: + return IO.NodeOutput(None) if volume == 0: return IO.NodeOutput(audio) waveform = audio["waveform"] @@ -728,8 +769,14 @@ class AudioEqualizer3Band(IO.ComfyNode): @classmethod def execute(cls, audio, low_gain_dB, low_freq, mid_gain_dB, mid_freq, mid_q, high_gain_dB, high_freq) -> IO.NodeOutput: + if audio is None: + return IO.NodeOutput(None) waveform = audio["waveform"] sample_rate = audio["sample_rate"] + + if waveform.shape[-1] == 0: + return IO.NodeOutput(audio) + eq_waveform = waveform.clone() # 1. Apply Low Shelf (Bass) diff --git a/comfy_extras/nodes_audio_encoder.py b/comfy_extras/nodes_audio_encoder.py index 13aacd41a..6a85da89b 100644 --- a/comfy_extras/nodes_audio_encoder.py +++ b/comfy_extras/nodes_audio_encoder.py @@ -10,6 +10,7 @@ class AudioEncoderLoader(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="AudioEncoderLoader", + display_name="Load Audio Encoder", category="loaders", inputs=[ io.Combo.Input( diff --git a/comfy_extras/nodes_bg_removal.py b/comfy_extras/nodes_bg_removal.py new file mode 100644 index 000000000..8d046b8d4 --- /dev/null +++ b/comfy_extras/nodes_bg_removal.py @@ -0,0 +1,60 @@ +import folder_paths +from typing_extensions import override +from comfy_api.latest import ComfyExtension, IO +from comfy.bg_removal_model import load + + +class LoadBackgroundRemovalModel(IO.ComfyNode): + @classmethod + def define_schema(cls): + files = folder_paths.get_filename_list("background_removal") + return IO.Schema( + node_id="LoadBackgroundRemovalModel", + display_name="Load Background Removal Model", + category="loaders", + inputs=[ + IO.Combo.Input("bg_removal_name", options=sorted(files), tooltip="The model used to remove backgrounds from images"), + ], + outputs=[ + IO.BackgroundRemoval.Output("bg_model") + 
] + ) + @classmethod + def execute(cls, bg_removal_name): + path = folder_paths.get_full_path_or_raise("background_removal", bg_removal_name) + bg = load(path) + if bg is None: + raise RuntimeError("ERROR: background model file is invalid and does not contain a valid background removal model.") + return IO.NodeOutput(bg) + +class RemoveBackground(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RemoveBackground", + display_name="Remove Background", + category="image/background removal", + inputs=[ + IO.Image.Input("image", tooltip="Input image to remove the background from"), + IO.BackgroundRemoval.Input("bg_removal_model", tooltip="Background removal model used to generate the mask") + ], + outputs=[ + IO.Mask.Output("mask", tooltip="Generated foreground mask") + ] + ) + @classmethod + def execute(cls, image, bg_removal_model): + mask = bg_removal_model.encode_image(image) + return IO.NodeOutput(mask) + +class BackgroundRemovalExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + LoadBackgroundRemovalModel, + RemoveBackground + ] + + +async def comfy_entrypoint() -> BackgroundRemovalExtension: + return BackgroundRemovalExtension() diff --git a/comfy_extras/nodes_camera_trajectory.py b/comfy_extras/nodes_camera_trajectory.py index e7efa29ba..34b78e81b 100644 --- a/comfy_extras/nodes_camera_trajectory.py +++ b/comfy_extras/nodes_camera_trajectory.py @@ -153,7 +153,7 @@ class WanCameraEmbedding(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="WanCameraEmbedding", - category="camera", + category="conditioning/video_models", inputs=[ io.Combo.Input( "camera_pose", diff --git a/comfy_extras/nodes_compositing.py b/comfy_extras/nodes_compositing.py index 3bc9fccb3..720efc629 100644 --- a/comfy_extras/nodes_compositing.py +++ b/comfy_extras/nodes_compositing.py @@ -202,14 +202,11 @@ class JoinImageWithAlpha(io.ComfyNode): @classmethod def execute(cls, image: 
torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput: - batch_size = min(len(image), len(alpha)) - out_images = [] - - alpha = 1.0 - resize_mask(alpha, image.shape[1:]) - for i in range(batch_size): - out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2)) - - return io.NodeOutput(torch.stack(out_images)) + batch_size = max(len(image), len(alpha)) + alpha = 1.0 - resize_mask(alpha.to(image), image.shape[1:]) + alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size) + image = comfy.utils.repeat_to_batch_size(image, batch_size) + return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1)) class CompositingExtension(ComfyExtension): diff --git a/comfy_extras/nodes_cond.py b/comfy_extras/nodes_cond.py index 86426a780..b745a43af 100644 --- a/comfy_extras/nodes_cond.py +++ b/comfy_extras/nodes_cond.py @@ -8,7 +8,7 @@ class CLIPTextEncodeControlnet(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="CLIPTextEncodeControlnet", - category="_for_testing/conditioning", + category="experimental/conditioning", inputs=[ io.Clip.Input("clip"), io.Conditioning.Input("conditioning"), @@ -35,7 +35,7 @@ class T5TokenizerOptions(io.ComfyNode): def define_schema(cls) -> io.Schema: return io.Schema( node_id="T5TokenizerOptions", - category="_for_testing/conditioning", + category="experimental/conditioning", inputs=[ io.Clip.Input("clip"), io.Int.Input("min_padding", default=0, min=0, max=10000, step=1, advanced=True), diff --git a/comfy_extras/nodes_context_windows.py b/comfy_extras/nodes_context_windows.py index 0e43f2e44..f7ca833dc 100644 --- a/comfy_extras/nodes_context_windows.py +++ b/comfy_extras/nodes_context_windows.py @@ -10,7 +10,7 @@ class ContextWindowsManualNode(io.ComfyNode): return io.Schema( node_id="ContextWindowsManual", display_name="Context Windows (Manual)", - category="context", + category="model_patches", description="Manually set context windows.", inputs=[ io.Model.Input("model", 
tooltip="The model to apply context windows to during sampling."), @@ -29,6 +29,7 @@ class ContextWindowsManualNode(io.ComfyNode): io.Boolean.Input("freenoise", default=False, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending."), io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window, for example setting this to '0' will use the initial start image for each window."), io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index."), + io.Boolean.Input("causal_window_fix", default=True, tooltip="Whether to add a causal fix frame to non-0-indexed context windows."), ], outputs=[ io.Model.Output(tooltip="The model with context windows applied during sampling."), @@ -38,7 +39,7 @@ class ContextWindowsManualNode(io.ComfyNode): @classmethod def execute(cls, model: io.Model.Type, context_length: int, context_overlap: int, context_schedule: str, context_stride: int, closed_loop: bool, fuse_method: str, dim: int, freenoise: bool, - cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False) -> io.Model: + cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False, causal_window_fix: bool=True) -> io.Model: model = model.clone() model.model_options["context_handler"] = comfy.context_windows.IndexListContextHandler( context_schedule=comfy.context_windows.get_matching_context_schedule(context_schedule), @@ -50,7 +51,8 @@ class ContextWindowsManualNode(io.ComfyNode): dim=dim, freenoise=freenoise, cond_retain_index_list=cond_retain_index_list, - split_conds_to_windows=split_conds_to_windows + split_conds_to_windows=split_conds_to_windows, + causal_window_fix=causal_window_fix, ) # make memory usage calculation only take into account the context window latents comfy.context_windows.create_prepare_sampling_wrapper(model) diff --git 
a/comfy_extras/nodes_curve.py b/comfy_extras/nodes_curve.py index 9016a84f9..9803e8034 100644 --- a/comfy_extras/nodes_curve.py +++ b/comfy_extras/nodes_curve.py @@ -1,5 +1,7 @@ from __future__ import annotations +import numpy as np + from comfy_api.latest import ComfyExtension, io from comfy_api.input import CurveInput from typing_extensions import override @@ -32,10 +34,58 @@ class CurveEditor(io.ComfyNode): return io.NodeOutput(result, ui=ui) if ui else io.NodeOutput(result) +class ImageHistogram(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ImageHistogram", + display_name="Image Histogram", + category="utils", + inputs=[ + io.Image.Input("image"), + ], + outputs=[ + io.Histogram.Output("rgb"), + io.Histogram.Output("luminance"), + io.Histogram.Output("red"), + io.Histogram.Output("green"), + io.Histogram.Output("blue"), + ], + ) + + @classmethod + def execute(cls, image) -> io.NodeOutput: + img = image[0].cpu().numpy() + img_uint8 = np.clip(img * 255, 0, 255).astype(np.uint8) + + def bincount(data): + return np.bincount(data.ravel(), minlength=256)[:256] + + hist_r = bincount(img_uint8[:, :, 0]) + hist_g = bincount(img_uint8[:, :, 1]) + hist_b = bincount(img_uint8[:, :, 2]) + + # Average of R, G, B histograms (same as Photoshop's RGB composite) + rgb = ((hist_r + hist_g + hist_b) // 3).tolist() + + # ITU-R BT.709-6, Item 3.2 (p.6) — Derivation of luminance signal + # https://www.itu.int/rec/R-REC-BT.709-6-201506-I/en + lum = 0.2126 * img[:, :, 0] + 0.7152 * img[:, :, 1] + 0.0722 * img[:, :, 2] + luminance = bincount(np.clip(lum * 255, 0, 255).astype(np.uint8)).tolist() + + return io.NodeOutput( + rgb, + luminance, + hist_r.tolist(), + hist_g.tolist(), + hist_b.tolist(), + ) + + class CurveExtension(ComfyExtension): @override async def get_node_list(self): - return [CurveEditor] + return [CurveEditor, ImageHistogram] async def comfy_entrypoint(): diff --git a/comfy_extras/nodes_custom_sampler.py 
b/comfy_extras/nodes_custom_sampler.py index 1e957c09b..c67145d2d 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -984,7 +984,7 @@ class AddNoise(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="AddNoise", - category="_for_testing/custom_sampling/noise", + category="experimental/custom_sampling/noise", is_experimental=True, inputs=[ io.Model.Input("model"), @@ -1034,7 +1034,7 @@ class ManualSigmas(io.ComfyNode): return io.Schema( node_id="ManualSigmas", search_aliases=["custom noise schedule", "define sigmas"], - category="_for_testing/custom_sampling", + category="experimental/custom_sampling", is_experimental=True, inputs=[ io.String.Input("sigmas", default="1, 0.5", multiline=False) diff --git a/comfy_extras/nodes_differential_diffusion.py b/comfy_extras/nodes_differential_diffusion.py index 34ffb9a89..4fa61ad0e 100644 --- a/comfy_extras/nodes_differential_diffusion.py +++ b/comfy_extras/nodes_differential_diffusion.py @@ -13,7 +13,7 @@ class DifferentialDiffusion(io.ComfyNode): node_id="DifferentialDiffusion", search_aliases=["inpaint gradient", "variable denoise strength"], display_name="Differential Diffusion", - category="_for_testing", + category="experimental", inputs=[ io.Model.Input("model"), io.Float.Input( diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py index 3a23c7d04..5e04a5f77 100644 --- a/comfy_extras/nodes_flux.py +++ b/comfy_extras/nodes_flux.py @@ -102,7 +102,7 @@ class FluxDisableGuidance(io.ComfyNode): append = execute # TODO: remove -PREFERED_KONTEXT_RESOLUTIONS = [ +PREFERRED_KONTEXT_RESOLUTIONS = [ (672, 1568), (688, 1504), (720, 1456), @@ -143,7 +143,7 @@ class FluxKontextImageScale(io.ComfyNode): width = image.shape[2] height = image.shape[1] aspect_ratio = width / height - _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERED_KONTEXT_RESOLUTIONS) + _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in 
PREFERRED_KONTEXT_RESOLUTIONS) image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1) return io.NodeOutput(image) diff --git a/comfy_extras/nodes_frame_interpolation.py b/comfy_extras/nodes_frame_interpolation.py new file mode 100644 index 000000000..9dd34cfb8 --- /dev/null +++ b/comfy_extras/nodes_frame_interpolation.py @@ -0,0 +1,208 @@ +import torch +from tqdm import tqdm +from typing_extensions import override + +import comfy.model_patcher +import comfy.utils +import folder_paths +from comfy import model_management +from comfy_extras.frame_interpolation_models.ifnet import IFNet, detect_rife_config +from comfy_extras.frame_interpolation_models.film_net import FILMNet +from comfy_api.latest import ComfyExtension, io + +FrameInterpolationModel = io.Custom("INTERP_MODEL") + + +class FrameInterpolationModelLoader(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FrameInterpolationModelLoader", + display_name="Load Frame Interpolation Model", + category="loaders", + inputs=[ + io.Combo.Input("model_name", options=folder_paths.get_filename_list("frame_interpolation"), + tooltip="Select a frame interpolation model to load. 
Models must be placed in the 'frame_interpolation' folder."), + ], + outputs=[ + FrameInterpolationModel.Output(), + ], + ) + + @classmethod + def execute(cls, model_name) -> io.NodeOutput: + model_path = folder_paths.get_full_path_or_raise("frame_interpolation", model_name) + sd = comfy.utils.load_torch_file(model_path, safe_load=True) + + model = cls._detect_and_load(sd) + dtype = torch.float16 if model_management.should_use_fp16(model_management.get_torch_device()) else torch.float32 + model.eval().to(dtype) + patcher = comfy.model_patcher.CoreModelPatcher( + model, + load_device=model_management.get_torch_device(), + offload_device=model_management.unet_offload_device(), + ) + return io.NodeOutput(patcher) + + @classmethod + def _detect_and_load(cls, sd): + # Try FILM + if "extract.extract_sublevels.convs.0.0.conv.weight" in sd: + model = FILMNet() + model.load_state_dict(sd) + return model + + # Try RIFE (needs key remapping for raw checkpoints) + sd = comfy.utils.state_dict_prefix_replace(sd, {"module.": "", "flownet.": ""}) + key_map = {} + for k in sd: + for i in range(5): + if k.startswith(f"block{i}."): + key_map[k] = f"blocks.{i}.{k[len(f'block{i}.'):]}" + if key_map: + sd = {key_map.get(k, k): v for k, v in sd.items()} + sd = {k: v for k, v in sd.items() if not k.startswith(("teacher.", "caltime."))} + + try: + head_ch, channels = detect_rife_config(sd) + except (KeyError, ValueError): + raise ValueError("Unrecognized frame interpolation model format") + model = IFNet(head_ch=head_ch, channels=channels) + model.load_state_dict(sd) + return model + + +class FrameInterpolate(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FrameInterpolate", + display_name="Frame Interpolate", + category="video", + search_aliases=["rife", "film", "frame interpolation", "slow motion", "interpolate frames", "vfi"], + inputs=[ + FrameInterpolationModel.Input("interp_model"), + io.Image.Input("images"), + io.Int.Input("multiplier", 
default=2, min=2, max=16), + ], + outputs=[ + io.Image.Output(), + ], + ) + + @classmethod + def execute(cls, interp_model, images, multiplier) -> io.NodeOutput: + offload_device = model_management.intermediate_device() + + num_frames = images.shape[0] + if num_frames < 2 or multiplier < 2: + return io.NodeOutput(images) + + device = interp_model.load_device + dtype = interp_model.model_dtype() + inference_model = interp_model.model + activation_mem = inference_model.memory_used_forward(images.shape, dtype) + model_management.load_models_gpu([interp_model], memory_required=activation_mem) + align = getattr(inference_model, "pad_align", 1) + H, W = images.shape[1], images.shape[2] + + # Prepare a single padded frame on device for determining output dimensions + def prepare_frame(idx): + frame = images[idx:idx + 1].movedim(-1, 1).to(dtype=dtype, device=device) + if align > 1: + from comfy.ldm.common_dit import pad_to_patch_size + frame = pad_to_patch_size(frame, (align, align), padding_mode="reflect") + return frame + + # Count total interpolation passes for progress bar + total_pairs = num_frames - 1 + num_interp = multiplier - 1 + total_steps = total_pairs * num_interp + pbar = comfy.utils.ProgressBar(total_steps) + tqdm_bar = tqdm(total=total_steps, desc="Frame interpolation") + + batch = num_interp # reduced on OOM and persists across pairs (same resolution = same limit) + t_values = [t / multiplier for t in range(1, multiplier)] + + out_dtype = model_management.intermediate_dtype() + total_out_frames = total_pairs * multiplier + 1 + result = torch.empty((total_out_frames, 3, H, W), dtype=out_dtype, device=offload_device) + result[0] = images[0].movedim(-1, 0).to(out_dtype) + out_idx = 1 + + # Pre-compute timestep tensor on device (padded dimensions needed) + sample = prepare_frame(0) + pH, pW = sample.shape[2], sample.shape[3] + ts_full = torch.tensor(t_values, device=device, dtype=dtype).reshape(num_interp, 1, 1, 1) + ts_full = ts_full.expand(-1, 1, pH, pW) + 
del sample + + multi_fn = getattr(inference_model, "forward_multi_timestep", None) + feat_cache = {} + prev_frame = None + + try: + for i in range(total_pairs): + img0_single = prev_frame if prev_frame is not None else prepare_frame(i) + img1_single = prepare_frame(i + 1) + prev_frame = img1_single + + # Cache features: img1 of pair N becomes img0 of pair N+1 + feat_cache["img0"] = feat_cache.pop("next") if "next" in feat_cache else inference_model.extract_features(img0_single) + feat_cache["img1"] = inference_model.extract_features(img1_single) + feat_cache["next"] = feat_cache["img1"] + + used_multi = False + if multi_fn is not None: + # Models with timestep-independent flow can compute it once for all timesteps + try: + mids = multi_fn(img0_single, img1_single, t_values, cache=feat_cache) + result[out_idx:out_idx + num_interp] = mids[:, :, :H, :W].to(out_dtype) + out_idx += num_interp + pbar.update(num_interp) + tqdm_bar.update(num_interp) + used_multi = True + except model_management.OOM_EXCEPTION: + model_management.soft_empty_cache() + multi_fn = None # fall through to single-timestep path + + if not used_multi: + j = 0 + while j < num_interp: + b = min(batch, num_interp - j) + try: + img0 = img0_single.expand(b, -1, -1, -1) + img1 = img1_single.expand(b, -1, -1, -1) + mids = inference_model(img0, img1, timestep=ts_full[j:j + b], cache=feat_cache) + result[out_idx:out_idx + b] = mids[:, :, :H, :W].to(out_dtype) + out_idx += b + pbar.update(b) + tqdm_bar.update(b) + j += b + except model_management.OOM_EXCEPTION: + if batch <= 1: + raise + batch = max(1, batch // 2) + model_management.soft_empty_cache() + + result[out_idx] = images[i + 1].movedim(-1, 0).to(out_dtype) + out_idx += 1 + finally: + tqdm_bar.close() + + # BCHW -> BHWC + result = result.movedim(1, -1).clamp_(0.0, 1.0) + return io.NodeOutput(result) + + +class FrameInterpolationExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + 
FrameInterpolationModelLoader, + FrameInterpolate, + ] + + +async def comfy_entrypoint() -> FrameInterpolationExtension: + return FrameInterpolationExtension() diff --git a/comfy_extras/nodes_fresca.py b/comfy_extras/nodes_fresca.py index eab4f303f..173f42154 100644 --- a/comfy_extras/nodes_fresca.py +++ b/comfy_extras/nodes_fresca.py @@ -60,7 +60,7 @@ class FreSca(io.ComfyNode): node_id="FreSca", search_aliases=["frequency guidance"], display_name="FreSca", - category="_for_testing", + category="experimental", description="Applies frequency-dependent scaling to the guidance", inputs=[ io.Model.Input("model"), diff --git a/comfy_extras/nodes_hidream_o1.py b/comfy_extras/nodes_hidream_o1.py new file mode 100644 index 000000000..f393745f6 --- /dev/null +++ b/comfy_extras/nodes_hidream_o1.py @@ -0,0 +1,256 @@ +from typing_extensions import override + +import torch + +import comfy.model_management +import comfy.patcher_extension +import node_helpers +from comfy_api.latest import ComfyExtension, io + + +class EmptyHiDreamO1LatentImage(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="EmptyHiDreamO1LatentImage", + display_name="Empty HiDream-O1 Latent Image", + category="latent/image", + description=( + "Empty pixel-space latent for HiDream-O1-Image. The model was " + "trained at ~4 megapixels; lower resolutions go off-distribution " + "and quality regresses noticeably. Trained resolutions: " + "2048x2048, 2304x1728, 1728x2304, 2560x1440, 1440x2560, " + "2496x1664, 1664x2496, 3104x1312, 1312x3104, 2304x1792, 1792x2304." 
+ ), + inputs=[ + io.Int.Input(id="width", default=2048, min=64, max=4096, step=32), + io.Int.Input(id="height", default=2048, min=64, max=4096, step=32), + io.Int.Input(id="batch_size", default=1, min=1, max=64), + ], + outputs=[io.Latent().Output()], + ) + + @classmethod + def execute(cls, *, width: int, height: int, batch_size: int = 1) -> io.NodeOutput: + latent = torch.zeros( + (batch_size, 3, height, width), + device=comfy.model_management.intermediate_device(), + ) + return io.NodeOutput({"samples": latent}) + + +class HiDreamO1ReferenceImages(io.ComfyNode): + """Attach reference images to both positive and negative conditioning.""" + + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="HiDreamO1ReferenceImages", + display_name="HiDream-O1 Reference Images", + category="conditioning/image", + description=( + "Attach 1-10 reference images to conditioning, one for edit instruction" + "or multiple for subject-driven personalization." + ), + inputs=[ + io.Conditioning.Input(id="positive"), + io.Conditioning.Input(id="negative"), + io.Autogrow.Input( + "images", + template=io.Autogrow.TemplateNames( + io.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 11)], + min=1, + ), + tooltip=("Reference images. 1 image = instruction edit; 2-10 images = multi reference." 
class HiDreamO1PatchSeamSmoothing(io.ComfyNode):
    """Model patch that blends predictions taken at several shifted patch grids.

    While the sampler timestep lies inside the configured window, the wrapped
    diffusion model is evaluated once per shift on a spatially rolled copy of
    the input. Each prediction is rolled back, the set is combined (mean,
    Hann-weighted mean, or median), and the result is mixed into the unshifted
    prediction everywhere except a border strip near the image edges.
    """

    # Patch edge length, in the same units as the last two dims of the model input.
    PATCH_SIZE = 32
    # Width of the linear ramp between the untouched border strip and the blended interior.
    EDGE_FEATHER = 4

    # Shift presets per (pattern, N). 8-pass = 4-quadrant + 4 quarter-patch offsets.
    SHIFTS_BY_PATTERN = {
        ("single_shift", 2): [(0, 0), (16, 16)],
        ("single_shift", 4): [(0, 0), (16, 0), (0, 16), (16, 16)],
        ("single_shift", 8): [(0, 0), (16, 0), (0, 16), (16, 16),
                              (8, 8), (24, 8), (8, 24), (24, 24)],
        ("symmetric", 2): [(-8, -8), (8, 8)],
        ("symmetric", 4): [(-8, -8), (8, -8), (-8, 8), (8, 8)],
        ("symmetric", 8): [(-12, -12), (4, -12), (-12, 4), (4, 4),
                           (-4, -4), (12, -4), (-4, 12), (12, 12)],
    }
    # Maps the "passes" combo value to the pass counts used over the gated range.
    # More than one entry means the pass count changes as sampling progresses.
    RAMP_LEVELS = {
        "2": [2],
        "4": [4],
        "ramp_2_4": [2, 4],
        "ramp_2_4_8": [2, 4, 8],
    }

    @staticmethod
    def _hann_tile(cy: int, cx: int, size: int = 32) -> torch.Tensor:
        """Return a (size, size) Hann tile peaking at (cy, cx) within a patch.

        Distances to the peak wrap around modulo ``size``, so the tile is
        periodic and can be repeated to cover a whole image.
        """
        half = size // 2
        yy = torch.arange(size).view(size, 1)
        xx = torch.arange(size).view(1, size)
        # Signed wrapped offsets from the peak, in [-half, half).
        dy = ((yy - cy + half) % size) - half
        dx = ((xx - cx + half) % size) - half
        return 0.25 * (1 + torch.cos(torch.pi * dy / half)) * (1 + torch.cos(torch.pi * dx / half))

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Declare the node's inputs, outputs and UI metadata."""
        return io.Schema(
            node_id="HiDreamO1PatchSeamSmoothing",
            display_name="HiDream-O1 Patch Seam Smoothing",
            category="advanced/model",
            is_experimental=True,
            description=(
                "Average the model output across multiple shifted patch-grid "
                "positions during the late portion of sampling. Cancels seams."
            ),
            inputs=[
                io.Model.Input(id="model"),
                io.Float.Input(id="start_percent", default=0.8, min=0.0, max=1.0, step=0.01,
                    tooltip="Sampling progress (0=start, 1=end) at which the blend turns ON.",
                ),
                io.Float.Input(id="end_percent", default=1.0, min=0.0, max=1.0, step=0.01,
                    tooltip="Sampling progress at which the blend turns OFF.",
                ),
                io.Combo.Input(
                    id="pattern",
                    options=["single_shift", "symmetric"],
                    default="single_shift",
                    tooltip="Shift layout. single_shift: one pass at the natural patch grid + others offset. symmetric: all passes off-grid, shifts split around origin.",
                ),
                io.Combo.Input(
                    id="passes",
                    options=["2", "4", "ramp_2_4", "ramp_2_4_8"],
                    default="2",
                    tooltip="Number of passes per gated step. 2/4 = fixed. ramp_*: pass count increases as sampling approaches end (more smoothing where seams are most visible).",
                ),
                io.Combo.Input(
                    id="blend",
                    options=["average", "window", "median"],
                    default="average",
                    tooltip="average: equal-weight mean. window: Hann-windowed weighting favoring each pass away from its patch boundaries. median: per-pixel median, rejects wraparound-outlier passes.",
                ),
                io.Float.Input(id="strength", default=1.0, min=0.0, max=1.0, step=0.01,
                    tooltip="Interpolation between the natural-grid pred (0) and the averaged result (1).",
                ),
            ],
            outputs=[io.Model.Output()],
        )

    @classmethod
    def execute(cls, *, model, start_percent: float, end_percent: float, pattern: str, passes: str, blend: str, strength: float) -> io.NodeOutput:
        """Return a clone of ``model`` with the seam-smoothing wrapper attached.

        The input model is returned unchanged when ``strength`` is not positive
        or when the window is empty (``end_percent <= start_percent``).

        Args:
            model: Model patcher to clone and wrap.
            start_percent: Sampling progress at which blending starts.
            end_percent: Sampling progress at which blending stops.
            pattern: Shift layout; first half of a ``SHIFTS_BY_PATTERN`` key.
            passes: Key into ``RAMP_LEVELS`` selecting the pass count(s).
            blend: "average", "window" or "median".
            strength: Mix factor between the unshifted prediction (0) and the
                blended prediction (1).
        """
        if strength <= 0.0 or end_percent <= start_percent:
            return io.NodeOutput(model)

        P = cls.PATCH_SIZE
        half = P // 2
        shift_levels = [cls.SHIFTS_BY_PATTERN[(pattern, n)] for n in cls.RAMP_LEVELS[passes]]

        if blend == "window":
            # One stack of Hann tiles per level. The peak position is derived from
            # each pass's shift so that every pass gets its own weighting tile.
            window_tile_levels = [
                torch.stack([cls._hann_tile((half - sy) % P, (half - sx) % P, P) for sy, sx in lst], dim=0)
                for lst in shift_levels
            ]
        else:
            window_tile_levels = [None] * len(shift_levels)

        m = model.clone()
        model_sampling = m.get_model_object("model_sampling")
        multiplier = float(model_sampling.multiplier)
        # Window bounds converted from progress percentages, compared below against
        # the timestep the wrapper receives.
        # NOTE(review): the gate assumes start_t >= end_t, i.e. that
        # percent_to_sigma decreases as the percentage grows - confirm.
        start_t = float(model_sampling.percent_to_sigma(start_percent)) * multiplier
        end_t = float(model_sampling.percent_to_sigma(end_percent)) * multiplier

        edge_ramp_cache: dict = {}

        def get_edge_ramp(H: int, W: int, device, dtype) -> torch.Tensor:
            """Return an (H, W) mask: 0 within P of any edge, ramping to 1 over EDGE_FEATHER."""
            key = (H, W, device, dtype)
            cached = edge_ramp_cache.get(key)
            if cached is not None:
                return cached
            feather = cls.EDGE_FEATHER
            rows = torch.arange(H, device=device, dtype=torch.float32)
            cols = torch.arange(W, device=device, dtype=torch.float32)
            # Distance of every row / column to its nearest image border.
            ys = torch.minimum(rows, (H - 1) - rows)
            xs = torch.minimum(cols, (W - 1) - cols)
            y_mask = ((ys - P) / feather).clamp(0, 1)
            x_mask = ((xs - P) / feather).clamp(0, 1)
            ramp = (y_mask[:, None] * x_mask[None, :]).to(dtype)
            edge_ramp_cache[key] = ramp
            return ramp

        def smoothing_wrapper(executor, *args, **kwargs):
            """Run the model at each shift and blend the re-aligned predictions."""
            x = args[0]
            # Only the first entry of the timestep argument is used for gating.
            t = float(args[1][0])
            pred = executor(*args, **kwargs)
            if not (end_t <= t <= start_t):
                return pred
            # Pick shift-level by sigma phase across the gated range.
            if len(shift_levels) == 1:
                level_idx = 0
            else:
                phase = (start_t - t) / max(start_t - end_t, 1e-8)
                level_idx = min(int(phase * len(shift_levels)), len(shift_levels) - 1)
            shifts = shift_levels[level_idx]
            window_tiles = window_tile_levels[level_idx]

            preds = []
            for sy, sx in shifts:
                if sy == 0 and sx == 0:
                    # The unshifted pass is the prediction already computed above.
                    preds.append(pred)
                    continue
                x_rolled = torch.roll(x, shifts=(sy, sx), dims=(-2, -1))
                pred_rolled = executor(x_rolled, *args[1:], **kwargs)
                # Undo the roll so all predictions are aligned with the original input.
                preds.append(torch.roll(pred_rolled, shifts=(-sy, -sx), dims=(-2, -1)))
            stacked = torch.stack(preds, dim=0)  # (N, B, C, H, W)
            _, _, _, H, W = stacked.shape
            if blend == "window":
                N = stacked.shape[0]
                tiles = window_tiles.to(device=stacked.device, dtype=stacked.dtype)
                # Ceiling division: repeat enough tiles to cover H x W even when the
                # size is not a multiple of P, then crop the overhang. Floor division
                # produced a weight map smaller than the prediction in that case,
                # which made the multiply below fail to broadcast.
                reps_y = -(-H // P)
                reps_x = -(-W // P)
                w = tiles.repeat(1, reps_y, reps_x)[:, :H, :W]
                sum_w = w.sum(dim=0, keepdim=True)
                # Where all windows are (near) zero, fall back to equal weights.
                w = torch.where(sum_w < 1e-3, torch.full_like(w, 1.0 / N), w / sum_w.clamp(min=1e-8))
                avg = (stacked * w[:, None, None, :, :]).sum(dim=0)
            elif blend == "median":
                # NOTE: every preset has an even pass count, and torch.median returns
                # the lower of the two middle values rather than their mean.
                avg = torch.median(stacked, dim=0).values
            else:
                avg = stacked.mean(dim=0)

            # Mask out the P-px wraparound contamination strip at each edge.
            mask = get_edge_ramp(H, W, pred.device, pred.dtype)
            return pred * (1.0 - mask * strength) + avg * (mask * strength)

        m.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, "hidream_o1_patch_seam_smoothing", smoothing_wrapper)
        return io.NodeOutput(m)


class HiDreamO1Extension(ComfyExtension):
    """Extension that registers the HiDream-O1 nodes listed in get_node_list."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [
            EmptyHiDreamO1LatentImage,
            HiDreamO1ReferenceImages,
            HiDreamO1PatchSeamSmoothing,
        ]


async def comfy_entrypoint() -> HiDreamO1Extension:
    """Return a new HiDreamO1Extension instance."""
    return HiDreamO1Extension()
Types from comfy_api.latest._util import MESH, VOXEL # only for backward compatibility if someone import it from this file (will be removed later) # noqa @@ -40,7 +35,7 @@ class Hunyuan3Dv2Conditioning(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="Hunyuan3Dv2Conditioning", - category="conditioning/video_models", + category="conditioning/3d_models", inputs=[ IO.ClipVisionOutput.Input("clip_vision_output"), ], @@ -65,7 +60,7 @@ class Hunyuan3Dv2ConditioningMultiView(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="Hunyuan3Dv2ConditioningMultiView", - category="conditioning/video_models", + category="conditioning/3d_models", inputs=[ IO.ClipVisionOutput.Input("front", optional=True), IO.ClipVisionOutput.Input("left", optional=True), @@ -424,6 +419,7 @@ class VoxelToMeshBasic(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="VoxelToMeshBasic", + display_name="Voxel to Mesh (Basic)", category="3d", inputs=[ IO.Voxel.Input("voxel"), @@ -443,7 +439,9 @@ class VoxelToMeshBasic(IO.ComfyNode): vertices.append(v) faces.append(f) - return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces): + return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces)) decode = execute # TODO: remove @@ -453,6 +451,7 @@ class VoxelToMesh(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="VoxelToMesh", + display_name="Voxel to Mesh", category="3d", inputs=[ IO.Voxel.Input("voxel"), @@ -479,206 +478,13 @@ class VoxelToMesh(IO.ComfyNode): vertices.append(v) faces.append(f) - return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces): + return 
IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces)) decode = execute # TODO: remove -def save_glb(vertices, faces, filepath, metadata=None): - """ - Save PyTorch tensor vertices and faces as a GLB file without external dependencies. - - Parameters: - vertices: torch.Tensor of shape (N, 3) - The vertex coordinates - faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces) - filepath: str - Output filepath (should end with .glb) - """ - - # Convert tensors to numpy arrays - vertices_np = vertices.cpu().numpy().astype(np.float32) - faces_np = faces.cpu().numpy().astype(np.uint32) - - vertices_buffer = vertices_np.tobytes() - indices_buffer = faces_np.tobytes() - - def pad_to_4_bytes(buffer): - padding_length = (4 - (len(buffer) % 4)) % 4 - return buffer + b'\x00' * padding_length - - vertices_buffer_padded = pad_to_4_bytes(vertices_buffer) - indices_buffer_padded = pad_to_4_bytes(indices_buffer) - - buffer_data = vertices_buffer_padded + indices_buffer_padded - - vertices_byte_length = len(vertices_buffer) - vertices_byte_offset = 0 - indices_byte_length = len(indices_buffer) - indices_byte_offset = len(vertices_buffer_padded) - - gltf = { - "asset": {"version": "2.0", "generator": "ComfyUI"}, - "buffers": [ - { - "byteLength": len(buffer_data) - } - ], - "bufferViews": [ - { - "buffer": 0, - "byteOffset": vertices_byte_offset, - "byteLength": vertices_byte_length, - "target": 34962 # ARRAY_BUFFER - }, - { - "buffer": 0, - "byteOffset": indices_byte_offset, - "byteLength": indices_byte_length, - "target": 34963 # ELEMENT_ARRAY_BUFFER - } - ], - "accessors": [ - { - "bufferView": 0, - "byteOffset": 0, - "componentType": 5126, # FLOAT - "count": len(vertices_np), - "type": "VEC3", - "max": vertices_np.max(axis=0).tolist(), - "min": vertices_np.min(axis=0).tolist() - }, - { - "bufferView": 1, - "byteOffset": 0, - "componentType": 5125, # UNSIGNED_INT - "count": 
faces_np.size, - "type": "SCALAR" - } - ], - "meshes": [ - { - "primitives": [ - { - "attributes": { - "POSITION": 0 - }, - "indices": 1, - "mode": 4 # TRIANGLES - } - ] - } - ], - "nodes": [ - { - "mesh": 0 - } - ], - "scenes": [ - { - "nodes": [0] - } - ], - "scene": 0 - } - - if metadata is not None: - gltf["asset"]["extras"] = metadata - - # Convert the JSON to bytes - gltf_json = json.dumps(gltf).encode('utf8') - - def pad_json_to_4_bytes(buffer): - padding_length = (4 - (len(buffer) % 4)) % 4 - return buffer + b' ' * padding_length - - gltf_json_padded = pad_json_to_4_bytes(gltf_json) - - # Create the GLB header - # Magic glTF - glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data)) - - # Create JSON chunk header (chunk type 0) - json_chunk_header = struct.pack('