diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml index 28484a9d1..f501b7b31 100644 --- a/.github/workflows/stable-release.yml +++ b/.github/workflows/stable-release.yml @@ -117,7 +117,7 @@ jobs: ./python.exe get-pip.py ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/* - grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt + grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt ./python.exe -s -m pip install -r requirements_comfyui.txt rm requirements_comfyui.txt diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml index 63df2dc3a..adfc5dd32 100644 --- a/.github/workflows/test-ci.yml +++ b/.github/workflows/test-ci.yml @@ -20,7 +20,6 @@ jobs: test-stable: strategy: fail-fast: false - max-parallel: 1 # This forces sequential execution matrix: # os: [macos, linux, windows] # os: [macos, linux] @@ -75,7 +74,6 @@ jobs: test-unix-nightly: strategy: fail-fast: false - max-parallel: 1 # This forces sequential execution matrix: # os: [macos, linux] os: [linux] diff --git a/.github/workflows/test-launch.yml b/.github/workflows/test-launch.yml index ef0d3f123..581c0474b 100644 --- a/.github/workflows/test-launch.yml +++ b/.github/workflows/test-launch.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout ComfyUI uses: actions/checkout@v4 with: - repository: "comfyanonymous/ComfyUI" + repository: "Comfy-Org/ComfyUI" path: "ComfyUI" - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/update-ci-container.yml b/.github/workflows/update-ci-container.yml new file mode 100644 index 000000000..f7972e056 --- /dev/null +++ b/.github/workflows/update-ci-container.yml @@ -0,0 +1,59 @@ +name: "CI: Update CI Container" + +on: + release: + types: [published] + workflow_dispatch: + inputs: + version: + description: 'ComfyUI version (e.g., v0.7.0)' + required: true + type: string + +jobs: + update-ci-container: + runs-on: ubuntu-latest + # Skip pre-releases unless manually triggered + if: github.event_name == 'workflow_dispatch' || !github.event.release.prerelease + steps: + - name: Get version + id: version + run: | + if [ "${{ github.event_name }}" = "release" ]; then + VERSION="${{ github.event.release.tag_name }}" + else + VERSION="${{ inputs.version }}" + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Checkout comfyui-ci-container + uses: actions/checkout@v4 + with: + repository: comfy-org/comfyui-ci-container + token: ${{ secrets.CI_CONTAINER_PAT }} + + - name: Check current version + id: current + run: | + CURRENT=$(grep -oP 'ARG COMFYUI_VERSION=\K.*' Dockerfile || echo "unknown") + echo "current_version=$CURRENT" >> $GITHUB_OUTPUT + + - name: Update Dockerfile + run: | + VERSION="${{ steps.version.outputs.version }}" + sed -i "s/^ARG COMFYUI_VERSION=.*/ARG COMFYUI_VERSION=${VERSION}/" Dockerfile + + - name: Create Pull Request + id: create-pr + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.CI_CONTAINER_PAT }} + branch: automation/comfyui-${{ steps.version.outputs.version }} + title: "chore: bump ComfyUI to ${{ steps.version.outputs.version }}" + body: | + Updates ComfyUI version from `${{ steps.current.outputs.current_version }}` to `${{ steps.version.outputs.version }}` + + **Triggered by:** ${{ github.event_name == 'release' && format('[Release {0}]({1})', github.event.release.tag_name, github.event.release.html_url) || 'Manual workflow dispatch' }} + + labels: automation + commit-message: "chore: bump ComfyUI to ${{ steps.version.outputs.version }}" 
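The new update-ci-container workflow reads the current pin with `grep -oP` and rewrites it with `sed`. For readers who want to sanity-check that parse-and-replace logic outside of Actions, here is a minimal Python sketch of the same operation (the `bump_comfyui_version` name and local `Dockerfile` path are illustrative, not part of the patch):

```python
import re
from pathlib import Path

def bump_comfyui_version(dockerfile: Path, new_version: str) -> str:
    """Replace the ARG COMFYUI_VERSION line and return the previous value."""
    text = dockerfile.read_text()
    match = re.search(r"^ARG COMFYUI_VERSION=(.*)$", text, flags=re.M)
    current = match.group(1) if match else "unknown"  # mirrors the `|| echo "unknown"` fallback
    updated = re.sub(
        r"^ARG COMFYUI_VERSION=.*$",
        f"ARG COMFYUI_VERSION={new_version}",
        text,
        flags=re.M,
    )
    dockerfile.write_text(updated)
    return current

# bump_comfyui_version(Path("Dockerfile"), "v0.7.0") returns the old pin, e.g. "v0.6.x"
```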
diff --git a/README.md b/README.md
index 6d09758c0..e25f3cda7 100644
--- a/README.md
+++ b/README.md
@@ -183,7 +183,7 @@ Simply download, extract with [7-Zip](https://7-zip.org) or with the windows exp
 If you have trouble extracting it, right click the file -> properties -> unblock
 
-Update your Nvidia drivers if it doesn't start.
+The portable above currently comes with Python 3.13 and PyTorch built for CUDA 13.0. Update your Nvidia drivers if it doesn't start.
 
 #### Alternative Downloads:
 
@@ -212,7 +212,7 @@ Python 3.14 works but you may encounter issues with the torch compile node. The
 
 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12
 
-torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch unless it is less than 2 weeks old.
+torch 2.4 and above is supported, but some features may only work on newer versions. We generally recommend using the latest major version of pytorch with the latest CUDA version, unless that release is less than 2 weeks old.
 
 ### Instructions:
diff --git a/alembic_db/versions/0001_assets.py b/alembic_db/versions/0001_assets.py
new file mode 100644
index 000000000..1e10b94dc
--- /dev/null
+++ b/alembic_db/versions/0001_assets.py
@@ -0,0 +1,174 @@
+"""
+Initial assets schema
+
+Revision ID: 0001_assets
+Revises: None
+Create Date: 2025-12-10 00:00:00
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+revision = "0001_assets"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ASSETS: content identity
+    op.create_table(
+        "assets",
+        sa.Column("id", sa.String(length=36), primary_key=True),
+        sa.Column("hash", sa.String(length=256), nullable=True),
+        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
+        sa.Column("mime_type", sa.String(length=255), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
+        sa.CheckConstraint("size_bytes >= 0", name="ck_assets_size_nonneg"),
+    )
+    op.create_index("uq_assets_hash", "assets", ["hash"], unique=True)
+    op.create_index("ix_assets_mime_type", "assets", ["mime_type"])
+
+    # ASSETS_INFO: user-visible references
+    op.create_table(
+        "assets_info",
+        sa.Column("id", sa.String(length=36), primary_key=True),
+        sa.Column("owner_id", sa.String(length=128), nullable=False, server_default=""),
+        sa.Column("name", sa.String(length=512), nullable=False),
+        sa.Column("asset_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False),
+        sa.Column("preview_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="SET NULL"), nullable=True),
+        sa.Column("user_metadata", sa.JSON(), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
+        sa.Column("updated_at", sa.DateTime(timezone=False), nullable=False),
+        sa.Column("last_access_time", sa.DateTime(timezone=False), nullable=False),
+        sa.UniqueConstraint("asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"),
+    )
+    op.create_index("ix_assets_info_owner_id", "assets_info", ["owner_id"])
+    op.create_index("ix_assets_info_asset_id", "assets_info", ["asset_id"])
+    op.create_index("ix_assets_info_name", "assets_info", ["name"])
+    op.create_index("ix_assets_info_created_at", "assets_info", ["created_at"])
+    op.create_index("ix_assets_info_last_access_time", "assets_info", ["last_access_time"])
+    op.create_index("ix_assets_info_owner_name", "assets_info", ["owner_id",
"name"]) + + # TAGS: normalized tag vocabulary + op.create_table( + "tags", + sa.Column("name", sa.String(length=512), primary_key=True), + sa.Column("tag_type", sa.String(length=32), nullable=False, server_default="user"), + sa.CheckConstraint("name = lower(name)", name="ck_tags_lowercase"), + ) + op.create_index("ix_tags_tag_type", "tags", ["tag_type"]) + + # ASSET_INFO_TAGS: many-to-many for tags on AssetInfo + op.create_table( + "asset_info_tags", + sa.Column("asset_info_id", sa.String(length=36), sa.ForeignKey("assets_info.id", ondelete="CASCADE"), nullable=False), + sa.Column("tag_name", sa.String(length=512), sa.ForeignKey("tags.name", ondelete="RESTRICT"), nullable=False), + sa.Column("origin", sa.String(length=32), nullable=False, server_default="manual"), + sa.Column("added_at", sa.DateTime(timezone=False), nullable=False), + sa.PrimaryKeyConstraint("asset_info_id", "tag_name", name="pk_asset_info_tags"), + ) + op.create_index("ix_asset_info_tags_tag_name", "asset_info_tags", ["tag_name"]) + op.create_index("ix_asset_info_tags_asset_info_id", "asset_info_tags", ["asset_info_id"]) + + # ASSET_CACHE_STATE: N:1 local cache rows per Asset + op.create_table( + "asset_cache_state", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("asset_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="CASCADE"), nullable=False), + sa.Column("file_path", sa.Text(), nullable=False), # absolute local path to cached file + sa.Column("mtime_ns", sa.BigInteger(), nullable=True), + sa.Column("needs_verify", sa.Boolean(), nullable=False, server_default=sa.text("false")), + sa.CheckConstraint("(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"), + sa.UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"), + ) + op.create_index("ix_asset_cache_state_file_path", "asset_cache_state", ["file_path"]) + op.create_index("ix_asset_cache_state_asset_id", "asset_cache_state", ["asset_id"]) + + # ASSET_INFO_META: typed KV projection of user_metadata for filtering/sorting + op.create_table( + "asset_info_meta", + sa.Column("asset_info_id", sa.String(length=36), sa.ForeignKey("assets_info.id", ondelete="CASCADE"), nullable=False), + sa.Column("key", sa.String(length=256), nullable=False), + sa.Column("ordinal", sa.Integer(), nullable=False, server_default="0"), + sa.Column("val_str", sa.String(length=2048), nullable=True), + sa.Column("val_num", sa.Numeric(38, 10), nullable=True), + sa.Column("val_bool", sa.Boolean(), nullable=True), + sa.Column("val_json", sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint("asset_info_id", "key", "ordinal", name="pk_asset_info_meta"), + ) + op.create_index("ix_asset_info_meta_key", "asset_info_meta", ["key"]) + op.create_index("ix_asset_info_meta_key_val_str", "asset_info_meta", ["key", "val_str"]) + op.create_index("ix_asset_info_meta_key_val_num", "asset_info_meta", ["key", "val_num"]) + op.create_index("ix_asset_info_meta_key_val_bool", "asset_info_meta", ["key", "val_bool"]) + + # Tags vocabulary + tags_table = sa.table( + "tags", + sa.column("name", sa.String(length=512)), + sa.column("tag_type", sa.String()), + ) + op.bulk_insert( + tags_table, + [ + {"name": "models", "tag_type": "system"}, + {"name": "input", "tag_type": "system"}, + {"name": "output", "tag_type": "system"}, + + {"name": "configs", "tag_type": "system"}, + {"name": "checkpoints", "tag_type": "system"}, + {"name": "loras", "tag_type": "system"}, + {"name": "vae", "tag_type": "system"}, + {"name": "text_encoders", "tag_type": "system"}, 
+ {"name": "diffusion_models", "tag_type": "system"}, + {"name": "clip_vision", "tag_type": "system"}, + {"name": "style_models", "tag_type": "system"}, + {"name": "embeddings", "tag_type": "system"}, + {"name": "diffusers", "tag_type": "system"}, + {"name": "vae_approx", "tag_type": "system"}, + {"name": "controlnet", "tag_type": "system"}, + {"name": "gligen", "tag_type": "system"}, + {"name": "upscale_models", "tag_type": "system"}, + {"name": "hypernetworks", "tag_type": "system"}, + {"name": "photomaker", "tag_type": "system"}, + {"name": "classifiers", "tag_type": "system"}, + + {"name": "encoder", "tag_type": "system"}, + {"name": "decoder", "tag_type": "system"}, + + {"name": "missing", "tag_type": "system"}, + {"name": "rescan", "tag_type": "system"}, + ], + ) + + +def downgrade() -> None: + op.drop_index("ix_asset_info_meta_key_val_bool", table_name="asset_info_meta") + op.drop_index("ix_asset_info_meta_key_val_num", table_name="asset_info_meta") + op.drop_index("ix_asset_info_meta_key_val_str", table_name="asset_info_meta") + op.drop_index("ix_asset_info_meta_key", table_name="asset_info_meta") + op.drop_table("asset_info_meta") + + op.drop_index("ix_asset_cache_state_asset_id", table_name="asset_cache_state") + op.drop_index("ix_asset_cache_state_file_path", table_name="asset_cache_state") + op.drop_constraint("uq_asset_cache_state_file_path", table_name="asset_cache_state") + op.drop_table("asset_cache_state") + + op.drop_index("ix_asset_info_tags_asset_info_id", table_name="asset_info_tags") + op.drop_index("ix_asset_info_tags_tag_name", table_name="asset_info_tags") + op.drop_table("asset_info_tags") + + op.drop_index("ix_tags_tag_type", table_name="tags") + op.drop_table("tags") + + op.drop_constraint("uq_assets_info_asset_owner_name", table_name="assets_info") + op.drop_index("ix_assets_info_owner_name", table_name="assets_info") + op.drop_index("ix_assets_info_last_access_time", table_name="assets_info") + op.drop_index("ix_assets_info_created_at", table_name="assets_info") + op.drop_index("ix_assets_info_name", table_name="assets_info") + op.drop_index("ix_assets_info_asset_id", table_name="assets_info") + op.drop_index("ix_assets_info_owner_id", table_name="assets_info") + op.drop_table("assets_info") + + op.drop_index("uq_assets_hash", table_name="assets") + op.drop_index("ix_assets_mime_type", table_name="assets") + op.drop_table("assets") diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py new file mode 100644 index 000000000..30e87a898 --- /dev/null +++ b/app/assets/api/routes.py @@ -0,0 +1,102 @@ +import logging +import uuid +from aiohttp import web + +from pydantic import ValidationError + +import app.assets.manager as manager +from app import user_manager +from app.assets.api import schemas_in +from app.assets.helpers import get_query_dict + +ROUTES = web.RouteTableDef() +USER_MANAGER: user_manager.UserManager | None = None + +# UUID regex (canonical hyphenated form, case-insensitive) +UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" + +def register_assets_system(app: web.Application, user_manager_instance: user_manager.UserManager) -> None: + global USER_MANAGER + USER_MANAGER = user_manager_instance + app.add_routes(ROUTES) + +def _error_response(status: int, code: str, message: str, details: dict | None = None) -> web.Response: + return web.json_response({"error": {"code": code, "message": message, "details": details or {}}}, status=status) + + +def _validation_error_response(code: str, ve: ValidationError) 
-> web.Response: + return _error_response(400, code, "Validation failed.", {"errors": ve.json()}) + + +@ROUTES.get("/api/assets") +async def list_assets(request: web.Request) -> web.Response: + """ + GET request to list assets. + """ + query_dict = get_query_dict(request) + try: + q = schemas_in.ListAssetsQuery.model_validate(query_dict) + except ValidationError as ve: + return _validation_error_response("INVALID_QUERY", ve) + + payload = manager.list_assets( + include_tags=q.include_tags, + exclude_tags=q.exclude_tags, + name_contains=q.name_contains, + metadata_filter=q.metadata_filter, + limit=q.limit, + offset=q.offset, + sort=q.sort, + order=q.order, + owner_id=USER_MANAGER.get_request_user_id(request), + ) + return web.json_response(payload.model_dump(mode="json")) + + +@ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}") +async def get_asset(request: web.Request) -> web.Response: + """ + GET request to get an asset's info as JSON. + """ + asset_info_id = str(uuid.UUID(request.match_info["id"])) + try: + result = manager.get_asset( + asset_info_id=asset_info_id, + owner_id=USER_MANAGER.get_request_user_id(request), + ) + except ValueError as e: + return _error_response(404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id}) + except Exception: + logging.exception( + "get_asset failed for asset_info_id=%s, owner_id=%s", + asset_info_id, + USER_MANAGER.get_request_user_id(request), + ) + return _error_response(500, "INTERNAL", "Unexpected server error.") + return web.json_response(result.model_dump(mode="json"), status=200) + + +@ROUTES.get("/api/tags") +async def get_tags(request: web.Request) -> web.Response: + """ + GET request to list all tags based on query parameters. + """ + query_map = dict(request.rel_url.query) + + try: + query = schemas_in.TagsListQuery.model_validate(query_map) + except ValidationError as e: + return web.json_response( + {"error": {"code": "INVALID_QUERY", "message": "Invalid query parameters", "details": e.errors()}}, + status=400, + ) + + result = manager.list_tags( + prefix=query.prefix, + limit=query.limit, + offset=query.offset, + order=query.order, + include_zero=query.include_zero, + owner_id=USER_MANAGER.get_request_user_id(request), + ) + return web.json_response(result.model_dump(mode="json")) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py new file mode 100644 index 000000000..200b41aef --- /dev/null +++ b/app/assets/api/schemas_in.py @@ -0,0 +1,94 @@ +import json +import uuid +from typing import Any, Literal + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + conint, + field_validator, +) + + +class ListAssetsQuery(BaseModel): + include_tags: list[str] = Field(default_factory=list) + exclude_tags: list[str] = Field(default_factory=list) + name_contains: str | None = None + + # Accept either a JSON string (query param) or a dict + metadata_filter: dict[str, Any] | None = None + + limit: conint(ge=1, le=500) = 20 + offset: conint(ge=0) = 0 + + sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = "created_at" + order: Literal["asc", "desc"] = "desc" + + @field_validator("include_tags", "exclude_tags", mode="before") + @classmethod + def _split_csv_tags(cls, v): + # Accept "a,b,c" or ["a","b"] (we are liberal in what we accept) + if v is None: + return [] + if isinstance(v, str): + return [t.strip() for t in v.split(",") if t.strip()] + if isinstance(v, list): + out: list[str] = [] + for item in v: + if isinstance(item, str): + out.extend([t.strip() for t in item.split(",") if t.strip()]) + 
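The liberal parsing here is easiest to see with a concrete call; a quick sketch (assuming pydantic v2, per the imports above) of how mixed string/list input normalizes:

```python
# Sketch: exercising ListAssetsQuery's CSV-splitting tag validator.
from app.assets.api import schemas_in

q = schemas_in.ListAssetsQuery.model_validate({
    "include_tags": "models, loras",     # single CSV string
    "exclude_tags": ["missing", "a,b"],  # list whose items may themselves be CSV
})
assert q.include_tags == ["models", "loras"]
assert q.exclude_tags == ["missing", "a", "b"]
assert q.limit == 20                     # defaults still apply
```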
return out + return v + + @field_validator("metadata_filter", mode="before") + @classmethod + def _parse_metadata_json(cls, v): + if v is None or isinstance(v, dict): + return v + if isinstance(v, str) and v.strip(): + try: + parsed = json.loads(v) + except Exception as e: + raise ValueError(f"metadata_filter must be JSON: {e}") from e + if not isinstance(parsed, dict): + raise ValueError("metadata_filter must be a JSON object") + return parsed + return None + + +class TagsListQuery(BaseModel): + model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) + + prefix: str | None = Field(None, min_length=1, max_length=256) + limit: int = Field(100, ge=1, le=1000) + offset: int = Field(0, ge=0, le=10_000_000) + order: Literal["count_desc", "name_asc"] = "count_desc" + include_zero: bool = True + + @field_validator("prefix") + @classmethod + def normalize_prefix(cls, v: str | None) -> str | None: + if v is None: + return v + v = v.strip() + return v.lower() or None + + +class SetPreviewBody(BaseModel): + """Set or clear the preview for an AssetInfo. Provide an Asset.id or null.""" + preview_id: str | None = None + + @field_validator("preview_id", mode="before") + @classmethod + def _norm_uuid(cls, v): + if v is None: + return None + s = str(v).strip() + if not s: + return None + try: + uuid.UUID(s) + except Exception: + raise ValueError("preview_id must be a UUID") + return s diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py new file mode 100644 index 000000000..9f8184f20 --- /dev/null +++ b/app/assets/api/schemas_out.py @@ -0,0 +1,60 @@ +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field, field_serializer + + +class AssetSummary(BaseModel): + id: str + name: str + asset_hash: str | None = None + size: int | None = None + mime_type: str | None = None + tags: list[str] = Field(default_factory=list) + preview_url: str | None = None + created_at: datetime | None = None + updated_at: datetime | None = None + last_access_time: datetime | None = None + + model_config = ConfigDict(from_attributes=True) + + @field_serializer("created_at", "updated_at", "last_access_time") + def _ser_dt(self, v: datetime | None, _info): + return v.isoformat() if v else None + + +class AssetsList(BaseModel): + assets: list[AssetSummary] + total: int + has_more: bool + + +class AssetDetail(BaseModel): + id: str + name: str + asset_hash: str | None = None + size: int | None = None + mime_type: str | None = None + tags: list[str] = Field(default_factory=list) + user_metadata: dict[str, Any] = Field(default_factory=dict) + preview_id: str | None = None + created_at: datetime | None = None + last_access_time: datetime | None = None + + model_config = ConfigDict(from_attributes=True) + + @field_serializer("created_at", "last_access_time") + def _ser_dt(self, v: datetime | None, _info): + return v.isoformat() if v else None + + +class TagUsage(BaseModel): + name: str + count: int + type: str + + +class TagsList(BaseModel): + tags: list[TagUsage] = Field(default_factory=list) + total: int + has_more: bool diff --git a/app/assets/database/bulk_ops.py b/app/assets/database/bulk_ops.py new file mode 100644 index 000000000..c7b75290a --- /dev/null +++ b/app/assets/database/bulk_ops.py @@ -0,0 +1,204 @@ +import os +import uuid +import sqlalchemy +from typing import Iterable +from sqlalchemy.orm import Session +from sqlalchemy.dialects import sqlite + +from app.assets.helpers import utcnow +from app.assets.database.models import Asset, 
AssetCacheState, AssetInfo, AssetInfoTag, AssetInfoMeta + +MAX_BIND_PARAMS = 800 + +def _chunk_rows(rows: list[dict], cols_per_row: int, max_bind_params: int) -> Iterable[list[dict]]: + if not rows: + return [] + rows_per_stmt = max(1, max_bind_params // max(1, cols_per_row)) + for i in range(0, len(rows), rows_per_stmt): + yield rows[i:i + rows_per_stmt] + +def _iter_chunks(seq, n: int): + for i in range(0, len(seq), n): + yield seq[i:i + n] + +def _rows_per_stmt(cols: int) -> int: + return max(1, MAX_BIND_PARAMS // max(1, cols)) + + +def seed_from_paths_batch( + session: Session, + *, + specs: list[dict], + owner_id: str = "", +) -> dict: + """Each spec is a dict with keys: + - abs_path: str + - size_bytes: int + - mtime_ns: int + - info_name: str + - tags: list[str] + - fname: Optional[str] + """ + if not specs: + return {"inserted_infos": 0, "won_states": 0, "lost_states": 0} + + now = utcnow() + asset_rows: list[dict] = [] + state_rows: list[dict] = [] + path_to_asset: dict[str, str] = {} + asset_to_info: dict[str, dict] = {} # asset_id -> prepared info row + path_list: list[str] = [] + + for sp in specs: + ap = os.path.abspath(sp["abs_path"]) + aid = str(uuid.uuid4()) + iid = str(uuid.uuid4()) + path_list.append(ap) + path_to_asset[ap] = aid + + asset_rows.append( + { + "id": aid, + "hash": None, + "size_bytes": sp["size_bytes"], + "mime_type": None, + "created_at": now, + } + ) + state_rows.append( + { + "asset_id": aid, + "file_path": ap, + "mtime_ns": sp["mtime_ns"], + } + ) + asset_to_info[aid] = { + "id": iid, + "owner_id": owner_id, + "name": sp["info_name"], + "asset_id": aid, + "preview_id": None, + "user_metadata": {"filename": sp["fname"]} if sp["fname"] else None, + "created_at": now, + "updated_at": now, + "last_access_time": now, + "_tags": sp["tags"], + "_filename": sp["fname"], + } + + # insert all seed Assets (hash=NULL) + ins_asset = sqlite.insert(Asset) + for chunk in _iter_chunks(asset_rows, _rows_per_stmt(5)): + session.execute(ins_asset, chunk) + + # try to claim AssetCacheState (file_path) + # Insert with ON CONFLICT DO NOTHING, then query to find which paths were actually inserted + ins_state = ( + sqlite.insert(AssetCacheState) + .on_conflict_do_nothing(index_elements=[AssetCacheState.file_path]) + ) + for chunk in _iter_chunks(state_rows, _rows_per_stmt(3)): + session.execute(ins_state, chunk) + + # Query to find which of our paths won (were actually inserted) + winners_by_path: set[str] = set() + for chunk in _iter_chunks(path_list, MAX_BIND_PARAMS): + result = session.execute( + sqlalchemy.select(AssetCacheState.file_path) + .where(AssetCacheState.file_path.in_(chunk)) + .where(AssetCacheState.asset_id.in_([path_to_asset[p] for p in chunk])) + ) + winners_by_path.update(result.scalars().all()) + + all_paths_set = set(path_list) + losers_by_path = all_paths_set - winners_by_path + lost_assets = [path_to_asset[p] for p in losers_by_path] + if lost_assets: # losers get their Asset removed + for id_chunk in _iter_chunks(lost_assets, MAX_BIND_PARAMS): + session.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(id_chunk))) + + if not winners_by_path: + return {"inserted_infos": 0, "won_states": 0, "lost_states": len(losers_by_path)} + + # insert AssetInfo only for winners + # Insert with ON CONFLICT DO NOTHING, then query to find which were actually inserted + winner_info_rows = [asset_to_info[path_to_asset[p]] for p in winners_by_path] + ins_info = ( + sqlite.insert(AssetInfo) + .on_conflict_do_nothing(index_elements=[AssetInfo.asset_id, AssetInfo.owner_id, 
AssetInfo.name]) + ) + for chunk in _iter_chunks(winner_info_rows, _rows_per_stmt(9)): + session.execute(ins_info, chunk) + + # Query to find which info rows were actually inserted (by matching our generated IDs) + all_info_ids = [row["id"] for row in winner_info_rows] + inserted_info_ids: set[str] = set() + for chunk in _iter_chunks(all_info_ids, MAX_BIND_PARAMS): + result = session.execute( + sqlalchemy.select(AssetInfo.id).where(AssetInfo.id.in_(chunk)) + ) + inserted_info_ids.update(result.scalars().all()) + + # build and insert tag + meta rows for the AssetInfo + tag_rows: list[dict] = [] + meta_rows: list[dict] = [] + if inserted_info_ids: + for row in winner_info_rows: + iid = row["id"] + if iid not in inserted_info_ids: + continue + for t in row["_tags"]: + tag_rows.append({ + "asset_info_id": iid, + "tag_name": t, + "origin": "automatic", + "added_at": now, + }) + if row["_filename"]: + meta_rows.append( + { + "asset_info_id": iid, + "key": "filename", + "ordinal": 0, + "val_str": row["_filename"], + "val_num": None, + "val_bool": None, + "val_json": None, + } + ) + + bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=meta_rows, max_bind_params=MAX_BIND_PARAMS) + return { + "inserted_infos": len(inserted_info_ids), + "won_states": len(winners_by_path), + "lost_states": len(losers_by_path), + } + + +def bulk_insert_tags_and_meta( + session: Session, + *, + tag_rows: list[dict], + meta_rows: list[dict], + max_bind_params: int, +) -> None: + """Batch insert into asset_info_tags and asset_info_meta with ON CONFLICT DO NOTHING. + - tag_rows keys: asset_info_id, tag_name, origin, added_at + - meta_rows keys: asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json + """ + if tag_rows: + ins_links = ( + sqlite.insert(AssetInfoTag) + .on_conflict_do_nothing(index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]) + ) + for chunk in _chunk_rows(tag_rows, cols_per_row=4, max_bind_params=max_bind_params): + session.execute(ins_links, chunk) + if meta_rows: + ins_meta = ( + sqlite.insert(AssetInfoMeta) + .on_conflict_do_nothing( + index_elements=[AssetInfoMeta.asset_info_id, AssetInfoMeta.key, AssetInfoMeta.ordinal] + ) + ) + for chunk in _chunk_rows(meta_rows, cols_per_row=7, max_bind_params=max_bind_params): + session.execute(ins_meta, chunk) diff --git a/app/assets/database/models.py b/app/assets/database/models.py new file mode 100644 index 000000000..3cd28f68b --- /dev/null +++ b/app/assets/database/models.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import uuid +from datetime import datetime + +from typing import Any +from sqlalchemy import ( + JSON, + BigInteger, + Boolean, + CheckConstraint, + DateTime, + ForeignKey, + Index, + Integer, + Numeric, + String, + Text, + UniqueConstraint, +) +from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship + +from app.assets.helpers import utcnow +from app.database.models import to_dict, Base + + +class Asset(Base): + __tablename__ = "assets" + + id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + hash: Mapped[str | None] = mapped_column(String(256), nullable=True) + size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0) + mime_type: Mapped[str | None] = mapped_column(String(255)) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=False), nullable=False, default=utcnow + ) + + infos: Mapped[list[AssetInfo]] = relationship( + "AssetInfo", + back_populates="asset", + primaryjoin=lambda: Asset.id == 
foreign(AssetInfo.asset_id), + foreign_keys=lambda: [AssetInfo.asset_id], + cascade="all,delete-orphan", + passive_deletes=True, + ) + + preview_of: Mapped[list[AssetInfo]] = relationship( + "AssetInfo", + back_populates="preview_asset", + primaryjoin=lambda: Asset.id == foreign(AssetInfo.preview_id), + foreign_keys=lambda: [AssetInfo.preview_id], + viewonly=True, + ) + + cache_states: Mapped[list[AssetCacheState]] = relationship( + back_populates="asset", + cascade="all, delete-orphan", + passive_deletes=True, + ) + + __table_args__ = ( + Index("uq_assets_hash", "hash", unique=True), + Index("ix_assets_mime_type", "mime_type"), + CheckConstraint("size_bytes >= 0", name="ck_assets_size_nonneg"), + ) + + def to_dict(self, include_none: bool = False) -> dict[str, Any]: + return to_dict(self, include_none=include_none) + + def __repr__(self) -> str: + return f"" + + +class AssetCacheState(Base): + __tablename__ = "asset_cache_state" + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False) + file_path: Mapped[str] = mapped_column(Text, nullable=False) + mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True) + needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + + asset: Mapped[Asset] = relationship(back_populates="cache_states") + + __table_args__ = ( + Index("ix_asset_cache_state_file_path", "file_path"), + Index("ix_asset_cache_state_asset_id", "asset_id"), + CheckConstraint("(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"), + UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"), + ) + + def to_dict(self, include_none: bool = False) -> dict[str, Any]: + return to_dict(self, include_none=include_none) + + def __repr__(self) -> str: + return f"" + + +class AssetInfo(Base): + __tablename__ = "assets_info" + + id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) + owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="") + name: Mapped[str] = mapped_column(String(512), nullable=False) + asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False) + preview_id: Mapped[str | None] = mapped_column(String(36), ForeignKey("assets.id", ondelete="SET NULL")) + user_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON(none_as_null=True)) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow) + last_access_time: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow) + + asset: Mapped[Asset] = relationship( + "Asset", + back_populates="infos", + foreign_keys=[asset_id], + lazy="selectin", + ) + preview_asset: Mapped[Asset | None] = relationship( + "Asset", + back_populates="preview_of", + foreign_keys=[preview_id], + ) + + metadata_entries: Mapped[list[AssetInfoMeta]] = relationship( + back_populates="asset_info", + cascade="all,delete-orphan", + passive_deletes=True, + ) + + tag_links: Mapped[list[AssetInfoTag]] = relationship( + back_populates="asset_info", + cascade="all,delete-orphan", + passive_deletes=True, + overlaps="tags,asset_infos", + ) + + tags: Mapped[list[Tag]] = relationship( + secondary="asset_info_tags", + back_populates="asset_infos", + lazy="selectin", + 
viewonly=True, + overlaps="tag_links,asset_info_links,asset_infos,tag", + ) + + __table_args__ = ( + UniqueConstraint("asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"), + Index("ix_assets_info_owner_name", "owner_id", "name"), + Index("ix_assets_info_owner_id", "owner_id"), + Index("ix_assets_info_asset_id", "asset_id"), + Index("ix_assets_info_name", "name"), + Index("ix_assets_info_created_at", "created_at"), + Index("ix_assets_info_last_access_time", "last_access_time"), + ) + + def to_dict(self, include_none: bool = False) -> dict[str, Any]: + data = to_dict(self, include_none=include_none) + data["tags"] = [t.name for t in self.tags] + return data + + def __repr__(self) -> str: + return f"" + + +class AssetInfoMeta(Base): + __tablename__ = "asset_info_meta" + + asset_info_id: Mapped[str] = mapped_column( + String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True + ) + key: Mapped[str] = mapped_column(String(256), primary_key=True) + ordinal: Mapped[int] = mapped_column(Integer, primary_key=True, default=0) + + val_str: Mapped[str | None] = mapped_column(String(2048), nullable=True) + val_num: Mapped[float | None] = mapped_column(Numeric(38, 10), nullable=True) + val_bool: Mapped[bool | None] = mapped_column(Boolean, nullable=True) + val_json: Mapped[Any | None] = mapped_column(JSON(none_as_null=True), nullable=True) + + asset_info: Mapped[AssetInfo] = relationship(back_populates="metadata_entries") + + __table_args__ = ( + Index("ix_asset_info_meta_key", "key"), + Index("ix_asset_info_meta_key_val_str", "key", "val_str"), + Index("ix_asset_info_meta_key_val_num", "key", "val_num"), + Index("ix_asset_info_meta_key_val_bool", "key", "val_bool"), + ) + + +class AssetInfoTag(Base): + __tablename__ = "asset_info_tags" + + asset_info_id: Mapped[str] = mapped_column( + String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True + ) + tag_name: Mapped[str] = mapped_column( + String(512), ForeignKey("tags.name", ondelete="RESTRICT"), primary_key=True + ) + origin: Mapped[str] = mapped_column(String(32), nullable=False, default="manual") + added_at: Mapped[datetime] = mapped_column( + DateTime(timezone=False), nullable=False, default=utcnow + ) + + asset_info: Mapped[AssetInfo] = relationship(back_populates="tag_links") + tag: Mapped[Tag] = relationship(back_populates="asset_info_links") + + __table_args__ = ( + Index("ix_asset_info_tags_tag_name", "tag_name"), + Index("ix_asset_info_tags_asset_info_id", "asset_info_id"), + ) + + +class Tag(Base): + __tablename__ = "tags" + + name: Mapped[str] = mapped_column(String(512), primary_key=True) + tag_type: Mapped[str] = mapped_column(String(32), nullable=False, default="user") + + asset_info_links: Mapped[list[AssetInfoTag]] = relationship( + back_populates="tag", + overlaps="asset_infos,tags", + ) + asset_infos: Mapped[list[AssetInfo]] = relationship( + secondary="asset_info_tags", + back_populates="tags", + viewonly=True, + overlaps="asset_info_links,tag_links,tags,asset_info", + ) + + __table_args__ = ( + Index("ix_tags_tag_type", "tag_type"), + ) + + def __repr__(self) -> str: + return f"" diff --git a/app/assets/database/queries.py b/app/assets/database/queries.py new file mode 100644 index 000000000..0824c0c2f --- /dev/null +++ b/app/assets/database/queries.py @@ -0,0 +1,267 @@ +import sqlalchemy as sa +from collections import defaultdict +from sqlalchemy import select, exists, func +from sqlalchemy.orm import Session, contains_eager, noload +from app.assets.database.models import 
Asset, AssetInfo, AssetInfoMeta, AssetInfoTag, Tag +from app.assets.helpers import escape_like_prefix, normalize_tags +from typing import Sequence + + +def visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement: + """Build owner visibility predicate for reads. Owner-less rows are visible to everyone.""" + owner_id = (owner_id or "").strip() + if owner_id == "": + return AssetInfo.owner_id == "" + return AssetInfo.owner_id.in_(["", owner_id]) + + +def apply_tag_filters( + stmt: sa.sql.Select, + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, +) -> sa.sql.Select: + """include_tags: every tag must be present; exclude_tags: none may be present.""" + include_tags = normalize_tags(include_tags) + exclude_tags = normalize_tags(exclude_tags) + + if include_tags: + for tag_name in include_tags: + stmt = stmt.where( + exists().where( + (AssetInfoTag.asset_info_id == AssetInfo.id) + & (AssetInfoTag.tag_name == tag_name) + ) + ) + + if exclude_tags: + stmt = stmt.where( + ~exists().where( + (AssetInfoTag.asset_info_id == AssetInfo.id) + & (AssetInfoTag.tag_name.in_(exclude_tags)) + ) + ) + return stmt + +def apply_metadata_filter( + stmt: sa.sql.Select, + metadata_filter: dict | None = None, +) -> sa.sql.Select: + """Apply filters using asset_info_meta projection table.""" + if not metadata_filter: + return stmt + + def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement: + return sa.exists().where( + AssetInfoMeta.asset_info_id == AssetInfo.id, + AssetInfoMeta.key == key, + *preds, + ) + + def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement: + if value is None: + no_row_for_key = sa.not_( + sa.exists().where( + AssetInfoMeta.asset_info_id == AssetInfo.id, + AssetInfoMeta.key == key, + ) + ) + null_row = _exists_for_pred( + key, + AssetInfoMeta.val_json.is_(None), + AssetInfoMeta.val_str.is_(None), + AssetInfoMeta.val_num.is_(None), + AssetInfoMeta.val_bool.is_(None), + ) + return sa.or_(no_row_for_key, null_row) + + if isinstance(value, bool): + return _exists_for_pred(key, AssetInfoMeta.val_bool == bool(value)) + if isinstance(value, (int, float)): + from decimal import Decimal + num = value if isinstance(value, Decimal) else Decimal(str(value)) + return _exists_for_pred(key, AssetInfoMeta.val_num == num) + if isinstance(value, str): + return _exists_for_pred(key, AssetInfoMeta.val_str == value) + return _exists_for_pred(key, AssetInfoMeta.val_json == value) + + for k, v in metadata_filter.items(): + if isinstance(v, list): + ors = [_exists_clause_for_value(k, elem) for elem in v] + if ors: + stmt = stmt.where(sa.or_(*ors)) + else: + stmt = stmt.where(_exists_clause_for_value(k, v)) + return stmt + + +def asset_exists_by_hash(session: Session, asset_hash: str) -> bool: + """ + Check if an asset with a given hash exists in database. 
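Both tag filters above compile to correlated EXISTS subqueries against `asset_info_tags` rather than joins, so adding filters never duplicates result rows. A sketch for inspecting the generated SQL (assuming the `app` package is importable):

```python
# Sketch: apply_tag_filters emits one EXISTS per required tag and a single
# NOT EXISTS covering the whole excluded set.
import sqlalchemy as sa
from app.assets.database.models import AssetInfo
from app.assets.database.queries import apply_tag_filters

stmt = apply_tag_filters(
    sa.select(AssetInfo.id),
    include_tags=["models", "loras"],
    exclude_tags=["missing"],
)
print(stmt.compile(compile_kwargs={"literal_binds": True}))
```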
+ """ + row = ( + session.execute( + select(sa.literal(True)).select_from(Asset).where(Asset.hash == asset_hash).limit(1) + ) + ).first() + return row is not None + +def get_asset_info_by_id(session: Session, asset_info_id: str) -> AssetInfo | None: + return session.get(AssetInfo, asset_info_id) + +def list_asset_infos_page( + session: Session, + owner_id: str = "", + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, + name_contains: str | None = None, + metadata_filter: dict | None = None, + limit: int = 20, + offset: int = 0, + sort: str = "created_at", + order: str = "desc", +) -> tuple[list[AssetInfo], dict[str, list[str]], int]: + base = ( + select(AssetInfo) + .join(Asset, Asset.id == AssetInfo.asset_id) + .options(contains_eager(AssetInfo.asset), noload(AssetInfo.tags)) + .where(visible_owner_clause(owner_id)) + ) + + if name_contains: + escaped, esc = escape_like_prefix(name_contains) + base = base.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc)) + + base = apply_tag_filters(base, include_tags, exclude_tags) + base = apply_metadata_filter(base, metadata_filter) + + sort = (sort or "created_at").lower() + order = (order or "desc").lower() + sort_map = { + "name": AssetInfo.name, + "created_at": AssetInfo.created_at, + "updated_at": AssetInfo.updated_at, + "last_access_time": AssetInfo.last_access_time, + "size": Asset.size_bytes, + } + sort_col = sort_map.get(sort, AssetInfo.created_at) + sort_exp = sort_col.desc() if order == "desc" else sort_col.asc() + + base = base.order_by(sort_exp).limit(limit).offset(offset) + + count_stmt = ( + select(sa.func.count()) + .select_from(AssetInfo) + .join(Asset, Asset.id == AssetInfo.asset_id) + .where(visible_owner_clause(owner_id)) + ) + if name_contains: + escaped, esc = escape_like_prefix(name_contains) + count_stmt = count_stmt.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc)) + count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags) + count_stmt = apply_metadata_filter(count_stmt, metadata_filter) + + total = int((session.execute(count_stmt)).scalar_one() or 0) + + infos = (session.execute(base)).unique().scalars().all() + + id_list: list[str] = [i.id for i in infos] + tag_map: dict[str, list[str]] = defaultdict(list) + if id_list: + rows = session.execute( + select(AssetInfoTag.asset_info_id, Tag.name) + .join(Tag, Tag.name == AssetInfoTag.tag_name) + .where(AssetInfoTag.asset_info_id.in_(id_list)) + ) + for aid, tag_name in rows.all(): + tag_map[aid].append(tag_name) + + return infos, tag_map, total + +def fetch_asset_info_asset_and_tags( + session: Session, + asset_info_id: str, + owner_id: str = "", +) -> tuple[AssetInfo, Asset, list[str]] | None: + stmt = ( + select(AssetInfo, Asset, Tag.name) + .join(Asset, Asset.id == AssetInfo.asset_id) + .join(AssetInfoTag, AssetInfoTag.asset_info_id == AssetInfo.id, isouter=True) + .join(Tag, Tag.name == AssetInfoTag.tag_name, isouter=True) + .where( + AssetInfo.id == asset_info_id, + visible_owner_clause(owner_id), + ) + .options(noload(AssetInfo.tags)) + .order_by(Tag.name.asc()) + ) + + rows = (session.execute(stmt)).all() + if not rows: + return None + + first_info, first_asset, _ = rows[0] + tags: list[str] = [] + seen: set[str] = set() + for _info, _asset, tag_name in rows: + if tag_name and tag_name not in seen: + seen.add(tag_name) + tags.append(tag_name) + return first_info, first_asset, tags + +def list_tags_with_usage( + session: Session, + prefix: str | None = None, + limit: int = 100, + offset: int = 0, + include_zero: 
bool = True, + order: str = "count_desc", + owner_id: str = "", +) -> tuple[list[tuple[str, str, int]], int]: + counts_sq = ( + select( + AssetInfoTag.tag_name.label("tag_name"), + func.count(AssetInfoTag.asset_info_id).label("cnt"), + ) + .select_from(AssetInfoTag) + .join(AssetInfo, AssetInfo.id == AssetInfoTag.asset_info_id) + .where(visible_owner_clause(owner_id)) + .group_by(AssetInfoTag.tag_name) + .subquery() + ) + + q = ( + select( + Tag.name, + Tag.tag_type, + func.coalesce(counts_sq.c.cnt, 0).label("count"), + ) + .select_from(Tag) + .join(counts_sq, counts_sq.c.tag_name == Tag.name, isouter=True) + ) + + if prefix: + escaped, esc = escape_like_prefix(prefix.strip().lower()) + q = q.where(Tag.name.like(escaped + "%", escape=esc)) + + if not include_zero: + q = q.where(func.coalesce(counts_sq.c.cnt, 0) > 0) + + if order == "name_asc": + q = q.order_by(Tag.name.asc()) + else: + q = q.order_by(func.coalesce(counts_sq.c.cnt, 0).desc(), Tag.name.asc()) + + total_q = select(func.count()).select_from(Tag) + if prefix: + escaped, esc = escape_like_prefix(prefix.strip().lower()) + total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc)) + if not include_zero: + total_q = total_q.where( + Tag.name.in_(select(AssetInfoTag.tag_name).group_by(AssetInfoTag.tag_name)) + ) + + rows = (session.execute(q.limit(limit).offset(offset))).all() + total = (session.execute(total_q)).scalar_one() + + rows_norm = [(name, ttype, int(count or 0)) for (name, ttype, count) in rows] + return rows_norm, int(total or 0) diff --git a/app/assets/database/tags.py b/app/assets/database/tags.py new file mode 100644 index 000000000..3ab6497c2 --- /dev/null +++ b/app/assets/database/tags.py @@ -0,0 +1,62 @@ +from typing import Iterable + +import sqlalchemy +from sqlalchemy.orm import Session +from sqlalchemy.dialects import sqlite + +from app.assets.helpers import normalize_tags, utcnow +from app.assets.database.models import Tag, AssetInfoTag, AssetInfo + + +def ensure_tags_exist(session: Session, names: Iterable[str], tag_type: str = "user") -> None: + wanted = normalize_tags(list(names)) + if not wanted: + return + rows = [{"name": n, "tag_type": tag_type} for n in list(dict.fromkeys(wanted))] + ins = ( + sqlite.insert(Tag) + .values(rows) + .on_conflict_do_nothing(index_elements=[Tag.name]) + ) + return session.execute(ins) + +def add_missing_tag_for_asset_id( + session: Session, + *, + asset_id: str, + origin: str = "automatic", +) -> None: + select_rows = ( + sqlalchemy.select( + AssetInfo.id.label("asset_info_id"), + sqlalchemy.literal("missing").label("tag_name"), + sqlalchemy.literal(origin).label("origin"), + sqlalchemy.literal(utcnow()).label("added_at"), + ) + .where(AssetInfo.asset_id == asset_id) + .where( + sqlalchemy.not_( + sqlalchemy.exists().where((AssetInfoTag.asset_info_id == AssetInfo.id) & (AssetInfoTag.tag_name == "missing")) + ) + ) + ) + session.execute( + sqlite.insert(AssetInfoTag) + .from_select( + ["asset_info_id", "tag_name", "origin", "added_at"], + select_rows, + ) + .on_conflict_do_nothing(index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]) + ) + +def remove_missing_tag_for_asset_id( + session: Session, + *, + asset_id: str, +) -> None: + session.execute( + sqlalchemy.delete(AssetInfoTag).where( + AssetInfoTag.asset_info_id.in_(sqlalchemy.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)), + AssetInfoTag.tag_name == "missing", + ) + ) diff --git a/app/assets/hashing.py b/app/assets/hashing.py new file mode 100644 index 000000000..4b72084b9 --- /dev/null 
+++ b/app/assets/hashing.py
@@ -0,0 +1,75 @@
+from blake3 import blake3
+from typing import IO
+import os
+import asyncio
+
+
+DEFAULT_CHUNK = 8 * 1024 * 1024  # 8 MiB
+
+# NOTE: this allows hashing different representations of a file-like object
+def blake3_hash(
+    fp: str | IO[bytes],
+    chunk_size: int = DEFAULT_CHUNK,
+) -> str:
+    """
+    Returns a BLAKE3 hex digest for ``fp``, which may be:
+      - a filename (str/bytes) or PathLike
+      - an open binary file object
+
+    If ``fp`` is a file object, it must be opened in **binary** mode and support
+    ``read``, ``seek``, and ``tell``. The function will seek to the start before
+    reading and will attempt to restore the original position afterward.
+    """
+    # duck typing to check if input is a file-like object
+    if hasattr(fp, "read"):
+        return _hash_file_obj(fp, chunk_size)
+
+    with open(os.fspath(fp), "rb") as f:
+        return _hash_file_obj(f, chunk_size)
+
+
+async def blake3_hash_async(
+    fp: str | IO[bytes],
+    chunk_size: int = DEFAULT_CHUNK,
+) -> str:
+    """Async wrapper around ``blake3_hash``.
+
+    Uses a worker thread so the event loop remains responsive.
+    """
+    # File-like objects are handed straight to the worker thread.
+    if hasattr(fp, "read"):
+        return await asyncio.to_thread(blake3_hash, fp, chunk_size)
+
+    # Paths are opened inside the worker thread to keep I/O off the loop.
+    def _worker() -> str:
+        with open(os.fspath(fp), "rb") as f:
+            return _hash_file_obj(f, chunk_size)
+
+    return await asyncio.to_thread(_worker)
+
+
+def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str:
+    """
+    Hash an already-open binary file object by streaming in chunks.
+
+    - Seeks to the beginning before reading (if supported).
+    - Restores the original position afterward (if tell/seek are supported).
+    """
+    if chunk_size <= 0:
+        chunk_size = DEFAULT_CHUNK
+
+    # the file object may not be positioned at the beginning; remember where
+    # it was so the position can be restored after hashing
+    orig_pos = file_obj.tell()
+
+    try:
+        # seek to the beginning before reading
+        if orig_pos != 0:
+            file_obj.seek(0)
+
+        h = blake3()
+        while True:
+            chunk = file_obj.read(chunk_size)
+            if not chunk:
+                break
+            h.update(chunk)
+        return h.hexdigest()
+    finally:
+        # restore the original position, if needed
+        if orig_pos != 0:
+            file_obj.seek(orig_pos)
diff --git a/app/assets/helpers.py b/app/assets/helpers.py
new file mode 100644
index 000000000..08b465b5a
--- /dev/null
+++ b/app/assets/helpers.py
@@ -0,0 +1,217 @@
+import contextlib
+import os
+from aiohttp import web
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Literal, Any
+
+import folder_paths
+
+
+RootType = Literal["models", "input", "output"]
+ALLOWED_ROOTS: tuple[RootType, ...] = ("models", "input", "output")
+
+def get_query_dict(request: web.Request) -> dict[str, Any]:
+    """
+    Build a dictionary of query parameters from the request.
+
+    ``request.query`` is a MultiMapping[str]; it must be converted to a plain
+    dictionary before Pydantic can validate it.
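A sketch of the fold this helper performs, using `multidict` directly (the `MultiDict` type is what aiohttp backs `request.query` with): repeated keys become lists, single keys stay scalar.

```python
# Sketch: folding a MultiMapping into a Pydantic-friendly dict.
from multidict import MultiDict

query = MultiDict([("include_tags", "models"), ("include_tags", "loras"), ("limit", "5")])
query_dict = {
    key: query.getall(key) if len(query.getall(key)) > 1 else query.get(key)
    for key in query.keys()
}
assert query_dict == {"include_tags": ["models", "loras"], "limit": "5"}
```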
+ """ + query_dict = { + key: request.query.getall(key) if len(request.query.getall(key)) > 1 else request.query.get(key) + for key in request.query.keys() + } + return query_dict + +def list_tree(base_dir: str) -> list[str]: + out: list[str] = [] + base_abs = os.path.abspath(base_dir) + if not os.path.isdir(base_abs): + return out + for dirpath, _subdirs, filenames in os.walk(base_abs, topdown=True, followlinks=False): + for name in filenames: + out.append(os.path.abspath(os.path.join(dirpath, name))) + return out + +def prefixes_for_root(root: RootType) -> list[str]: + if root == "models": + bases: list[str] = [] + for _bucket, paths in get_comfy_models_folders(): + bases.extend(paths) + return [os.path.abspath(p) for p in bases] + if root == "input": + return [os.path.abspath(folder_paths.get_input_directory())] + if root == "output": + return [os.path.abspath(folder_paths.get_output_directory())] + return [] + +def escape_like_prefix(s: str, escape: str = "!") -> tuple[str, str]: + """Escapes %, _ and the escape char itself in a LIKE prefix. + Returns (escaped_prefix, escape_char). Caller should append '%' and pass escape=escape_char to .like(). + """ + s = s.replace(escape, escape + escape) # escape the escape char first + s = s.replace("%", escape + "%").replace("_", escape + "_") # escape LIKE wildcards + return s, escape + +def fast_asset_file_check( + *, + mtime_db: int | None, + size_db: int | None, + stat_result: os.stat_result, +) -> bool: + if mtime_db is None: + return False + actual_mtime_ns = getattr(stat_result, "st_mtime_ns", int(stat_result.st_mtime * 1_000_000_000)) + if int(mtime_db) != int(actual_mtime_ns): + return False + sz = int(size_db or 0) + if sz > 0: + return int(stat_result.st_size) == sz + return True + +def utcnow() -> datetime: + """Naive UTC timestamp (no tzinfo). We always treat DB datetimes as UTC.""" + return datetime.now(timezone.utc).replace(tzinfo=None) + +def get_comfy_models_folders() -> list[tuple[str, list[str]]]: + """Build a list of (folder_name, base_paths[]) categories that are configured for model locations. + + We trust `folder_paths.folder_names_and_paths` and include a category if + *any* of its base paths lies under the Comfy `models_dir`. + """ + targets: list[tuple[str, list[str]]] = [] + models_root = os.path.abspath(folder_paths.models_dir) + for name, values in folder_paths.folder_names_and_paths.items(): + paths, _exts = values[0], values[1] # NOTE: this prevents nodepacks that hackily edit folder_... from breaking ComfyUI + if any(os.path.abspath(p).startswith(models_root + os.sep) for p in paths): + targets.append((name, paths)) + return targets + +def compute_relative_filename(file_path: str) -> str | None: + """ + Return the model's path relative to the last well-known folder (the model category), + using forward slashes, eg: + /.../models/checkpoints/flux/123/flux.safetensors -> "flux/123/flux.safetensors" + /.../models/text_encoders/clip_g.safetensors -> "clip_g.safetensors" + + For non-model paths, returns None. + NOTE: this is a temporary helper, used only for initializing metadata["filename"] field. 
+ """ + try: + root_category, rel_path = get_relative_to_root_category_path_of_asset(file_path) + except ValueError: + return None + + p = Path(rel_path) + parts = [seg for seg in p.parts if seg not in (".", "..", p.anchor)] + if not parts: + return None + + if root_category == "models": + # parts[0] is the category ("checkpoints", "vae", etc) – drop it + inside = parts[1:] if len(parts) > 1 else [parts[0]] + return "/".join(inside) + return "/".join(parts) # input/output: keep all parts + + +def get_relative_to_root_category_path_of_asset(file_path: str) -> tuple[Literal["input", "output", "models"], str]: + """Given an absolute or relative file path, determine which root category the path belongs to: + - 'input' if the file resides under `folder_paths.get_input_directory()` + - 'output' if the file resides under `folder_paths.get_output_directory()` + - 'models' if the file resides under any base path of categories returned by `get_comfy_models_folders()` + + Returns: + (root_category, relative_path_inside_that_root) + For 'models', the relative path is prefixed with the category name: + e.g. ('models', 'vae/test/sub/ae.safetensors') + + Raises: + ValueError: if the path does not belong to input, output, or configured model bases. + """ + fp_abs = os.path.abspath(file_path) + + def _is_within(child: str, parent: str) -> bool: + try: + return os.path.commonpath([child, parent]) == parent + except Exception: + return False + + def _rel(child: str, parent: str) -> str: + return os.path.relpath(os.path.join(os.sep, os.path.relpath(child, parent)), os.sep) + + # 1) input + input_base = os.path.abspath(folder_paths.get_input_directory()) + if _is_within(fp_abs, input_base): + return "input", _rel(fp_abs, input_base) + + # 2) output + output_base = os.path.abspath(folder_paths.get_output_directory()) + if _is_within(fp_abs, output_base): + return "output", _rel(fp_abs, output_base) + + # 3) models (check deepest matching base to avoid ambiguity) + best: tuple[int, str, str] | None = None # (base_len, bucket, rel_inside_bucket) + for bucket, bases in get_comfy_models_folders(): + for b in bases: + base_abs = os.path.abspath(b) + if not _is_within(fp_abs, base_abs): + continue + cand = (len(base_abs), bucket, _rel(fp_abs, base_abs)) + if best is None or cand[0] > best[0]: + best = cand + + if best is not None: + _, bucket, rel_inside = best + combined = os.path.join(bucket, rel_inside) + return "models", os.path.relpath(os.path.join(os.sep, combined), os.sep) + + raise ValueError(f"Path is not within input, output, or configured model bases: {file_path}") + +def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: + """Return a tuple (name, tags) derived from a filesystem path. + + Semantics: + - Root category is determined by `get_relative_to_root_category_path_of_asset`. + - The returned `name` is the base filename with extension from the relative path. + - The returned `tags` are: + [root_category] + parent folders of the relative path (in order) + For 'models', this means: + file '/.../ModelsDir/vae/test_tag/ae.safetensors' + -> root_category='models', some_path='vae/test_tag/ae.safetensors' + -> name='ae.safetensors', tags=['models', 'vae', 'test_tag'] + + Raises: + ValueError: if the path does not belong to input, output, or configured model bases. 
+ """ + root_category, some_path = get_relative_to_root_category_path_of_asset(file_path) + p = Path(some_path) + parent_parts = [part for part in p.parent.parts if part not in (".", "..", p.anchor)] + return p.name, list(dict.fromkeys(normalize_tags([root_category, *parent_parts]))) + +def normalize_tags(tags: list[str] | None) -> list[str]: + """ + Normalize a list of tags by: + - Stripping whitespace and converting to lowercase. + - Removing duplicates. + """ + return [t.strip().lower() for t in (tags or []) if (t or "").strip()] + +def collect_models_files() -> list[str]: + out: list[str] = [] + for folder_name, bases in get_comfy_models_folders(): + rel_files = folder_paths.get_filename_list(folder_name) or [] + for rel_path in rel_files: + abs_path = folder_paths.get_full_path(folder_name, rel_path) + if not abs_path: + continue + abs_path = os.path.abspath(abs_path) + allowed = False + for b in bases: + base_abs = os.path.abspath(b) + with contextlib.suppress(Exception): + if os.path.commonpath([abs_path, base_abs]) == base_abs: + allowed = True + break + if allowed: + out.append(abs_path) + return out diff --git a/app/assets/manager.py b/app/assets/manager.py new file mode 100644 index 000000000..6425e7aa2 --- /dev/null +++ b/app/assets/manager.py @@ -0,0 +1,123 @@ +from typing import Sequence + +from app.database.db import create_session +from app.assets.api import schemas_out +from app.assets.database.queries import ( + asset_exists_by_hash, + fetch_asset_info_asset_and_tags, + list_asset_infos_page, + list_tags_with_usage, +) + + +def _safe_sort_field(requested: str | None) -> str: + if not requested: + return "created_at" + v = requested.lower() + if v in {"name", "created_at", "updated_at", "size", "last_access_time"}: + return v + return "created_at" + + +def asset_exists(asset_hash: str) -> bool: + with create_session() as session: + return asset_exists_by_hash(session, asset_hash=asset_hash) + +def list_assets( + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, + name_contains: str | None = None, + metadata_filter: dict | None = None, + limit: int = 20, + offset: int = 0, + sort: str = "created_at", + order: str = "desc", + owner_id: str = "", +) -> schemas_out.AssetsList: + sort = _safe_sort_field(sort) + order = "desc" if (order or "desc").lower() not in {"asc", "desc"} else order.lower() + + with create_session() as session: + infos, tag_map, total = list_asset_infos_page( + session, + owner_id=owner_id, + include_tags=include_tags, + exclude_tags=exclude_tags, + name_contains=name_contains, + metadata_filter=metadata_filter, + limit=limit, + offset=offset, + sort=sort, + order=order, + ) + + summaries: list[schemas_out.AssetSummary] = [] + for info in infos: + asset = info.asset + tags = tag_map.get(info.id, []) + summaries.append( + schemas_out.AssetSummary( + id=info.id, + name=info.name, + asset_hash=asset.hash if asset else None, + size=int(asset.size_bytes) if asset else None, + mime_type=asset.mime_type if asset else None, + tags=tags, + preview_url=f"/api/assets/{info.id}/content", + created_at=info.created_at, + updated_at=info.updated_at, + last_access_time=info.last_access_time, + ) + ) + + return schemas_out.AssetsList( + assets=summaries, + total=total, + has_more=(offset + len(summaries)) < total, + ) + +def get_asset(asset_info_id: str, owner_id: str = "") -> schemas_out.AssetDetail: + with create_session() as session: + res = fetch_asset_info_asset_and_tags(session, asset_info_id=asset_info_id, owner_id=owner_id) + 
if not res: + raise ValueError(f"AssetInfo {asset_info_id} not found") + info, asset, tag_names = res + preview_id = info.preview_id + + return schemas_out.AssetDetail( + id=info.id, + name=info.name, + asset_hash=asset.hash if asset else None, + size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None, + mime_type=asset.mime_type if asset else None, + tags=tag_names, + user_metadata=info.user_metadata or {}, + preview_id=preview_id, + created_at=info.created_at, + last_access_time=info.last_access_time, + ) + +def list_tags( + prefix: str | None = None, + limit: int = 100, + offset: int = 0, + order: str = "count_desc", + include_zero: bool = True, + owner_id: str = "", +) -> schemas_out.TagsList: + limit = max(1, min(1000, limit)) + offset = max(0, offset) + + with create_session() as session: + rows, total = list_tags_with_usage( + session, + prefix=prefix, + limit=limit, + offset=offset, + include_zero=include_zero, + order=order, + owner_id=owner_id, + ) + + tags = [schemas_out.TagUsage(name=name, count=count, type=tag_type) for (name, tag_type, count) in rows] + return schemas_out.TagsList(tags=tags, total=total, has_more=(offset + len(tags)) < total) diff --git a/app/assets/scanner.py b/app/assets/scanner.py new file mode 100644 index 000000000..a16e41d94 --- /dev/null +++ b/app/assets/scanner.py @@ -0,0 +1,229 @@ +import contextlib +import time +import logging +import os +import sqlalchemy + +import folder_paths +from app.database.db import create_session, dependencies_available +from app.assets.helpers import ( + collect_models_files, compute_relative_filename, fast_asset_file_check, get_name_and_tags_from_asset_path, + list_tree, prefixes_for_root, escape_like_prefix, + RootType +) +from app.assets.database.tags import add_missing_tag_for_asset_id, ensure_tags_exist, remove_missing_tag_for_asset_id +from app.assets.database.bulk_ops import seed_from_paths_batch +from app.assets.database.models import Asset, AssetCacheState, AssetInfo + + +def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None: + """ + Scan the given roots and seed the assets into the database.
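+ + Roots may be any of 'models', 'input' and 'output'; empty files and paths already + tracked in the database are skipped. Typical startup call (illustrative): + seed_assets(("models", "input", "output"), enable_logging=True)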
+ """ + if not dependencies_available(): + if enable_logging: + logging.warning("Database dependencies not available, skipping assets scan") + return + t_start = time.perf_counter() + created = 0 + skipped_existing = 0 + paths: list[str] = [] + try: + existing_paths: set[str] = set() + for r in roots: + try: + survivors: set[str] = _fast_db_consistency_pass(r, collect_existing_paths=True, update_missing_tags=True) + if survivors: + existing_paths.update(survivors) + except Exception as e: + logging.exception("fast DB scan failed for %s: %s", r, e) + + if "models" in roots: + paths.extend(collect_models_files()) + if "input" in roots: + paths.extend(list_tree(folder_paths.get_input_directory())) + if "output" in roots: + paths.extend(list_tree(folder_paths.get_output_directory())) + + specs: list[dict] = [] + tag_pool: set[str] = set() + for p in paths: + abs_p = os.path.abspath(p) + if abs_p in existing_paths: + skipped_existing += 1 + continue + try: + stat_p = os.stat(abs_p, follow_symlinks=False) + except OSError: + continue + # skip empty files + if not stat_p.st_size: + continue + name, tags = get_name_and_tags_from_asset_path(abs_p) + specs.append( + { + "abs_path": abs_p, + "size_bytes": stat_p.st_size, + "mtime_ns": getattr(stat_p, "st_mtime_ns", int(stat_p.st_mtime * 1_000_000_000)), + "info_name": name, + "tags": tags, + "fname": compute_relative_filename(abs_p), + } + ) + for t in tags: + tag_pool.add(t) + # if no file specs, nothing to do + if not specs: + return + with create_session() as sess: + if tag_pool: + ensure_tags_exist(sess, tag_pool, tag_type="user") + + result = seed_from_paths_batch(sess, specs=specs, owner_id="") + created += result["inserted_infos"] + sess.commit() + finally: + if enable_logging: + logging.info( + "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)", + roots, + time.perf_counter() - t_start, + created, + skipped_existing, + len(paths), + ) + + +def _fast_db_consistency_pass( + root: RootType, + *, + collect_existing_paths: bool = False, + update_missing_tags: bool = False, +) -> set[str] | None: + """Fast DB+FS pass for a root: + - Toggle needs_verify per state using fast check + - For hashed assets with at least one fast-ok state in this root: delete stale missing states + - For seed assets with all states missing: delete Asset and its AssetInfos + - Optionally add/remove 'missing' tags based on fast-ok in this root + - Optionally return surviving absolute paths + """ + prefixes = prefixes_for_root(root) + if not prefixes: + return set() if collect_existing_paths else None + + conds = [] + for p in prefixes: + base = os.path.abspath(p) + if not base.endswith(os.sep): + base += os.sep + escaped, esc = escape_like_prefix(base) + conds.append(AssetCacheState.file_path.like(escaped + "%", escape=esc)) + + with create_session() as sess: + rows = ( + sess.execute( + sqlalchemy.select( + AssetCacheState.id, + AssetCacheState.file_path, + AssetCacheState.mtime_ns, + AssetCacheState.needs_verify, + AssetCacheState.asset_id, + Asset.hash, + Asset.size_bytes, + ) + .join(Asset, Asset.id == AssetCacheState.asset_id) + .where(sqlalchemy.or_(*conds)) + .order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc()) + ) + ).all() + + by_asset: dict[str, dict] = {} + for sid, fp, mtime_db, needs_verify, aid, a_hash, a_size in rows: + acc = by_asset.get(aid) + if acc is None: + acc = {"hash": a_hash, "size_db": int(a_size or 0), "states": []} + by_asset[aid] = acc + + fast_ok = False + try: + exists = True + fast_ok = 
fast_asset_file_check( + mtime_db=mtime_db, + size_db=acc["size_db"], + stat_result=os.stat(fp, follow_symlinks=True), + ) + except FileNotFoundError: + exists = False + except OSError: + exists = False + + acc["states"].append({ + "sid": sid, + "fp": fp, + "exists": exists, + "fast_ok": fast_ok, + "needs_verify": bool(needs_verify), + }) + + to_set_verify: list[int] = [] + to_clear_verify: list[int] = [] + stale_state_ids: list[int] = [] + survivors: set[str] = set() + + for aid, acc in by_asset.items(): + a_hash = acc["hash"] + states = acc["states"] + any_fast_ok = any(s["fast_ok"] for s in states) + all_missing = all(not s["exists"] for s in states) + + for s in states: + if not s["exists"]: + continue + if s["fast_ok"] and s["needs_verify"]: + to_clear_verify.append(s["sid"]) + if not s["fast_ok"] and not s["needs_verify"]: + to_set_verify.append(s["sid"]) + + if a_hash is None: + if states and all_missing: # remove seed Asset completely, if no valid AssetCache exists + sess.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id == aid)) + asset = sess.get(Asset, aid) + if asset: + sess.delete(asset) + else: + for s in states: + if s["exists"]: + survivors.add(os.path.abspath(s["fp"])) + continue + + if any_fast_ok: # if Asset has at least one valid AssetCache record, remove any invalid AssetCache records + for s in states: + if not s["exists"]: + stale_state_ids.append(s["sid"]) + if update_missing_tags: + with contextlib.suppress(Exception): + remove_missing_tag_for_asset_id(sess, asset_id=aid) + elif update_missing_tags: + with contextlib.suppress(Exception): + add_missing_tag_for_asset_id(sess, asset_id=aid, origin="automatic") + + for s in states: + if s["exists"]: + survivors.add(os.path.abspath(s["fp"])) + + if stale_state_ids: + sess.execute(sqlalchemy.delete(AssetCacheState).where(AssetCacheState.id.in_(stale_state_ids))) + if to_set_verify: + sess.execute( + sqlalchemy.update(AssetCacheState) + .where(AssetCacheState.id.in_(to_set_verify)) + .values(needs_verify=True) + ) + if to_clear_verify: + sess.execute( + sqlalchemy.update(AssetCacheState) + .where(AssetCacheState.id.in_(to_clear_verify)) + .values(needs_verify=False) + ) + sess.commit() + return survivors if collect_existing_paths else None diff --git a/app/database/models.py b/app/database/models.py index 6facfb8f2..e7572677a 100644 --- a/app/database/models.py +++ b/app/database/models.py @@ -1,14 +1,21 @@ -from sqlalchemy.orm import declarative_base +from typing import Any +from datetime import datetime +from sqlalchemy.orm import DeclarativeBase -Base = declarative_base() +class Base(DeclarativeBase): + pass - -def to_dict(obj): +def to_dict(obj: Any, include_none: bool = False) -> dict[str, Any]: fields = obj.__table__.columns.keys() - return { - field: (val.to_dict() if hasattr(val, "to_dict") else val) - for field in fields - if (val := getattr(obj, field)) - } + out: dict[str, Any] = {} + for field in fields: + val = getattr(obj, field) + if val is None and not include_none: + continue + if isinstance(val, datetime): + out[field] = val.isoformat() + else: + out[field] = val + return out # TODO: Define models here diff --git a/app/subgraph_manager.py b/app/subgraph_manager.py index dbe404541..6a8f586a4 100644 --- a/app/subgraph_manager.py +++ b/app/subgraph_manager.py @@ -10,6 +10,7 @@ import hashlib class Source: custom_node = "custom_node" + templates = "templates" class SubgraphEntry(TypedDict): source: str @@ -38,6 +39,18 @@ class CustomNodeSubgraphEntryInfo(TypedDict): class SubgraphManager: def 
__init__(self): self.cached_custom_node_subgraphs: dict[SubgraphEntry] | None = None + self.cached_blueprint_subgraphs: dict[SubgraphEntry] | None = None + + def _create_entry(self, file: str, source: str, node_pack: str) -> tuple[str, SubgraphEntry]: + """Create a subgraph entry from a file path. Expects normalized path (forward slashes).""" + entry_id = hashlib.sha256(f"{source}{file}".encode()).hexdigest() + entry: SubgraphEntry = { + "source": source, + "name": os.path.splitext(os.path.basename(file))[0], + "path": file, + "info": {"node_pack": node_pack}, + } + return entry_id, entry async def load_entry_data(self, entry: SubgraphEntry): with open(entry['path'], 'r') as f: @@ -60,53 +73,60 @@ class SubgraphManager: return entries async def get_custom_node_subgraphs(self, loadedModules, force_reload=False): - # if not forced to reload and cached, return cache + """Load subgraphs from custom nodes.""" if not force_reload and self.cached_custom_node_subgraphs is not None: return self.cached_custom_node_subgraphs - # Load subgraphs from custom nodes - subfolder = "subgraphs" - subgraphs_dict: dict[SubgraphEntry] = {} + subgraphs_dict: dict[SubgraphEntry] = {} for folder in folder_paths.get_folder_paths("custom_nodes"): - pattern = os.path.join(folder, f"*/{subfolder}/*.json") - matched_files = glob.glob(pattern) - for file in matched_files: - # replace backslashes with forward slashes + pattern = os.path.join(folder, "*/subgraphs/*.json") + for file in glob.glob(pattern): file = file.replace('\\', '/') - info: CustomNodeSubgraphEntryInfo = { - "node_pack": "custom_nodes." + file.split('/')[-3] - } - source = Source.custom_node - # hash source + path to make sure id will be as unique as possible, but - # reproducible across backend reloads - id = hashlib.sha256(f"{source}{file}".encode()).hexdigest() - entry: SubgraphEntry = { - "source": Source.custom_node, - "name": os.path.splitext(os.path.basename(file))[0], - "path": file, - "info": info, - } - subgraphs_dict[id] = entry + node_pack = "custom_nodes." 
+ file.split('/')[-3] + entry_id, entry = self._create_entry(file, Source.custom_node, node_pack) + subgraphs_dict[entry_id] = entry + self.cached_custom_node_subgraphs = subgraphs_dict return subgraphs_dict - async def get_custom_node_subgraph(self, id: str, loadedModules): - subgraphs = await self.get_custom_node_subgraphs(loadedModules) - entry: SubgraphEntry = subgraphs.get(id, None) - if entry is not None and entry.get('data', None) is None: + async def get_blueprint_subgraphs(self, force_reload=False): + """Load subgraphs from the blueprints directory.""" + if not force_reload and self.cached_blueprint_subgraphs is not None: + return self.cached_blueprint_subgraphs + + subgraphs_dict: dict[SubgraphEntry] = {} + blueprints_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'blueprints') + + if os.path.exists(blueprints_dir): + for file in glob.glob(os.path.join(blueprints_dir, "*.json")): + file = file.replace('\\', '/') + entry_id, entry = self._create_entry(file, Source.templates, "comfyui") + subgraphs_dict[entry_id] = entry + + self.cached_blueprint_subgraphs = subgraphs_dict + return subgraphs_dict + + async def get_all_subgraphs(self, loadedModules, force_reload=False): + """Get all subgraphs from all sources (custom nodes and blueprints).""" + custom_node_subgraphs = await self.get_custom_node_subgraphs(loadedModules, force_reload) + blueprint_subgraphs = await self.get_blueprint_subgraphs(force_reload) + return {**custom_node_subgraphs, **blueprint_subgraphs} + + async def get_subgraph(self, id: str, loadedModules): + """Get a specific subgraph by ID from any source.""" + entry = (await self.get_all_subgraphs(loadedModules)).get(id) + if entry is not None and entry.get('data') is None: await self.load_entry_data(entry) return entry def add_routes(self, routes, loadedModules): @routes.get("/global_subgraphs") async def get_global_subgraphs(request): - subgraphs_dict = await self.get_custom_node_subgraphs(loadedModules) - # NOTE: we may want to include other sources of global subgraphs such as templates in the future; - # that's the reasoning for the current implementation + subgraphs_dict = await self.get_all_subgraphs(loadedModules) return web.json_response(await self.sanitize_entries(subgraphs_dict, remove_data=True)) @routes.get("/global_subgraphs/{id}") async def get_global_subgraph(request): id = request.match_info.get("id", None) - subgraph = await self.get_custom_node_subgraph(id, loadedModules) + subgraph = await self.get_subgraph(id, loadedModules) return web.json_response(await self.sanitize_entry(subgraph)) diff --git a/blueprints/put_blueprints_here b/blueprints/put_blueprints_here new file mode 100644 index 000000000..e69de29bb diff --git a/comfy/cli_args.py b/comfy/cli_args.py index dae9a895d..1716c3de7 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -231,6 +231,7 @@ database_default_path = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "user", "comfyui.db") ) parser.add_argument("--database-url", type=str, default=f"sqlite:///{database_default_path}", help="Specify the database URL, e.g. 
for an in-memory database you can use 'sqlite:///:memory:'.") +parser.add_argument("--disable-assets-autoscan", action="store_true", help="Disable asset scanning on startup for database synchronization.") if comfy.options.args_parsing: args = parser.parse_args() diff --git a/comfy/clip_model.py b/comfy/clip_model.py index e88872728..d7d3f994c 100644 --- a/comfy/clip_model.py +++ b/comfy/clip_model.py @@ -1,6 +1,7 @@ import torch from comfy.ldm.modules.attention import optimized_attention_for_device import comfy.ops +import math def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True): image = image[:, :, :, :3] if image.shape[3] > 3 else image @@ -21,6 +22,39 @@ def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], s image = torch.clip((255. * image), 0, 255).round() / 255.0 return (image - mean.view([3,1,1])) / std.view([3,1,1]) +def siglip2_flex_calc_resolution(oh, ow, patch_size, max_num_patches, eps=1e-5): + def scale_dim(size, scale): + scaled = math.ceil(size * scale / patch_size) * patch_size + return max(patch_size, int(scaled)) + + # Binary search for optimal scale + lo, hi = eps / 10, 100.0 + while hi - lo >= eps: + mid = (lo + hi) / 2 + h, w = scale_dim(oh, mid), scale_dim(ow, mid) + if (h // patch_size) * (w // patch_size) <= max_num_patches: + lo = mid + else: + hi = mid + + return scale_dim(oh, lo), scale_dim(ow, lo) + +def siglip2_preprocess(image, size, patch_size, num_patches, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], crop=True): + if size > 0: + return clip_preprocess(image, size=size, mean=mean, std=std, crop=crop) + + image = image[:, :, :, :3] if image.shape[3] > 3 else image + mean = torch.tensor(mean, device=image.device, dtype=image.dtype) + std = torch.tensor(std, device=image.device, dtype=image.dtype) + image = image.movedim(-1, 1) + + b, c, h, w = image.shape + h, w = siglip2_flex_calc_resolution(h, w, patch_size, num_patches) + + image = torch.nn.functional.interpolate(image, size=(h, w), mode="bilinear", antialias=True) + image = torch.clip((255. 
* image), 0, 255).round() / 255.0 + return (image - mean.view([3, 1, 1])) / std.view([3, 1, 1]) + class CLIPAttention(torch.nn.Module): def __init__(self, embed_dim, heads, dtype, device, operations): super().__init__() @@ -175,6 +209,27 @@ class CLIPTextModel(torch.nn.Module): out = self.text_projection(x[2]) return (x[0], x[1], out, x[2]) +def siglip2_pos_embed(embed_weight, embeds, orig_shape): + embed_weight_len = round(embed_weight.shape[0] ** 0.5) + embed_weight = comfy.ops.cast_to_input(embed_weight, embeds).movedim(1, 0).reshape(1, -1, embed_weight_len, embed_weight_len) + embed_weight = torch.nn.functional.interpolate(embed_weight, size=orig_shape, mode="bilinear", align_corners=False, antialias=True) + embed_weight = embed_weight.reshape(-1, embed_weight.shape[-2] * embed_weight.shape[-1]).movedim(0, 1) + return embeds + embed_weight + +class Siglip2Embeddings(torch.nn.Module): + def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", num_patches=None, dtype=None, device=None, operations=None): + super().__init__() + self.patch_embedding = operations.Linear(num_channels * patch_size * patch_size, embed_dim, dtype=dtype, device=device) + self.position_embedding = operations.Embedding(num_patches, embed_dim, dtype=dtype, device=device) + self.patch_size = patch_size + + def forward(self, pixel_values): + b, c, h, w = pixel_values.shape + img = pixel_values.movedim(1, -1).reshape(b, h // self.patch_size, self.patch_size, w // self.patch_size, self.patch_size, c) + img = img.permute(0, 1, 3, 2, 4, 5) + img = img.reshape(b, img.shape[1] * img.shape[2], -1) + img = self.patch_embedding(img) + return siglip2_pos_embed(self.position_embedding.weight, img, (h // self.patch_size, w // self.patch_size)) class CLIPVisionEmbeddings(torch.nn.Module): def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", dtype=None, device=None, operations=None): @@ -218,8 +273,11 @@ class CLIPVision(torch.nn.Module): intermediate_activation = config_dict["hidden_act"] model_type = config_dict["model_type"] - self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations) - if model_type == "siglip_vision_model": + if model_type in ["siglip2_vision_model"]: + self.embeddings = Siglip2Embeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, num_patches=config_dict.get("num_patches", None), dtype=dtype, device=device, operations=operations) + else: + self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations) + if model_type in ["siglip_vision_model", "siglip2_vision_model"]: self.pre_layrnorm = lambda a: a self.output_layernorm = True else: diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index d5fc53497..b28bf636c 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -21,6 +21,7 @@ clip_preprocess = comfy.clip_model.clip_preprocess # Prevent some stuff from br IMAGE_ENCODERS = { "clip_vision_model": comfy.clip_model.CLIPVisionModelProjection, "siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection, + "siglip2_vision_model": comfy.clip_model.CLIPVisionModelProjection, "dinov2": comfy.image_encoders.dino2.Dinov2Model, } @@ -32,9 +33,10 @@ class 
ClipVisionModel(): self.image_size = config.get("image_size", 224) self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073]) self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711]) - model_type = config.get("model_type", "clip_vision_model") - model_class = IMAGE_ENCODERS.get(model_type) - if model_type == "siglip_vision_model": + self.model_type = config.get("model_type", "clip_vision_model") + self.config = config.copy() + model_class = IMAGE_ENCODERS.get(self.model_type) + if self.model_type == "siglip_vision_model": self.return_all_hidden_states = True else: self.return_all_hidden_states = False @@ -55,12 +57,16 @@ class ClipVisionModel(): def encode_image(self, image, crop=True): comfy.model_management.load_model_gpu(self.patcher) - pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float() + if self.model_type == "siglip2_vision_model": + pixel_values = comfy.clip_model.siglip2_preprocess(image.to(self.load_device), size=self.image_size, patch_size=self.config.get("patch_size", 16), num_patches=self.config.get("num_patches", 256), mean=self.image_mean, std=self.image_std, crop=crop).float() + else: + pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float() out = self.model(pixel_values=pixel_values, intermediate_output='all' if self.return_all_hidden_states else -2) outputs = Output() outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device()) outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device()) + outputs["image_sizes"] = [pixel_values.shape[1:]] * pixel_values.shape[0] if self.return_all_hidden_states: all_hs = out[1].to(comfy.model_management.intermediate_device()) outputs["penultimate_hidden_states"] = all_hs[:, -2] @@ -107,10 +113,14 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False): elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd: embed_shape = sd["vision_model.embeddings.position_embedding.weight"].shape[0] if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152: - if embed_shape == 729: - json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json") - elif embed_shape == 1024: - json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_512.json") + patch_embedding_shape = sd["vision_model.embeddings.patch_embedding.weight"].shape + if len(patch_embedding_shape) == 2: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip2_base_naflex.json") + else: + if embed_shape == 729: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json") + elif embed_shape == 1024: + json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_512.json") elif embed_shape == 577: if "multi_modal_projector.linear_1.bias" in sd: json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336_llava.json") diff --git a/comfy/clip_vision_siglip2_base_naflex.json b/comfy/clip_vision_siglip2_base_naflex.json new file mode 100644 index 000000000..6f6b99bd6 --- /dev/null +++ b/comfy/clip_vision_siglip2_base_naflex.json @@ -0,0 +1,14 @@ +{ + "num_channels": 3, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 1152, + "image_size": -1, + 
"intermediate_size": 4304, + "model_type": "siglip2_vision_model", + "num_attention_heads": 16, + "num_hidden_layers": 27, + "patch_size": 16, + "num_patches": 256, + "image_mean": [0.5, 0.5, 0.5], + "image_std": [0.5, 0.5, 0.5] +} diff --git a/comfy/float.py b/comfy/float.py index 521316fd2..88c47cd80 100644 --- a/comfy/float.py +++ b/comfy/float.py @@ -65,3 +65,147 @@ def stochastic_rounding(value, dtype, seed=0): return output return value.to(dtype=dtype) + + +# TODO: improve this? +def stochastic_float_to_fp4_e2m1(x, generator): + orig_shape = x.shape + sign = torch.signbit(x).to(torch.uint8) + + exp = torch.floor(torch.log2(x.abs()) + 1.0).clamp(0, 3) + x += (torch.rand(x.size(), dtype=x.dtype, layout=x.layout, device=x.device, generator=generator) - 0.5) * (2 ** (exp - 2.0)) * 1.25 + + x = x.abs() + exp = torch.floor(torch.log2(x) + 1.1925).clamp(0, 3) + + mantissa = torch.where( + exp > 0, + (x / (2.0 ** (exp - 1)) - 1.0) * 2.0, + (x * 2.0), + out=x + ).round().to(torch.uint8) + del x + + exp = exp.to(torch.uint8) + + fp4 = (sign << 3) | (exp << 1) | mantissa + del sign, exp, mantissa + + fp4_flat = fp4.view(-1) + packed = (fp4_flat[0::2] << 4) | fp4_flat[1::2] + return packed.reshape(list(orig_shape)[:-1] + [-1]) + + +def to_blocked(input_matrix, flatten: bool = True) -> torch.Tensor: + """ + Rearrange a large matrix by breaking it into blocks and applying the rearrangement pattern. + See: + https://docs.nvidia.com/cuda/cublas/index.html#d-block-scaling-factors-layout + + Args: + input_matrix: Input tensor of shape (H, W) + Returns: + Rearranged tensor of shape (32*ceil_div(H,128), 16*ceil_div(W,4)) + """ + + def ceil_div(a, b): + return (a + b - 1) // b + + rows, cols = input_matrix.shape + n_row_blocks = ceil_div(rows, 128) + n_col_blocks = ceil_div(cols, 4) + + # Calculate the padded shape + padded_rows = n_row_blocks * 128 + padded_cols = n_col_blocks * 4 + + padded = input_matrix + if (rows, cols) != (padded_rows, padded_cols): + padded = torch.zeros( + (padded_rows, padded_cols), + device=input_matrix.device, + dtype=input_matrix.dtype, + ) + padded[:rows, :cols] = input_matrix + + # Rearrange the blocks + blocks = padded.view(n_row_blocks, 128, n_col_blocks, 4).permute(0, 2, 1, 3) + rearranged = blocks.reshape(-1, 4, 32, 4).transpose(1, 2).reshape(-1, 32, 16) + if flatten: + return rearranged.flatten() + + return rearranged.reshape(padded_rows, padded_cols) + + +def stochastic_round_quantize_nvfp4_block(x, per_tensor_scale, generator): + F4_E2M1_MAX = 6.0 + F8_E4M3_MAX = 448.0 + + orig_shape = x.shape + + block_size = 16 + + x = x.reshape(orig_shape[0], -1, block_size) + scaled_block_scales_fp8 = torch.clamp(((torch.amax(torch.abs(x), dim=-1)) / F4_E2M1_MAX) / per_tensor_scale.to(x.dtype), max=F8_E4M3_MAX).to(torch.float8_e4m3fn) + x = x / (per_tensor_scale.to(x.dtype) * scaled_block_scales_fp8.to(x.dtype)).unsqueeze(-1) + + x = x.view(orig_shape).nan_to_num() + data_lp = stochastic_float_to_fp4_e2m1(x, generator=generator) + return data_lp, scaled_block_scales_fp8 + + +def stochastic_round_quantize_nvfp4(x, per_tensor_scale, pad_16x, seed=0): + def roundup(x: int, multiple: int) -> int: + """Round up x to the nearest multiple.""" + return ((x + multiple - 1) // multiple) * multiple + + generator = torch.Generator(device=x.device) + generator.manual_seed(seed) + + # Handle padding + if pad_16x: + rows, cols = x.shape + padded_rows = roundup(rows, 16) + padded_cols = roundup(cols, 16) + if padded_rows != rows or padded_cols != cols: + x = torch.nn.functional.pad(x, (0, 
padded_cols - cols, 0, padded_rows - rows)) + + x, blocked_scaled = stochastic_round_quantize_nvfp4_block(x, per_tensor_scale, generator) + return x, to_blocked(blocked_scaled, flatten=False) + + +def stochastic_round_quantize_nvfp4_by_block(x, per_tensor_scale, pad_16x, seed=0, block_size=4096 * 4096): + def roundup(x: int, multiple: int) -> int: + """Round up x to the nearest multiple.""" + return ((x + multiple - 1) // multiple) * multiple + + orig_shape = x.shape + + # Handle padding + if pad_16x: + rows, cols = x.shape + padded_rows = roundup(rows, 16) + padded_cols = roundup(cols, 16) + if padded_rows != rows or padded_cols != cols: + x = torch.nn.functional.pad(x, (0, padded_cols - cols, 0, padded_rows - rows)) + # Note: We update orig_shape because the output tensor logic below assumes x.shape matches + # what we want to produce. If we pad here, we want the padded output. + orig_shape = x.shape + + orig_shape = list(orig_shape) + + output_fp4 = torch.empty(orig_shape[:-1] + [orig_shape[-1] // 2], dtype=torch.uint8, device=x.device) + output_block = torch.empty(orig_shape[:-1] + [orig_shape[-1] // 16], dtype=torch.float8_e4m3fn, device=x.device) + + generator = torch.Generator(device=x.device) + generator.manual_seed(seed) + + num_slices = max(1, (x.numel() / block_size)) + slice_size = max(1, (round(x.shape[0] / num_slices))) + + for i in range(0, x.shape[0], slice_size): + fp4, block = stochastic_round_quantize_nvfp4_block(x[i: i + slice_size], per_tensor_scale, generator=generator) + output_fp4[i:i + slice_size].copy_(fp4) + output_block[i:i + slice_size].copy_(block) + + return output_fp4, to_blocked(output_block, flatten=False) diff --git a/comfy/ldm/hunyuan_video/upsampler.py b/comfy/ldm/hunyuan_video/upsampler.py index d9e76922f..51b6d1da8 100644 --- a/comfy/ldm/hunyuan_video/upsampler.py +++ b/comfy/ldm/hunyuan_video/upsampler.py @@ -3,8 +3,8 @@ import torch.nn as nn import torch.nn.functional as F from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, VideoConv3d from comfy.ldm.hunyuan_video.vae_refiner import RMS_norm -import model_management -import model_patcher +import comfy.model_management +import comfy.model_patcher class SRResidualCausalBlock3D(nn.Module): def __init__(self, channels: int): @@ -103,13 +103,13 @@ UPSAMPLERS = { class HunyuanVideo15SRModel(): def __init__(self, model_type, config): - self.load_device = model_management.vae_device() - offload_device = model_management.vae_offload_device() - self.dtype = model_management.vae_dtype(self.load_device) + self.load_device = comfy.model_management.vae_device() + offload_device = comfy.model_management.vae_offload_device() + self.dtype = comfy.model_management.vae_dtype(self.load_device) self.model_class = UPSAMPLERS.get(model_type) self.model = self.model_class(**config).eval() - self.patcher = model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) def load_sd(self, sd): return self.model.load_state_dict(sd, strict=True) @@ -118,5 +118,5 @@ class HunyuanVideo15SRModel(): return self.model.state_dict() def resample_latent(self, latent): - model_management.load_model_gpu(self.patcher) + comfy.model_management.load_model_gpu(self.patcher) return self.model(latent.to(self.load_device)) diff --git a/comfy/ldm/lightricks/av_model.py b/comfy/ldm/lightricks/av_model.py index 759535501..c12ace241 100644 --- a/comfy/ldm/lightricks/av_model.py 
+++ b/comfy/ldm/lightricks/av_model.py @@ -11,6 +11,69 @@ from comfy.ldm.lightricks.model import ( from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier import comfy.ldm.common_dit +class CompressedTimestep: + """Store video timestep embeddings in compressed form using per-frame indexing.""" + __slots__ = ('data', 'batch_size', 'num_frames', 'patches_per_frame', 'feature_dim') + + def __init__(self, tensor: torch.Tensor, patches_per_frame: int): + """ + tensor: [batch_size, num_tokens, feature_dim] tensor where num_tokens = num_frames * patches_per_frame + patches_per_frame: Number of spatial patches per frame (height * width in latent space) + """ + self.batch_size, num_tokens, self.feature_dim = tensor.shape + + # Check if compression is valid (num_tokens must be divisible by patches_per_frame) + if num_tokens % patches_per_frame == 0 and num_tokens >= patches_per_frame: + self.patches_per_frame = patches_per_frame + self.num_frames = num_tokens // patches_per_frame + + # Reshape to [batch, frames, patches_per_frame, feature_dim] and store one value per frame + # All patches in a frame are identical, so we only keep the first one + reshaped = tensor.view(self.batch_size, self.num_frames, patches_per_frame, self.feature_dim) + self.data = reshaped[:, :, 0, :].contiguous() # [batch, frames, feature_dim] + else: + # Not divisible or too small - store directly without compression + self.patches_per_frame = 1 + self.num_frames = num_tokens + self.data = tensor + + def expand(self): + """Expand back to original tensor.""" + if self.patches_per_frame == 1: + return self.data + + # [batch, frames, feature_dim] -> [batch, frames, patches_per_frame, feature_dim] -> [batch, tokens, feature_dim] + expanded = self.data.unsqueeze(2).expand(self.batch_size, self.num_frames, self.patches_per_frame, self.feature_dim) + return expanded.reshape(self.batch_size, -1, self.feature_dim) + + def expand_for_computation(self, scale_shift_table: torch.Tensor, batch_size: int, indices: slice = slice(None, None)): + """Compute ada values on compressed per-frame data, then expand spatially.""" + num_ada_params = scale_shift_table.shape[0] + + # No compression - compute directly + if self.patches_per_frame == 1: + num_tokens = self.data.shape[1] + dim_per_param = self.feature_dim // num_ada_params + reshaped = self.data.reshape(batch_size, num_tokens, num_ada_params, dim_per_param)[:, :, indices, :] + table_values = scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to(device=self.data.device, dtype=self.data.dtype) + ada_values = (table_values + reshaped).unbind(dim=2) + return ada_values + + # Compressed: compute on per-frame data then expand spatially + # Reshape: [batch, frames, feature_dim] -> [batch, frames, num_ada_params, dim_per_param] + frame_reshaped = self.data.reshape(batch_size, self.num_frames, num_ada_params, -1)[:, :, indices, :] + table_values = scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to( + device=self.data.device, dtype=self.data.dtype + ) + frame_ada = (table_values + frame_reshaped).unbind(dim=2) + + # Expand each ada parameter spatially: [batch, frames, dim] -> [batch, frames, patches, dim] -> [batch, tokens, dim] + return tuple( + frame_val.unsqueeze(2).expand(batch_size, self.num_frames, self.patches_per_frame, -1) + .reshape(batch_size, -1, frame_val.shape[-1]) + for frame_val in frame_ada + ) + class BasicAVTransformerBlock(nn.Module): def __init__( self, @@ -119,6 +182,9 @@ class BasicAVTransformerBlock(nn.Module): def get_ada_values( self, scale_shift_table: 
torch.Tensor, batch_size: int, timestep: torch.Tensor, indices: slice = slice(None, None) ): + if isinstance(timestep, CompressedTimestep): + return timestep.expand_for_computation(scale_shift_table, batch_size, indices) + num_ada_params = scale_shift_table.shape[0] ada_values = ( @@ -146,10 +212,7 @@ class BasicAVTransformerBlock(nn.Module): gate_timestep, ) - scale_shift_chunks = [t.squeeze(2) for t in scale_shift_ada_values] - gate_ada_values = [t.squeeze(2) for t in gate_ada_values] - - return (*scale_shift_chunks, *gate_ada_values) + return (*scale_shift_ada_values, *gate_ada_values) def forward( self, @@ -543,72 +606,80 @@ class LTXAVModel(LTXVModel): if grid_mask is not None: timestep = timestep[:, grid_mask] - timestep = timestep * self.timestep_scale_multiplier + timestep_scaled = timestep * self.timestep_scale_multiplier + v_timestep, v_embedded_timestep = self.adaln_single( - timestep.flatten(), + timestep_scaled.flatten(), {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) - # Second dimension is 1 or number of tokens (if timestep_per_token) - v_timestep = v_timestep.view(batch_size, -1, v_timestep.shape[-1]) - v_embedded_timestep = v_embedded_timestep.view( - batch_size, -1, v_embedded_timestep.shape[-1] - ) + # Calculate patches_per_frame from orig_shape: [batch, channels, frames, height, width] + # Video tokens are arranged as (frames * height * width), so patches_per_frame = height * width + orig_shape = kwargs.get("orig_shape") + v_patches_per_frame = None + if orig_shape is not None and len(orig_shape) == 5: + # orig_shape[3] = height, orig_shape[4] = width (in latent space) + v_patches_per_frame = orig_shape[3] * orig_shape[4] + + # Reshape to [batch_size, num_tokens, dim] and compress for storage + v_timestep = CompressedTimestep(v_timestep.view(batch_size, -1, v_timestep.shape[-1]), v_patches_per_frame) + v_embedded_timestep = CompressedTimestep(v_embedded_timestep.view(batch_size, -1, v_embedded_timestep.shape[-1]), v_patches_per_frame) # Prepare audio timestep a_timestep = kwargs.get("a_timestep") if a_timestep is not None: - a_timestep = a_timestep * self.timestep_scale_multiplier + a_timestep_scaled = a_timestep * self.timestep_scale_multiplier + a_timestep_flat = a_timestep_scaled.flatten() + timestep_flat = timestep_scaled.flatten() av_ca_factor = self.av_ca_timestep_scale_multiplier / self.timestep_scale_multiplier + # Cross-attention timesteps - compress these too av_ca_audio_scale_shift_timestep, _ = self.av_ca_audio_scale_shift_adaln_single( - a_timestep.flatten(), + a_timestep_flat, {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) av_ca_video_scale_shift_timestep, _ = self.av_ca_video_scale_shift_adaln_single( - timestep.flatten(), + timestep_flat, {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) av_ca_a2v_gate_noise_timestep, _ = self.av_ca_a2v_gate_adaln_single( - timestep.flatten() * av_ca_factor, + timestep_flat * av_ca_factor, {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) av_ca_v2a_gate_noise_timestep, _ = self.av_ca_v2a_gate_adaln_single( - a_timestep.flatten() * av_ca_factor, + a_timestep_flat * av_ca_factor, {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) + # Compress cross-attention timesteps (only video side, audio is too small to benefit) + cross_av_timestep_ss = [ + 
av_ca_audio_scale_shift_timestep.view(batch_size, -1, av_ca_audio_scale_shift_timestep.shape[-1]), + CompressedTimestep(av_ca_video_scale_shift_timestep.view(batch_size, -1, av_ca_video_scale_shift_timestep.shape[-1]), v_patches_per_frame), # video - compressed + CompressedTimestep(av_ca_a2v_gate_noise_timestep.view(batch_size, -1, av_ca_a2v_gate_noise_timestep.shape[-1]), v_patches_per_frame), # video - compressed + av_ca_v2a_gate_noise_timestep.view(batch_size, -1, av_ca_v2a_gate_noise_timestep.shape[-1]), + ] + a_timestep, a_embedded_timestep = self.audio_adaln_single( - a_timestep.flatten(), + a_timestep_flat, {"resolution": None, "aspect_ratio": None}, batch_size=batch_size, hidden_dtype=hidden_dtype, ) + # Audio timesteps a_timestep = a_timestep.view(batch_size, -1, a_timestep.shape[-1]) - a_embedded_timestep = a_embedded_timestep.view( - batch_size, -1, a_embedded_timestep.shape[-1] - ) - cross_av_timestep_ss = [ - av_ca_audio_scale_shift_timestep, - av_ca_video_scale_shift_timestep, - av_ca_a2v_gate_noise_timestep, - av_ca_v2a_gate_noise_timestep, - ] - cross_av_timestep_ss = list( - [t.view(batch_size, -1, t.shape[-1]) for t in cross_av_timestep_ss] - ) + a_embedded_timestep = a_embedded_timestep.view(batch_size, -1, a_embedded_timestep.shape[-1]) else: - a_timestep = timestep + a_timestep = timestep_scaled a_embedded_timestep = kwargs.get("embedded_timestep") cross_av_timestep_ss = [] @@ -767,6 +838,11 @@ class LTXAVModel(LTXVModel): ax = x[1] v_embedded_timestep = embedded_timestep[0] a_embedded_timestep = embedded_timestep[1] + + # Expand compressed video timestep if needed + if isinstance(v_embedded_timestep, CompressedTimestep): + v_embedded_timestep = v_embedded_timestep.expand() + vx = super()._process_output(vx, v_embedded_timestep, keyframe_idxs, **kwargs) # Process audio output diff --git a/comfy/lora.py b/comfy/lora.py index 2ed0acb9d..e8246bd66 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -322,6 +322,7 @@ def model_lora_keys_unet(model, key_map={}): key_map["diffusion_model.{}".format(key_lora)] = to key_map["transformer.{}".format(key_lora)] = to key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = to + key_map[key_lora] = to if isinstance(model, comfy.model_base.Kandinsky5): for k in sdk: diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 0853b3aec..aff5a50b9 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -237,6 +237,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): else: dit_config["vec_in_dim"] = None + dit_config["num_heads"] = dit_config["hidden_size"] // sum(dit_config["axes_dim"]) + dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.') dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.') if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma diff --git a/comfy/model_management.py b/comfy/model_management.py index 928282092..9d39be7b2 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -22,7 +22,6 @@ from enum import Enum from comfy.cli_args import args, PerformanceFeature import torch import sys -import importlib import platform import weakref import gc @@ -349,15 +348,27 @@ try: except: rocm_version = (6, -1) + def aotriton_supported(gpu_arch): + path = torch.__path__[0] + path = os.path.join(os.path.join(path, "lib"), "aotriton.images") + 
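+ # aotriton kernel image dirs are named like "amd-gfx942"; strip the "amd-" prefix to collect bare arch names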
gfx = set(map(lambda a: a[4:], filter(lambda a: a.startswith("amd-gfx"), os.listdir(path)))) + if gpu_arch in gfx: + return True + if "{}x".format(gpu_arch[:-1]) in gfx: + return True + if "{}xx".format(gpu_arch[:-2]) in gfx: + return True + return False + logging.info("AMD arch: {}".format(arch)) logging.info("ROCm version: {}".format(rocm_version)) if args.use_split_cross_attention == False and args.use_quad_cross_attention == False: - if importlib.util.find_spec('triton') is not None: # AMD efficient attention implementation depends on triton. TODO: better way of detecting if it's compiled in or not. + if aotriton_supported(arch): # AMD efficient attention implementation depends on aotriton. if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950 ENABLE_PYTORCH_ATTENTION = True if rocm_version >= (7, 0): - if any((a in arch) for a in ["gfx1201"]): + if any((a in arch) for a in ["gfx1200", "gfx1201"]): ENABLE_PYTORCH_ATTENTION = True if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4): if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx950"]): # TODO: more arches, "gfx942" gives error on pytorch nightly 2.10 1013 rocm7.0 diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 93d26c690..f6b80a40f 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -718,6 +718,7 @@ class ModelPatcher: continue cast_weight = self.force_cast_weights + m.comfy_force_cast_weights = self.force_cast_weights if lowvram_weight: if hasattr(m, "comfy_cast_weights"): m.weight_function = [] @@ -790,11 +791,12 @@ class ModelPatcher: for param in params: self.pin_weight_to_device("{}.{}".format(n, param)) + usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else "" if lowvram_counter > 0: - logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter)) + logging.info("loaded partially; {} {:.2f} MB loaded, {:.2f} MB offloaded, {:.2f} MB buffer reserved, lowvram patches: {}".format(usable_stat, mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), offload_buffer / (1024 * 1024), patch_counter)) self.model.model_lowvram = True else: - logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load)) + logging.info("loaded completely; {} {:.2f} MB loaded, full load: {}".format(usable_stat, mem_counter / (1024 * 1024), full_load)) self.model.model_lowvram = False if full_load: self.model.to(device_to) diff --git a/comfy/ops.py b/comfy/ops.py index cd536e22d..415c39e92 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -546,7 +546,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec weight_key = f"{prefix}weight" weight = state_dict.pop(weight_key, None) if weight is None: - raise ValueError(f"Missing weight for layer {layer_name}") + logging.warning(f"Missing weight for layer {layer_name}") + return manually_loaded_keys = [weight_key] @@ -624,21 +625,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec missing_keys.remove(key) def state_dict(self, *args, 
destination=None, prefix="", **kwargs): - sd = super().state_dict(*args, destination=destination, prefix=prefix, **kwargs) - if isinstance(self.weight, QuantizedTensor): - layout_cls = self.weight._layout_cls + if destination is not None: + sd = destination + else: + sd = {} - # Check if it's any FP8 variant (E4M3 or E5M2) - if layout_cls in ("TensorCoreFP8E4M3Layout", "TensorCoreFP8E5M2Layout", "TensorCoreFP8Layout"): - sd["{}weight_scale".format(prefix)] = self.weight._params.scale - elif layout_cls == "TensorCoreNVFP4Layout": - sd["{}weight_scale_2".format(prefix)] = self.weight._params.scale - sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale + if self.bias is not None: + sd["{}bias".format(prefix)] = self.bias + + if isinstance(self.weight, QuantizedTensor): + sd_out = self.weight.state_dict("{}weight".format(prefix)) + for k in sd_out: + sd[k] = sd_out[k] quant_conf = {"format": self.quant_format} if self._full_precision_mm_config: quant_conf["full_precision_matrix_mult"] = True sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8) + + input_scale = getattr(self, 'input_scale', None) + if input_scale is not None: + sd["{}input_scale".format(prefix)] = input_scale + else: + sd["{}weight".format(prefix)] = self.weight return sd def _forward(self, input, weight, bias): @@ -654,29 +663,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec run_every_op() input_shape = input.shape - tensor_3d = input.ndim == 3 - - if self._full_precision_mm or self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0: - return self.forward_comfy_cast_weights(input, *args, **kwargs) + reshaped_3d = False if (getattr(self, 'layout_type', None) is not None and - not isinstance(input, QuantizedTensor)): + not isinstance(input, QuantizedTensor) and not self._full_precision_mm and + not getattr(self, 'comfy_force_cast_weights', False) and + len(self.weight_function) == 0 and len(self.bias_function) == 0): # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others) - if tensor_3d: - input = input.reshape(-1, input_shape[2]) + input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input - if input.ndim != 2: - # Fall back to comfy_cast_weights for non-2D tensors - return self.forward_comfy_cast_weights(input.reshape(input_shape), *args, **kwargs) + # Fall back to non-quantized for non-2D tensors + if input_reshaped.ndim == 2: + reshaped_3d = input.ndim == 3 + # dtype is now implicit in the layout class + scale = getattr(self, 'input_scale', None) + if scale is not None: + scale = comfy.model_management.cast_to_device(scale, input.device, None) + input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale) - # dtype is now implicit in the layout class - input = QuantizedTensor.from_float(input, self.layout_type, scale=getattr(self, 'input_scale', None)) - - output = self._forward(input, self.weight, self.bias) + output = self.forward_comfy_cast_weights(input) # Reshape output back to 3D if input was 3D - if tensor_3d: + if reshaped_3d: output = output.reshape((input_shape[0], input_shape[1], self.weight.shape[0])) return output @@ -690,7 +699,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec def set_weight(self, weight, inplace_update=False, seed=None, return_weight=False, **kwargs): if getattr(self, 'layout_type', None) is not None: # dtype is now implicit in the layout class - 
weight = QuantizedTensor.from_float(weight, self.layout_type, scale="recalculate", stochastic_rounding=seed, inplace_ops=True) + weight = QuantizedTensor.from_float(weight, self.layout_type, scale="recalculate", stochastic_rounding=seed, inplace_ops=True).to(self.weight.dtype) else: weight = weight.to(self.weight.dtype) if return_weight: diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py index 5a17bc6f5..15a4f457b 100644 --- a/comfy/quant_ops.py +++ b/comfy/quant_ops.py @@ -7,7 +7,7 @@ try: QuantizedTensor, QuantizedLayout, TensorCoreFP8Layout as _CKFp8Layout, - TensorCoreNVFP4Layout, # Direct import, no wrapper needed + TensorCoreNVFP4Layout as _CKNvfp4Layout, register_layout_op, register_layout_class, get_layout_class, @@ -19,6 +19,7 @@ try: cuda_version = tuple(map(int, str(torch.version.cuda).split('.'))) if cuda_version < (13,): ck.registry.disable("cuda") + logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") ck.registry.disable("triton") for k, v in ck.list_backends().items(): @@ -33,7 +34,7 @@ except ImportError as e: class _CKFp8Layout: pass - class TensorCoreNVFP4Layout: + class _CKNvfp4Layout: pass def register_layout_class(name, cls): @@ -83,6 +84,39 @@ class _TensorCoreFP8LayoutBase(_CKFp8Layout): return qdata, params +class TensorCoreNVFP4Layout(_CKNvfp4Layout): + @classmethod + def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False): + if tensor.dim() != 2: + raise ValueError(f"NVFP4 requires 2D tensor, got {tensor.dim()}D") + + orig_dtype = tensor.dtype + orig_shape = tuple(tensor.shape) + + if scale is None or (isinstance(scale, str) and scale == "recalculate"): + scale = torch.amax(tensor.abs()) / (ck.float_utils.F8_E4M3_MAX * ck.float_utils.F4_E2M1_MAX) + + if not isinstance(scale, torch.Tensor): + scale = torch.tensor(scale) + scale = scale.to(device=tensor.device, dtype=torch.float32) + + padded_shape = cls.get_padded_shape(orig_shape) + needs_padding = padded_shape != orig_shape + + if stochastic_rounding > 0: + qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4_by_block(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding) + else: + qdata, block_scale = ck.quantize_nvfp4(tensor, scale, pad_16x=needs_padding) + + params = cls.Params( + scale=scale, + orig_dtype=orig_dtype, + orig_shape=orig_shape, + block_scale=block_scale, + ) + return qdata, params + + class TensorCoreFP8E4M3Layout(_TensorCoreFP8LayoutBase): FP8_DTYPE = torch.float8_e4m3fn diff --git a/comfy/sd.py b/comfy/sd.py index 32157e18b..77700dfd3 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -218,7 +218,7 @@ class CLIP: if unprojected: self.cond_stage_model.set_clip_options({"projected_pooled": False}) - self.load_model() + self.load_model(tokens) self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device}) all_hooks.reset() self.patcher.patch_hooks(None) @@ -266,7 +266,7 @@ class CLIP: if return_pooled == "unprojected": self.cond_stage_model.set_clip_options({"projected_pooled": False}) - self.load_model() + self.load_model(tokens) self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device}) o = self.cond_stage_model.encode_token_weights(tokens) cond, pooled = o[:2] @@ -299,8 +299,11 @@ class CLIP: sd_clip[k] = sd_tokenizer[k] return sd_clip - def load_model(self): - model_management.load_model_gpu(self.patcher) + def load_model(self, tokens={}): + memory_used = 0 + if hasattr(self.cond_stage_model, "memory_estimation_function"): + memory_used = 
self.cond_stage_model.memory_estimation_function(tokens, device=self.patcher.load_device) + model_management.load_models_gpu([self.patcher], memory_required=memory_used) return self.patcher def get_key_patches(self): @@ -476,8 +479,8 @@ class VAE: self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config) self.latent_channels = 128 self.latent_dim = 3 - self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype) - self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype) + self.memory_used_decode = lambda shape, dtype: (1200 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype) + self.memory_used_encode = lambda shape, dtype: (80 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype) self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32) self.upscale_index_formula = (8, 32, 32) self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32) @@ -1011,6 +1014,7 @@ class CLIPType(Enum): KANDINSKY5 = 22 KANDINSKY5_IMAGE = 23 NEWBIE = 24 + FLUX2 = 25 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}): @@ -1043,6 +1047,7 @@ class TEModel(Enum): QWEN3_2B = 17 GEMMA_3_12B = 18 JINA_CLIP_2 = 19 + QWEN3_8B = 20 def detect_te_model(sd): @@ -1056,9 +1061,9 @@ def detect_te_model(sd): return TEModel.JINA_CLIP_2 if "encoder.block.23.layer.1.DenseReluDense.wi_1.weight" in sd: weight = sd["encoder.block.23.layer.1.DenseReluDense.wi_1.weight"] - if weight.shape[-1] == 4096: + if weight.shape[0] == 10240: return TEModel.T5_XXL - elif weight.shape[-1] == 2048: + elif weight.shape[0] == 5120: return TEModel.T5_XL if 'encoder.block.23.layer.1.DenseReluDense.wi.weight' in sd: return TEModel.T5_XXL_OLD @@ -1086,6 +1091,8 @@ def detect_te_model(sd): return TEModel.QWEN3_4B elif weight.shape[0] == 2048: return TEModel.QWEN3_2B + elif weight.shape[0] == 4096: + return TEModel.QWEN3_8B if weight.shape[0] == 5120: if "model.layers.39.post_attention_layernorm.weight" in sd: return TEModel.MISTRAL3_24B @@ -1211,11 +1218,18 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.tokenizer = comfy.text_encoders.flux.Flux2Tokenizer tokenizer_data["tekken_model"] = clip_data[0].get("tekken_model", None) elif te_model == TEModel.QWEN3_4B: - clip_target.clip = comfy.text_encoders.z_image.te(**llama_detect(clip_data)) - clip_target.tokenizer = comfy.text_encoders.z_image.ZImageTokenizer + if clip_type == CLIPType.FLUX or clip_type == CLIPType.FLUX2: + clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_4b") + clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer + else: + clip_target.clip = comfy.text_encoders.z_image.te(**llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.z_image.ZImageTokenizer elif te_model == TEModel.QWEN3_2B: clip_target.clip = comfy.text_encoders.ovis.te(**llama_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.ovis.OvisTokenizer + elif te_model == TEModel.QWEN3_8B: + clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b") + clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B elif te_model == TEModel.JINA_CLIP_2: clip_target.clip = 
comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper diff --git a/comfy/supported_models.py b/comfy/supported_models.py index ee9a79001..c8a7f6efb 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -763,7 +763,7 @@ class Flux2(Flux): def __init__(self, unet_config): super().__init__(unet_config) - self.memory_usage_factor = self.memory_usage_factor * (2.0 * 2.0) * 2.36 + self.memory_usage_factor = self.memory_usage_factor * (2.0 * 2.0) * (unet_config['hidden_size'] / 2604) def get_model(self, state_dict, prefix="", device=None): out = model_base.Flux2(self, device=device) @@ -845,7 +845,7 @@ class LTXAV(LTXV): def __init__(self, unet_config): super().__init__(unet_config) - self.memory_usage_factor = 0.055 # TODO + self.memory_usage_factor = 0.077 # TODO def get_model(self, state_dict, prefix="", device=None): out = model_base.LTXAV(self, device=device) @@ -1042,7 +1042,7 @@ class ZImage(Lumina2): "shift": 3.0, } - memory_usage_factor = 2.0 + memory_usage_factor = 2.8 supported_inference_dtypes = [torch.bfloat16, torch.float32] diff --git a/comfy/text_encoders/cosmos.py b/comfy/text_encoders/cosmos.py index 448381fa9..f4b40ac68 100644 --- a/comfy/text_encoders/cosmos.py +++ b/comfy/text_encoders/cosmos.py @@ -36,7 +36,7 @@ def te(dtype_t5=None, t5_quantization_metadata=None): if t5_quantization_metadata is not None: model_options = model_options.copy() model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata - if dtype is None: + if dtype_t5 is not None: dtype = dtype_t5 super().__init__(device=device, dtype=dtype, model_options=model_options) return CosmosTEModel_ diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index 21d93d757..4075afca4 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -3,7 +3,7 @@ import comfy.text_encoders.t5 import comfy.text_encoders.sd3_clip import comfy.text_encoders.llama import comfy.model_management -from transformers import T5TokenizerFast, LlamaTokenizerFast +from transformers import T5TokenizerFast, LlamaTokenizerFast, Qwen2Tokenizer import torch import os import json @@ -172,3 +172,60 @@ def flux2_te(dtype_llama=None, llama_quantization_metadata=None, pruned=False): model_options["num_layers"] = 30 super().__init__(device=device, dtype=dtype, model_options=model_options) return Flux2TEModel_ + +class Qwen3Tokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") + super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) + +class Qwen3Tokenizer8B(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") + super().__init__(tokenizer_path, pad_with_end=False, embedding_size=4096, embedding_key='qwen3_8b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) + +class KleinTokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}, 
name="qwen3_4b"): + if name == "qwen3_4b": + tokenizer = Qwen3Tokenizer + elif name == "qwen3_8b": + tokenizer = Qwen3Tokenizer8B + + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name=name, tokenizer=tokenizer) + self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n\n\n\n\n" + + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs): + if llama_template is None: + llama_text = self.llama_template.format(text) + else: + llama_text = llama_template.format(text) + + tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs) + return tokens + +class KleinTokenizer8B(KleinTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}, name="qwen3_8b"): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name=name) + +class Qwen3_4BModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer=[9, 18, 27], layer_idx=None, dtype=None, attention_mask=True, model_options={}): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + +class Qwen3_8BModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer=[9, 18, 27], layer_idx=None, dtype=None, attention_mask=True, model_options={}): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_8B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + +def klein_te(dtype_llama=None, llama_quantization_metadata=None, model_type="qwen3_4b"): + if model_type == "qwen3_4b": + model = Qwen3_4BModel + elif model_type == "qwen3_8b": + model = Qwen3_8BModel + + class Flux2TEModel_(Flux2TEModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + if dtype_llama is not None: + dtype = dtype_llama + super().__init__(device=device, dtype=dtype, name=model_type, model_options=model_options, clip_model=model) + return Flux2TEModel_ diff --git a/comfy/text_encoders/genmo.py b/comfy/text_encoders/genmo.py index 5daea8135..2d7a3fbce 100644 --- a/comfy/text_encoders/genmo.py +++ b/comfy/text_encoders/genmo.py @@ -32,7 +32,7 @@ def mochi_te(dtype_t5=None, t5_quantization_metadata=None): if t5_quantization_metadata is not None: model_options = model_options.copy() model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata - if dtype is None: + if dtype_t5 is not None: dtype = dtype_t5 super().__init__(device=device, dtype=dtype, model_options=model_options) return MochiTEModel_ diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 76731576b..331a30f61 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -99,6 +99,28 @@ class Qwen3_4BConfig: rope_scale = None final_norm: bool = True +@dataclass +class Qwen3_8BConfig: + vocab_size: int = 151936 + hidden_size: int = 4096 + intermediate_size: int = 12288 + num_hidden_layers: int = 36 + 
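
The te() factory hunks in this patch (genmo.py above, plus the matching changes in cosmos.py and pixart_t5.py) all flip the same guard: previously dtype_t5 was applied only as a fallback when no dtype was supplied, so an explicit dtype_t5 could be silently ignored; after the change an explicit dtype_t5 always takes precedence. A minimal sketch of the two behaviors, with illustrative names that are not part of the patch:

    def pick_dtype_old(dtype, dtype_t5):
        # old guard: dtype_t5 applied only when dtype was unset
        if dtype is None:
            dtype = dtype_t5
        return dtype

    def pick_dtype_new(dtype, dtype_t5):
        # new guard: an explicit dtype_t5 always wins
        if dtype_t5 is not None:
            dtype = dtype_t5
        return dtype

    assert pick_dtype_old("fp16", "bf16") == "fp16"   # override lost
    assert pick_dtype_new("fp16", "bf16") == "bf16"   # override honored
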
num_attention_heads: int = 32 + num_key_value_heads: int = 8 + max_position_embeddings: int = 40960 + rms_norm_eps: float = 1e-6 + rope_theta: float = 1000000.0 + transformer_type: str = "llama" + head_dim = 128 + rms_norm_add = False + mlp_activation = "silu" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + rope_scale = None + final_norm: bool = True + @dataclass class Ovis25_2BConfig: vocab_size: int = 151936 @@ -628,6 +650,15 @@ class Qwen3_4B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype +class Qwen3_8B(BaseLlama, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Qwen3_8BConfig(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + class Ovis25_2B(BaseLlama, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py index e5964e42b..c33c77db7 100644 --- a/comfy/text_encoders/lt.py +++ b/comfy/text_encoders/lt.py @@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer): class Gemma3_12BModel(sd1_clip.SDClipModel): def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}): - llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None) - if llama_scaled_fp8 is not None: + llama_quantization_metadata = model_options.get("llama_quantization_metadata", None) + if llama_quantization_metadata is not None: model_options = model_options.copy() - model_options["scaled_fp8"] = llama_scaled_fp8 + model_options["quantization_metadata"] = llama_quantization_metadata super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) @@ -98,10 +98,13 @@ class LTXAVTEModel(torch.nn.Module): out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs) out_device = out.device + if comfy.model_management.should_use_bf16(self.execution_device): + out = out.to(device=self.execution_device, dtype=torch.bfloat16) out = out.movedim(1, -1).to(self.execution_device) out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6) out = out.reshape((out.shape[0], out.shape[1], -1)) out = self.text_embedding_projection(out) + out = out.float() out_vid = self.video_embeddings_connector(out)[0] out_audio = self.audio_embeddings_connector(out)[0] out = torch.concat((out_vid, out_audio), dim=-1) @@ -115,16 +118,25 @@ class LTXAVTEModel(torch.nn.Module): sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True) if len(sdo) == 0: sdo = sd + missing, unexpected = self.load_state_dict(sdo, strict=False) + missing = [k for k in missing if not k.startswith("gemma3_12b.")] # filter out keys that belong to the main gemma model + return (missing, unexpected) - return 
self.load_state_dict(sdo, strict=False) + def memory_estimation_function(self, token_weight_pairs, device=None): + constant = 6.0 + if comfy.model_management.should_use_bf16(device): + constant /= 2.0 + token_weight_pairs = token_weight_pairs.get("gemma3_12b", []) + num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) + return num_tokens * constant * 1024 * 1024 -def ltxav_te(dtype_llama=None, llama_scaled_fp8=None): +def ltxav_te(dtype_llama=None, llama_quantization_metadata=None): class LTXAVTEModel_(LTXAVTEModel): def __init__(self, device="cpu", dtype=None, model_options={}): - if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options: + if llama_quantization_metadata is not None: model_options = model_options.copy() - model_options["llama_scaled_fp8"] = llama_scaled_fp8 + model_options["llama_quantization_metadata"] = llama_quantization_metadata if dtype_llama is not None: dtype = dtype_llama super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options) diff --git a/comfy/text_encoders/pixart_t5.py b/comfy/text_encoders/pixart_t5.py index e5e5f18be..51c6e50c7 100644 --- a/comfy/text_encoders/pixart_t5.py +++ b/comfy/text_encoders/pixart_t5.py @@ -36,7 +36,7 @@ def pixart_te(dtype_t5=None, t5_quantization_metadata=None): if t5_quantization_metadata is not None: model_options = model_options.copy() model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata - if dtype is None: + if dtype_t5 is not None: dtype = dtype_t5 super().__init__(device=device, dtype=dtype, model_options=model_options) return PixArtTEModel_ diff --git a/comfy/utils.py b/comfy/utils.py index ffa98c9b1..2e33a4258 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -30,6 +30,7 @@ from torch.nn.functional import interpolate from einops import rearrange from comfy.cli_args import args import json +import time MMAP_TORCH_FILES = args.mmap_torch_files DISABLE_MMAP = args.disable_mmap @@ -928,7 +929,9 @@ def bislerp(samples, width, height): return result.to(orig_dtype) def lanczos(samples, width, height): - images = [Image.fromarray(np.clip(255. * image.movedim(0, -1).cpu().numpy(), 0, 255).astype(np.uint8)) for image in samples] + #the below API is strict and expects grayscale to be squeezed + samples = samples.squeeze(1) if samples.shape[1] == 1 else samples.movedim(1, -1) + images = [Image.fromarray(np.clip(255. 
* image.cpu().numpy(), 0, 255).astype(np.uint8)) for image in samples] images = [image.resize((width, height), resample=Image.Resampling.LANCZOS) for image in images] images = [torch.from_numpy(np.array(image).astype(np.float32) / 255.0).movedim(-1, 0) for image in images] result = torch.stack(images) @@ -1097,6 +1100,10 @@ def set_progress_bar_global_hook(function): global PROGRESS_BAR_HOOK PROGRESS_BAR_HOOK = function +# Throttle settings for progress bar updates to reduce WebSocket flooding +PROGRESS_THROTTLE_MIN_INTERVAL = 0.1 # 100ms minimum between updates +PROGRESS_THROTTLE_MIN_PERCENT = 0.5 # 0.5% minimum progress change + class ProgressBar: def __init__(self, total, node_id=None): global PROGRESS_BAR_HOOK @@ -1104,6 +1111,8 @@ class ProgressBar: self.current = 0 self.hook = PROGRESS_BAR_HOOK self.node_id = node_id + self._last_update_time = 0.0 + self._last_sent_value = -1 def update_absolute(self, value, total=None, preview=None): if total is not None: @@ -1112,7 +1121,29 @@ class ProgressBar: value = self.total self.current = value if self.hook is not None: - self.hook(self.current, self.total, preview, node_id=self.node_id) + current_time = time.perf_counter() + is_first = (self._last_sent_value < 0) + is_final = (value >= self.total) + has_preview = (preview is not None) + + # Always send immediately for previews, first update, or final update + if has_preview or is_first or is_final: + self.hook(self.current, self.total, preview, node_id=self.node_id) + self._last_update_time = current_time + self._last_sent_value = value + return + + # Apply throttling for regular progress updates + if self.total > 0: + percent_changed = ((value - max(0, self._last_sent_value)) / self.total) * 100 + else: + percent_changed = 100 + time_elapsed = current_time - self._last_update_time + + if time_elapsed >= PROGRESS_THROTTLE_MIN_INTERVAL and percent_changed >= PROGRESS_THROTTLE_MIN_PERCENT: + self.hook(self.current, self.total, preview, node_id=self.node_id) + self._last_update_time = current_time + self._last_sent_value = value def update(self, value): self.update_absolute(self.current + value) diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index 764fa8b2b..e6a0d1821 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -1113,6 +1113,18 @@ class DynamicSlot(ComfyTypeI): out_dict[input_type][finalized_id] = value out_dict["dynamic_paths"][finalized_id] = finalize_prefix(curr_prefix, curr_prefix[-1]) +@comfytype(io_type="IMAGECOMPARE") +class ImageCompare(ComfyTypeI): + Type = dict + + class Input(WidgetInput): + def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, + socketless: bool=True): + super().__init__(id, display_name, optional, tooltip, None, None, socketless) + + def as_dict(self): + return super().as_dict() + DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {} def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]): DYNAMIC_INPUT_LOOKUP[io_type] = func @@ -1213,6 +1225,7 @@ class NodeInfoV1: deprecated: bool=None experimental: bool=None api_node: bool=None + price_badge: dict | None = None @dataclass class NodeInfoV3: @@ -1222,11 +1235,77 @@ class NodeInfoV3: name: str=None display_name: str=None description: str=None + python_module: Any = None category: str=None output_node: bool=None deprecated: bool=None experimental: bool=None 
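
The ProgressBar throttling added above always forwards the first update, the final update, and any update carrying a preview; every other update must clear both thresholds (at least 100 ms elapsed and at least 0.5% progress since the last forwarded value) before the hook fires. A standalone sketch of that gate using the same constants (the class name is illustrative, not part of the patch):

    import time

    MIN_INTERVAL = 0.1  # seconds between forwarded updates
    MIN_PERCENT = 0.5   # minimum progress delta, in percent of total

    class ThrottleGate:
        def __init__(self, total):
            self.total = total
            self._last_time = 0.0
            self._last_value = -1

        def should_send(self, value, has_preview=False):
            now = time.perf_counter()
            # previews, the first update and the final update always pass
            if has_preview or self._last_value < 0 or value >= self.total:
                self._last_time, self._last_value = now, value
                return True
            if self.total > 0:
                changed = (value - max(0, self._last_value)) / self.total * 100.0
            else:
                changed = 100.0
            if now - self._last_time >= MIN_INTERVAL and changed >= MIN_PERCENT:
                self._last_time, self._last_value = now, value
                return True
            return False

    gate = ThrottleGate(total=1000)
    sent = sum(gate.should_send(i) for i in range(1001))
    print(sent)  # far fewer than 1001 updates are actually forwarded
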
api_node: bool=None + price_badge: dict | None = None + + +@dataclass +class PriceBadgeDepends: + widgets: list[str] = field(default_factory=list) + inputs: list[str] = field(default_factory=list) + input_groups: list[str] = field(default_factory=list) + + def validate(self) -> None: + if not isinstance(self.widgets, list) or any(not isinstance(x, str) for x in self.widgets): + raise ValueError("PriceBadgeDepends.widgets must be a list[str].") + if not isinstance(self.inputs, list) or any(not isinstance(x, str) for x in self.inputs): + raise ValueError("PriceBadgeDepends.inputs must be a list[str].") + if not isinstance(self.input_groups, list) or any(not isinstance(x, str) for x in self.input_groups): + raise ValueError("PriceBadgeDepends.input_groups must be a list[str].") + + def as_dict(self, schema_inputs: list["Input"]) -> dict[str, Any]: + # Build lookup: widget_id -> io_type + input_types: dict[str, str] = {} + for inp in schema_inputs: + all_inputs = inp.get_all() + input_types[inp.id] = inp.get_io_type() # First input is always the parent itself + for nested_inp in all_inputs[1:]: + # For DynamicCombo/DynamicSlot, nested inputs are prefixed with parent ID + # to match frontend naming convention (e.g., "should_texture.enable_pbr") + prefixed_id = f"{inp.id}.{nested_inp.id}" + input_types[prefixed_id] = nested_inp.get_io_type() + + # Enrich widgets with type information, raising error for unknown widgets + widgets_data: list[dict[str, str]] = [] + for w in self.widgets: + if w not in input_types: + raise ValueError( + f"PriceBadge depends_on.widgets references unknown widget '{w}'. " + f"Available widgets: {list(input_types.keys())}" + ) + widgets_data.append({"name": w, "type": input_types[w]}) + + return { + "widgets": widgets_data, + "inputs": self.inputs, + "input_groups": self.input_groups, + } + + +@dataclass +class PriceBadge: + expr: str + depends_on: PriceBadgeDepends = field(default_factory=PriceBadgeDepends) + engine: str = field(default="jsonata") + + def validate(self) -> None: + if self.engine != "jsonata": + raise ValueError(f"Unsupported PriceBadge.engine '{self.engine}'. Only 'jsonata' is supported.") + if not isinstance(self.expr, str) or not self.expr.strip(): + raise ValueError("PriceBadge.expr must be a non-empty string.") + self.depends_on.validate() + + def as_dict(self, schema_inputs: list["Input"]) -> dict[str, Any]: + return { + "engine": self.engine, + "depends_on": self.depends_on.as_dict(schema_inputs), + "expr": self.expr, + } @dataclass @@ -1272,6 +1351,8 @@ class Schema: """Flags a node as experimental, informing users that it may change or not work as expected.""" is_api_node: bool=False """Flags a node as an API node. 
See: https://docs.comfy.org/tutorials/api-nodes/overview.""" + price_badge: PriceBadge | None = None + """Optional client-evaluated pricing badge declaration for this node.""" not_idempotent: bool=False """Flags a node as not idempotent; when True, the node will run and not reuse the cached outputs when identical inputs are provided on a different node in the graph.""" enable_expand: bool=False @@ -1302,6 +1383,8 @@ class Schema: input.validate() for output in self.outputs: output.validate() + if self.price_badge is not None: + self.price_badge.validate() def finalize(self): """Add hidden based on selected schema options, and give outputs without ids default ids.""" @@ -1375,7 +1458,8 @@ class Schema: deprecated=self.is_deprecated, experimental=self.is_experimental, api_node=self.is_api_node, - python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes") + python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"), + price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None, ) return info @@ -1407,7 +1491,8 @@ class Schema: deprecated=self.is_deprecated, experimental=self.is_experimental, api_node=self.is_api_node, - python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes") + python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"), + price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None, ) return info @@ -1958,4 +2043,7 @@ __all__ = [ "add_to_dict_v1", "add_to_dict_v3", "V3Data", + "ImageCompare", + "PriceBadgeDepends", + "PriceBadge", ] diff --git a/comfy_api_nodes/apis/bytedance_api.py b/comfy_api_nodes/apis/bytedance_api.py index b8c2f618b..400648cca 100644 --- a/comfy_api_nodes/apis/bytedance_api.py +++ b/comfy_api_nodes/apis/bytedance_api.py @@ -65,11 +65,13 @@ class TaskImageContent(BaseModel): class Text2VideoTaskCreationRequest(BaseModel): model: str = Field(...) content: list[TaskTextContent] = Field(..., min_length=1) + generate_audio: bool | None = Field(...) class Image2VideoTaskCreationRequest(BaseModel): model: str = Field(...) content: list[TaskTextContent | TaskImageContent] = Field(..., min_length=2) + generate_audio: bool | None = Field(...) class TaskCreationResponse(BaseModel): @@ -141,4 +143,9 @@ VIDEO_TASKS_EXECUTION_TIME = { "720p": 65, "1080p": 100, }, + "seedance-1-5-pro-251215": { + "480p": 80, + "720p": 100, + "1080p": 150, + }, } diff --git a/comfy_api_nodes/apis/meshy.py b/comfy_api_nodes/apis/meshy.py new file mode 100644 index 000000000..be46d0d58 --- /dev/null +++ b/comfy_api_nodes/apis/meshy.py @@ -0,0 +1,160 @@ +from typing import TypedDict + +from pydantic import BaseModel, Field + +from comfy_api.latest import Input + + +class InputShouldRemesh(TypedDict): + should_remesh: str + topology: str + target_polycount: int + + +class InputShouldTexture(TypedDict): + should_texture: str + enable_pbr: bool + texture_prompt: str + texture_image: Input.Image | None + + +class MeshyTaskResponse(BaseModel): + result: str = Field(...) + + +class MeshyTextToModelRequest(BaseModel): + mode: str = Field("preview") + prompt: str = Field(..., max_length=600) + art_style: str = Field(..., description="'realistic' or 'sculpture'") + ai_model: str = Field(...) 
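
A price_badge is serialized into node info as {"engine", "depends_on", "expr"} and is meant to be evaluated client-side; the backend only validates the declaration. A hedged sketch of how a client could evaluate one of the simpler expressions appearing later in this patch, assuming the third-party jsonata-python package (the real evaluation lives in the ComfyUI frontend and is not part of this diff):

    import json
    import jsonata  # assumption: pip install jsonata-python

    expr = '{"type":"usd","usd": 0.07 * widgets.duration * (widgets.generate_audio ? 2 : 1)}'
    bindings = {"widgets": {"duration": 5, "generate_audio": True}}

    badge = jsonata.Jsonata(expr).evaluate(bindings)
    print(json.dumps(badge))  # {"type": "usd", "usd": 0.7}, up to float rounding
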
+ topology: str | None = Field(..., description="'quad' or 'triangle'") + target_polycount: int | None = Field(..., ge=100, le=300000) + should_remesh: bool = Field( + True, + description="False returns the original mesh, ignoring topology and polycount.", + ) + symmetry_mode: str = Field(..., description="'auto', 'off' or 'on'") + pose_mode: str = Field(...) + seed: int = Field(...) + moderation: bool = Field(False) + + +class MeshyRefineTask(BaseModel): + mode: str = Field("refine") + preview_task_id: str = Field(...) + enable_pbr: bool | None = Field(...) + texture_prompt: str | None = Field(...) + texture_image_url: str | None = Field(...) + ai_model: str = Field(...) + moderation: bool = Field(False) + + +class MeshyImageToModelRequest(BaseModel): + image_url: str = Field(...) + ai_model: str = Field(...) + topology: str | None = Field(..., description="'quad' or 'triangle'") + target_polycount: int | None = Field(..., ge=100, le=300000) + symmetry_mode: str = Field(..., description="'auto', 'off' or 'on'") + should_remesh: bool = Field( + True, + description="False returns the original mesh, ignoring topology and polycount.", + ) + should_texture: bool = Field(...) + enable_pbr: bool | None = Field(...) + pose_mode: str = Field(...) + texture_prompt: str | None = Field(None, max_length=600) + texture_image_url: str | None = Field(None) + seed: int = Field(...) + moderation: bool = Field(False) + + +class MeshyMultiImageToModelRequest(BaseModel): + image_urls: list[str] = Field(...) + ai_model: str = Field(...) + topology: str | None = Field(..., description="'quad' or 'triangle'") + target_polycount: int | None = Field(..., ge=100, le=300000) + symmetry_mode: str = Field(..., description="'auto', 'off' or 'on'") + should_remesh: bool = Field( + True, + description="False returns the original mesh, ignoring topology and polycount.", + ) + should_texture: bool = Field(...) + enable_pbr: bool | None = Field(...) + pose_mode: str = Field(...) + texture_prompt: str | None = Field(None, max_length=600) + texture_image_url: str | None = Field(None) + seed: int = Field(...) + moderation: bool = Field(False) + + +class MeshyRiggingRequest(BaseModel): + input_task_id: str = Field(...) + height_meters: float = Field(...) + texture_image_url: str | None = Field(...) + + +class MeshyAnimationRequest(BaseModel): + rig_task_id: str = Field(...) + action_id: int = Field(...) + + +class MeshyTextureRequest(BaseModel): + input_task_id: str = Field(...) + ai_model: str = Field(...) + enable_original_uv: bool = Field(...) + enable_pbr: bool = Field(...) + text_style_prompt: str | None = Field(...) + image_style_url: str | None = Field(...) + + +class MeshyModelsUrls(BaseModel): + glb: str = Field("") + + +class MeshyRiggedModelsUrls(BaseModel): + rigged_character_glb_url: str = Field("") + + +class MeshyAnimatedModelsUrls(BaseModel): + animation_glb_url: str = Field("") + + +class MeshyResultTextureUrls(BaseModel): + base_color: str = Field(...) + metallic: str | None = Field(None) + normal: str | None = Field(None) + roughness: str | None = Field(None) + + +class MeshyTaskError(BaseModel): + message: str | None = Field(None) + + +class MeshyModelResult(BaseModel): + id: str = Field(...) + type: str = Field(...) + model_urls: MeshyModelsUrls = Field(MeshyModelsUrls()) + thumbnail_url: str = Field(...) + video_url: str | None = Field(None) + status: str = Field(...) 
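
The Meshy request models lean on one pydantic v2 idiom worth calling out: `str | None = Field(...)` declares a field that callers must pass explicitly but that may be null, which keeps "forgot to set it" distinct from "deliberately sent null" when building the upstream JSON payload. A small sketch, not part of the patch:

    from pydantic import BaseModel, Field, ValidationError

    class Demo(BaseModel):
        topology: str | None = Field(...)  # required, but None is a legal value

    print(Demo(topology=None).model_dump())  # {'topology': None}
    try:
        Demo()  # omitting the field entirely is rejected
    except ValidationError as exc:
        print(exc.error_count(), "validation error")
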
+ progress: int = Field(0) + texture_urls: list[MeshyResultTextureUrls] | None = Field([]) + task_error: MeshyTaskError | None = Field(None) + + +class MeshyRiggedResult(BaseModel): + id: str = Field(...) + type: str = Field(...) + status: str = Field(...) + progress: int = Field(0) + result: MeshyRiggedModelsUrls = Field(MeshyRiggedModelsUrls()) + task_error: MeshyTaskError | None = Field(None) + + +class MeshyAnimationResult(BaseModel): + id: str = Field(...) + type: str = Field(...) + status: str = Field(...) + progress: int = Field(0) + result: MeshyAnimatedModelsUrls = Field(MeshyAnimatedModelsUrls()) + task_error: MeshyTaskError | None = Field(None) diff --git a/comfy_api_nodes/apis/vidu.py b/comfy_api_nodes/apis/vidu.py new file mode 100644 index 000000000..a9bb6f7ce --- /dev/null +++ b/comfy_api_nodes/apis/vidu.py @@ -0,0 +1,41 @@ +from pydantic import BaseModel, Field + + +class SubjectReference(BaseModel): + id: str = Field(...) + images: list[str] = Field(...) + + +class TaskCreationRequest(BaseModel): + model: str = Field(...) + prompt: str = Field(..., max_length=2000) + duration: int = Field(...) + seed: int = Field(..., ge=0, le=2147483647) + aspect_ratio: str | None = Field(None) + resolution: str | None = Field(None) + movement_amplitude: str | None = Field(None) + images: list[str] | None = Field(None, description="Base64 encoded string or image URL") + subjects: list[SubjectReference] | None = Field(None) + bgm: bool | None = Field(None) + audio: bool | None = Field(None) + + +class TaskCreationResponse(BaseModel): + task_id: str = Field(...) + state: str = Field(...) + created_at: str = Field(...) + code: int | None = Field(None, description="Error code") + + +class TaskResult(BaseModel): + id: str = Field(..., description="Creation id") + url: str = Field(..., description="The URL of the generated results, valid for one hour") + cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour") + + +class TaskStatusResponse(BaseModel): + state: str = Field(...) + err_code: str | None = Field(None) + progress: float | None = Field(None) + credits: int | None = Field(None) + creations: list[TaskResult] = Field(..., description="Generated results") diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py index ce077d6b3..76021ef7f 100644 --- a/comfy_api_nodes/nodes_bfl.py +++ b/comfy_api_nodes/nodes_bfl.py @@ -97,6 +97,9 @@ class FluxProUltraImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.06}""", + ), ) @classmethod @@ -352,6 +355,9 @@ class FluxProExpandNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.05}""", + ), ) @classmethod @@ -458,6 +464,9 @@ class FluxProFillNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.05}""", + ), ) @classmethod @@ -511,6 +520,21 @@ class Flux2ProImageNode(IO.ComfyNode): NODE_ID = "Flux2ProImageNode" DISPLAY_NAME = "Flux.2 [pro] Image" API_ENDPOINT = "/proxy/bfl/flux-2-pro/generate" + PRICE_BADGE_EXPR = """ + ( + $MP := 1024 * 1024; + $outMP := $max([1, $floor(((widgets.width * widgets.height) + $MP - 1) / $MP)]); + $outputCost := 0.03 + 0.015 * ($outMP - 1); + inputs.images.connected + ? 
{ + "type":"range_usd", + "min_usd": $outputCost + 0.015, + "max_usd": $outputCost + 0.12, + "format": { "approximate": true } + } + : {"type":"usd","usd": $outputCost} + ) + """ @classmethod def define_schema(cls) -> IO.Schema: @@ -563,6 +587,10 @@ class Flux2ProImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["width", "height"], inputs=["images"]), + expr=cls.PRICE_BADGE_EXPR, + ), ) @classmethod @@ -623,6 +651,22 @@ class Flux2MaxImageNode(Flux2ProImageNode): NODE_ID = "Flux2MaxImageNode" DISPLAY_NAME = "Flux.2 [max] Image" API_ENDPOINT = "/proxy/bfl/flux-2-max/generate" + PRICE_BADGE_EXPR = """ + ( + $MP := 1024 * 1024; + $outMP := $max([1, $floor(((widgets.width * widgets.height) + $MP - 1) / $MP)]); + $outputCost := 0.07 + 0.03 * ($outMP - 1); + + inputs.images.connected + ? { + "type":"range_usd", + "min_usd": $outputCost + 0.03, + "max_usd": $outputCost + 0.24, + "format": { "approximate": true } + } + : {"type":"usd","usd": $outputCost} + ) + """ class BFLExtension(ComfyExtension): diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index d4a2cfae6..9cb1ca004 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -126,6 +126,9 @@ class ByteDanceImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.03}""", + ), ) @classmethod @@ -367,6 +370,19 @@ class ByteDanceSeedreamNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $price := $contains(widgets.model, "seedream-4-5-251128") ? 0.04 : 0.03; + { + "type":"usd", + "usd": $price, + "format": { "suffix":" x images/Run", "approximate": true } + } + ) + """, + ), ) @classmethod @@ -461,7 +477,12 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): inputs=[ IO.Combo.Input( "model", - options=["seedance-1-0-pro-250528", "seedance-1-0-lite-t2v-250428", "seedance-1-0-pro-fast-251015"], + options=[ + "seedance-1-5-pro-251215", + "seedance-1-0-pro-250528", + "seedance-1-0-lite-t2v-250428", + "seedance-1-0-pro-fast-251015", + ], default="seedance-1-0-pro-fast-251015", ), IO.String.Input( @@ -512,6 +533,12 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, ), + IO.Boolean.Input( + "generate_audio", + default=False, + tooltip="This parameter is ignored for any model except seedance-1-5-pro.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -522,6 +549,7 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -535,7 +563,10 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): seed: int, camera_fixed: bool, watermark: bool, + generate_audio: bool = False, ) -> IO.NodeOutput: + if model == "seedance-1-5-pro-251215" and duration < 4: + raise ValueError("Minimum supported duration for Seedance 1.5 Pro is 4 seconds.") validate_string(prompt, strip_whitespace=True, min_length=1) raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"]) @@ -550,7 +581,11 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): ) return await process_video_task( cls, - payload=Text2VideoTaskCreationRequest(model=model, content=[TaskTextContent(text=prompt)]), + payload=Text2VideoTaskCreationRequest( + model=model, + 
content=[TaskTextContent(text=prompt)], + generate_audio=generate_audio if model == "seedance-1-5-pro-251215" else None, + ), estimated_duration=max(1, math.ceil(VIDEO_TASKS_EXECUTION_TIME[model][resolution] * (duration / 10.0))), ) @@ -567,7 +602,12 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): inputs=[ IO.Combo.Input( "model", - options=["seedance-1-0-pro-250528", "seedance-1-0-lite-t2v-250428", "seedance-1-0-pro-fast-251015"], + options=[ + "seedance-1-5-pro-251215", + "seedance-1-0-pro-250528", + "seedance-1-0-lite-i2v-250428", + "seedance-1-0-pro-fast-251015", + ], default="seedance-1-0-pro-fast-251015", ), IO.String.Input( @@ -622,6 +662,12 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, ), + IO.Boolean.Input( + "generate_audio", + default=False, + tooltip="This parameter is ignored for any model except seedance-1-5-pro.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -632,6 +678,7 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -646,7 +693,10 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): seed: int, camera_fixed: bool, watermark: bool, + generate_audio: bool = False, ) -> IO.NodeOutput: + if model == "seedance-1-5-pro-251215" and duration < 4: + raise ValueError("Minimum supported duration for Seedance 1.5 Pro is 4 seconds.") validate_string(prompt, strip_whitespace=True, min_length=1) raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"]) validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000) @@ -668,6 +718,7 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): payload=Image2VideoTaskCreationRequest( model=model, content=[TaskTextContent(text=prompt), TaskImageContent(image_url=TaskImageContentUrl(url=image_url))], + generate_audio=generate_audio if model == "seedance-1-5-pro-251215" else None, ), estimated_duration=max(1, math.ceil(VIDEO_TASKS_EXECUTION_TIME[model][resolution] * (duration / 10.0))), ) @@ -685,7 +736,7 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): inputs=[ IO.Combo.Input( "model", - options=["seedance-1-0-pro-250528", "seedance-1-0-lite-i2v-250428"], + options=["seedance-1-5-pro-251215", "seedance-1-0-pro-250528", "seedance-1-0-lite-i2v-250428"], default="seedance-1-0-lite-i2v-250428", ), IO.String.Input( @@ -744,6 +795,12 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, ), + IO.Boolean.Input( + "generate_audio", + default=False, + tooltip="This parameter is ignored for any model except seedance-1-5-pro.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -754,6 +811,7 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -769,7 +827,10 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): seed: int, camera_fixed: bool, watermark: bool, + generate_audio: bool = False, ) -> IO.NodeOutput: + if model == "seedance-1-5-pro-251215" and duration < 4: + raise ValueError("Minimum supported duration for Seedance 1.5 Pro is 4 seconds.") validate_string(prompt, strip_whitespace=True, min_length=1) raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"]) for i in (first_frame, last_frame): @@ -802,6 +863,7 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): 
TaskImageContent(image_url=TaskImageContentUrl(url=str(download_urls[0])), role="first_frame"), TaskImageContent(image_url=TaskImageContentUrl(url=str(download_urls[1])), role="last_frame"), ], + generate_audio=generate_audio if model == "seedance-1-5-pro-251215" else None, ), estimated_duration=max(1, math.ceil(VIDEO_TASKS_EXECUTION_TIME[model][resolution] * (duration / 10.0))), ) @@ -877,6 +939,41 @@ class ByteDanceImageReferenceNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "duration", "resolution"]), + expr=""" + ( + $priceByModel := { + "seedance-1-0-pro": { + "480p":[0.23,0.24], + "720p":[0.51,0.56] + }, + "seedance-1-0-lite": { + "480p":[0.17,0.18], + "720p":[0.37,0.41] + } + }; + $model := widgets.model; + $modelKey := + $contains($model, "seedance-1-0-pro") ? "seedance-1-0-pro" : + "seedance-1-0-lite"; + $resolution := widgets.resolution; + $resKey := + $contains($resolution, "720") ? "720p" : + "480p"; + $modelPrices := $lookup($priceByModel, $modelKey); + $baseRange := $lookup($modelPrices, $resKey); + $min10s := $baseRange[0]; + $max10s := $baseRange[1]; + $scale := widgets.duration / 10; + $minCost := $min10s * $scale; + $maxCost := $max10s * $scale; + ($minCost = $maxCost) + ? {"type":"usd","usd": $minCost} + : {"type":"range_usd","min_usd": $minCost, "max_usd": $maxCost} + ) + """, + ), ) @classmethod @@ -946,6 +1043,59 @@ def raise_if_text_params(prompt: str, text_params: list[str]) -> None: ) +PRICE_BADGE_VIDEO = IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "duration", "resolution", "generate_audio"]), + expr=""" + ( + $priceByModel := { + "seedance-1-5-pro": { + "480p":[0.12,0.12], + "720p":[0.26,0.26], + "1080p":[0.58,0.59] + }, + "seedance-1-0-pro": { + "480p":[0.23,0.24], + "720p":[0.51,0.56], + "1080p":[1.18,1.22] + }, + "seedance-1-0-pro-fast": { + "480p":[0.09,0.1], + "720p":[0.21,0.23], + "1080p":[0.47,0.49] + }, + "seedance-1-0-lite": { + "480p":[0.17,0.18], + "720p":[0.37,0.41], + "1080p":[0.85,0.88] + } + }; + $model := widgets.model; + $modelKey := + $contains($model, "seedance-1-5-pro") ? "seedance-1-5-pro" : + $contains($model, "seedance-1-0-pro-fast") ? "seedance-1-0-pro-fast" : + $contains($model, "seedance-1-0-pro") ? "seedance-1-0-pro" : + "seedance-1-0-lite"; + $resolution := widgets.resolution; + $resKey := + $contains($resolution, "1080") ? "1080p" : + $contains($resolution, "720") ? "720p" : + "480p"; + $modelPrices := $lookup($priceByModel, $modelKey); + $baseRange := $lookup($modelPrices, $resKey); + $min10s := $baseRange[0]; + $max10s := $baseRange[1]; + $scale := widgets.duration / 10; + $audioMultiplier := ($modelKey = "seedance-1-5-pro" and widgets.generate_audio) ? 2 : 1; + $minCost := $min10s * $scale * $audioMultiplier; + $maxCost := $max10s * $scale * $audioMultiplier; + ($minCost = $maxCost) + ? 
{"type":"usd","usd": $minCost, "format": { "approximate": true }} + : {"type":"range_usd","min_usd": $minCost, "max_usd": $maxCost, "format": { "approximate": true }} + ) + """, +) + + class ByteDanceExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py index e8ed7e797..a2daea50a 100644 --- a/comfy_api_nodes/nodes_gemini.py +++ b/comfy_api_nodes/nodes_gemini.py @@ -130,7 +130,7 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera Returns: List of response parts matching the requested type. """ - if response.candidates is None: + if not response.candidates: if response.promptFeedback and response.promptFeedback.blockReason: feedback = response.promptFeedback raise ValueError( @@ -141,14 +141,24 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera "try changing it to `IMAGE+TEXT` to view the model's reasoning and understand why image generation failed." ) parts = [] - for part in response.candidates[0].content.parts: - if part_type == "text" and part.text: - parts.append(part) - elif part.inlineData and part.inlineData.mimeType == part_type: - parts.append(part) - elif part.fileData and part.fileData.mimeType == part_type: - parts.append(part) - # Skip parts that don't match the requested type + blocked_reasons = [] + for candidate in response.candidates: + if candidate.finishReason and candidate.finishReason.upper() == "IMAGE_PROHIBITED_CONTENT": + blocked_reasons.append(candidate.finishReason) + continue + if candidate.content is None or candidate.content.parts is None: + continue + for part in candidate.content.parts: + if part_type == "text" and part.text: + parts.append(part) + elif part.inlineData and part.inlineData.mimeType == part_type: + parts.append(part) + elif part.fileData and part.fileData.mimeType == part_type: + parts.append(part) + + if not parts and blocked_reasons: + raise ValueError(f"Gemini API blocked the request. Reasons: {blocked_reasons}") + return parts @@ -309,6 +319,30 @@ class GeminiNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m, "gemini-2.5-flash") ? { + "type": "list_usd", + "usd": [0.0003, 0.0025], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens"} + } + : $contains($m, "gemini-2.5-pro") ? { + "type": "list_usd", + "usd": [0.00125, 0.01], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gemini-3-pro-preview") ? { + "type": "list_usd", + "usd": [0.002, 0.012], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : {"type":"text", "text":"Token-based"} + ) + """, + ), ) @classmethod @@ -570,6 +604,9 @@ class GeminiImage(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.039,"format":{"suffix":"/Image (1K)","approximate":true}}""", + ), ) @classmethod @@ -700,6 +737,19 @@ class GeminiImage2(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["resolution"]), + expr=""" + ( + $r := widgets.resolution; + ($contains($r,"1k") or $contains($r,"2k")) + ? {"type":"usd","usd":0.134,"format":{"suffix":"/Image","approximate":true}} + : $contains($r,"4k") + ? 
{"type":"usd","usd":0.24,"format":{"suffix":"/Image","approximate":true}} + : {"type":"text","text":"Token-based"} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_ideogram.py b/comfy_api_nodes/nodes_ideogram.py index 48f94e612..827b3523a 100644 --- a/comfy_api_nodes/nodes_ideogram.py +++ b/comfy_api_nodes/nodes_ideogram.py @@ -236,7 +236,6 @@ class IdeogramV1(IO.ComfyNode): display_name="Ideogram V1", category="api node/image/Ideogram", description="Generates images using the Ideogram V1 model.", - is_api_node=True, inputs=[ IO.String.Input( "prompt", @@ -298,6 +297,17 @@ class IdeogramV1(IO.ComfyNode): IO.Hidden.api_key_comfy_org, IO.Hidden.unique_id, ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["num_images", "turbo"]), + expr=""" + ( + $n := widgets.num_images; + $base := (widgets.turbo = true) ? 0.0286 : 0.0858; + {"type":"usd","usd": $round($base * $n, 2)} + ) + """, + ), ) @classmethod @@ -351,7 +361,6 @@ class IdeogramV2(IO.ComfyNode): display_name="Ideogram V2", category="api node/image/Ideogram", description="Generates images using the Ideogram V2 model.", - is_api_node=True, inputs=[ IO.String.Input( "prompt", @@ -436,6 +445,17 @@ class IdeogramV2(IO.ComfyNode): IO.Hidden.api_key_comfy_org, IO.Hidden.unique_id, ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["num_images", "turbo"]), + expr=""" + ( + $n := widgets.num_images; + $base := (widgets.turbo = true) ? 0.0715 : 0.1144; + {"type":"usd","usd": $round($base * $n, 2)} + ) + """, + ), ) @classmethod @@ -506,7 +526,6 @@ class IdeogramV3(IO.ComfyNode): category="api node/image/Ideogram", description="Generates images using the Ideogram V3 model. " "Supports both regular image generation from text prompts and image editing with mask.", - is_api_node=True, inputs=[ IO.String.Input( "prompt", @@ -591,6 +610,23 @@ class IdeogramV3(IO.ComfyNode): IO.Hidden.api_key_comfy_org, IO.Hidden.unique_id, ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["rendering_speed", "num_images"], inputs=["character_image"]), + expr=""" + ( + $n := widgets.num_images; + $speed := widgets.rendering_speed; + $hasChar := inputs.character_image.connected; + $base := + $contains($speed,"quality") ? ($hasChar ? 0.286 : 0.1287) : + $contains($speed,"default") ? ($hasChar ? 0.2145 : 0.0858) : + $contains($speed,"turbo") ? ($hasChar ? 0.143 : 0.0429) : + 0.0858; + {"type":"usd","usd": $round($base * $n, 2)} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 9c707a339..05dde88b1 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -567,7 +567,7 @@ async def execute_lipsync( # Upload the audio file to Comfy API and get download URL if audio: audio_url = await upload_audio_to_comfyapi( - cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg", filename="output.mp3" + cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg" ) logging.info("Uploaded audio to Comfy API. URL: %s", audio_url) else: @@ -764,6 +764,33 @@ class KlingTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode"]), + expr=""" + ( + $m := widgets.mode; + $contains($m,"v2-5-turbo") + ? ($contains($m,"10") ? {"type":"usd","usd":0.7} : {"type":"usd","usd":0.35}) + : $contains($m,"v2-1-master") + ? 
($contains($m,"10s") ? {"type":"usd","usd":2.8} : {"type":"usd","usd":1.4}) + : $contains($m,"v2-master") + ? ($contains($m,"10s") ? {"type":"usd","usd":2.8} : {"type":"usd","usd":1.4}) + : $contains($m,"v1-6") + ? ( + $contains($m,"pro") + ? ($contains($m,"10s") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($m,"10s") ? {"type":"usd","usd":0.56} : {"type":"usd","usd":0.28}) + ) + : $contains($m,"v1") + ? ( + $contains($m,"pro") + ? ($contains($m,"10s") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($m,"10s") ? {"type":"usd","usd":0.28} : {"type":"usd","usd":0.14}) + ) + : {"type":"usd","usd":0.14} + ) + """, + ), ) @classmethod @@ -818,6 +845,16 @@ class OmniProTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $rates := {"std": 0.084, "pro": 0.112}; + {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} + ) + """, + ), ) @classmethod @@ -886,6 +923,16 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $rates := {"std": 0.084, "pro": 0.112}; + {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} + ) + """, + ), ) @classmethod @@ -981,6 +1028,16 @@ class OmniProImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $rates := {"std": 0.084, "pro": 0.112}; + {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} + ) + """, + ), ) @classmethod @@ -1056,6 +1113,16 @@ class OmniProVideoToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $rates := {"std": 0.126, "pro": 0.168}; + {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} + ) + """, + ), ) @classmethod @@ -1142,6 +1209,16 @@ class OmniProEditVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["resolution"]), + expr=""" + ( + $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $rates := {"std": 0.126, "pro": 0.168}; + {"type":"usd","usd": $lookup($rates, $mode), "format":{"suffix":"/second"}} + ) + """, + ), ) @classmethod @@ -1228,6 +1305,9 @@ class OmniProImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.028}""", + ), ) @classmethod @@ -1313,6 +1393,9 @@ class KlingCameraControlT2VNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.14}""", + ), ) @classmethod @@ -1375,6 +1458,33 @@ class KlingImage2VideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode", "model_name", "duration"]), + expr=""" + ( + $mode := widgets.mode; + $model := widgets.model_name; + $dur := widgets.duration; + $contains($model,"v2-5-turbo") + ? 
($contains($dur,"10") ? {"type":"usd","usd":0.7} : {"type":"usd","usd":0.35}) + : ($contains($model,"v2-1-master") or $contains($model,"v2-master")) + ? ($contains($dur,"10") ? {"type":"usd","usd":2.8} : {"type":"usd","usd":1.4}) + : ($contains($model,"v2-1") or $contains($model,"v1-6") or $contains($model,"v1-5")) + ? ( + $contains($mode,"pro") + ? ($contains($dur,"10") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($dur,"10") ? {"type":"usd","usd":0.56} : {"type":"usd","usd":0.28}) + ) + : $contains($model,"v1") + ? ( + $contains($mode,"pro") + ? ($contains($dur,"10") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($dur,"10") ? {"type":"usd","usd":0.28} : {"type":"usd","usd":0.14}) + ) + : {"type":"usd","usd":0.14} + ) + """, + ), ) @classmethod @@ -1448,6 +1558,9 @@ class KlingCameraControlI2VNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.49}""", + ), ) @classmethod @@ -1518,6 +1631,33 @@ class KlingStartEndFrameNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode"]), + expr=""" + ( + $m := widgets.mode; + $contains($m,"v2-5-turbo") + ? ($contains($m,"10") ? {"type":"usd","usd":0.7} : {"type":"usd","usd":0.35}) + : $contains($m,"v2-1") + ? ($contains($m,"10s") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : $contains($m,"v2-master") + ? ($contains($m,"10s") ? {"type":"usd","usd":2.8} : {"type":"usd","usd":1.4}) + : $contains($m,"v1-6") + ? ( + $contains($m,"pro") + ? ($contains($m,"10s") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($m,"10s") ? {"type":"usd","usd":0.56} : {"type":"usd","usd":0.28}) + ) + : $contains($m,"v1") + ? ( + $contains($m,"pro") + ? ($contains($m,"10s") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($m,"10s") ? {"type":"usd","usd":0.28} : {"type":"usd","usd":0.14}) + ) + : {"type":"usd","usd":0.14} + ) + """, + ), ) @classmethod @@ -1583,6 +1723,9 @@ class KlingVideoExtendNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.28}""", + ), ) @classmethod @@ -1664,6 +1807,29 @@ class KlingDualCharacterVideoEffectNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode", "model_name", "duration"]), + expr=""" + ( + $mode := widgets.mode; + $model := widgets.model_name; + $dur := widgets.duration; + ($contains($model,"v1-6") or $contains($model,"v1-5")) + ? ( + $contains($mode,"pro") + ? ($contains($dur,"10") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($dur,"10") ? {"type":"usd","usd":0.56} : {"type":"usd","usd":0.28}) + ) + : $contains($model,"v1") + ? ( + $contains($mode,"pro") + ? ($contains($dur,"10") ? {"type":"usd","usd":0.98} : {"type":"usd","usd":0.49}) + : ($contains($dur,"10") ? {"type":"usd","usd":0.28} : {"type":"usd","usd":0.14}) + ) + : {"type":"usd","usd":0.14} + ) + """, + ), ) @classmethod @@ -1728,6 +1894,16 @@ class KlingSingleImageVideoEffectNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["effect_scene"]), + expr=""" + ( + ($contains(widgets.effect_scene,"dizzydizzy") or $contains(widgets.effect_scene,"bloombloom")) + ? 
{"type":"usd","usd":0.49} + : {"type":"usd","usd":0.28} + ) + """, + ), ) @classmethod @@ -1782,6 +1958,9 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.1,"format":{"approximate":true}}""", + ), ) @classmethod @@ -1842,6 +2021,9 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.1,"format":{"approximate":true}}""", + ), ) @classmethod @@ -1892,6 +2074,9 @@ class KlingVirtualTryOnNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.7}""", + ), ) @classmethod @@ -1991,6 +2176,19 @@ class KlingImageGenerationNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model_name", "n"], inputs=["image"]), + expr=""" + ( + $m := widgets.model_name; + $base := + $contains($m,"kling-v1-5") + ? (inputs.image.connected ? 0.028 : 0.014) + : ($contains($m,"kling-v1") ? 0.0035 : 0.014); + {"type":"usd","usd": $base * widgets.n} + ) + """, + ), ) @classmethod @@ -2074,6 +2272,10 @@ class TextToVideoWithAudio(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "generate_audio"]), + expr="""{"type":"usd","usd": 0.07 * widgets.duration * (widgets.generate_audio ? 2 : 1)}""", + ), ) @classmethod @@ -2138,6 +2340,10 @@ class ImageToVideoWithAudio(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "generate_audio"]), + expr="""{"type":"usd","usd": 0.07 * widgets.duration * (widgets.generate_audio ? 
2 : 1)}""", + ), ) @classmethod @@ -2218,6 +2424,15 @@ class MotionControl(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode"]), + expr=""" + ( + $prices := {"std": 0.07, "pro": 0.112}; + {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_ltxv.py b/comfy_api_nodes/nodes_ltxv.py index 7e61560dc..c6424af92 100644 --- a/comfy_api_nodes/nodes_ltxv.py +++ b/comfy_api_nodes/nodes_ltxv.py @@ -28,6 +28,22 @@ class ExecuteTaskRequest(BaseModel): image_uri: str | None = Field(None) +PRICE_BADGE = IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "duration", "resolution"]), + expr=""" + ( + $prices := { + "ltx-2 (pro)": {"1920x1080":0.06,"2560x1440":0.12,"3840x2160":0.24}, + "ltx-2 (fast)": {"1920x1080":0.04,"2560x1440":0.08,"3840x2160":0.16} + }; + $modelPrices := $lookup($prices, $lowercase(widgets.model)); + $pps := $lookup($modelPrices, widgets.resolution); + {"type":"usd","usd": $pps * widgets.duration} + ) + """, +) + + class TextToVideoNode(IO.ComfyNode): @classmethod def define_schema(cls): @@ -69,6 +85,7 @@ class TextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE, ) @classmethod @@ -145,6 +162,7 @@ class ImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE, ) @classmethod diff --git a/comfy_api_nodes/nodes_luma.py b/comfy_api_nodes/nodes_luma.py index 894f2b08c..95cb442e5 100644 --- a/comfy_api_nodes/nodes_luma.py +++ b/comfy_api_nodes/nodes_luma.py @@ -189,6 +189,19 @@ class LumaImageGenerationNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m,"photon-flash-1") + ? {"type":"usd","usd":0.0027} + : $contains($m,"photon-1") + ? {"type":"usd","usd":0.0104} + : {"type":"usd","usd":0.0246} + ) + """, + ), ) @classmethod @@ -303,6 +316,19 @@ class LumaImageModifyNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m,"photon-flash-1") + ? {"type":"usd","usd":0.0027} + : $contains($m,"photon-1") + ? {"type":"usd","usd":0.0104} + : {"type":"usd","usd":0.0246} + ) + """, + ), ) @classmethod @@ -395,6 +421,7 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -505,6 +532,8 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, + ) @classmethod @@ -568,6 +597,53 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode): return LumaKeyframes(frame0=frame0, frame1=frame1) +PRICE_BADGE_VIDEO = IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "resolution", "duration"]), + expr=""" + ( + $p := { + "ray-flash-2": { + "5s": {"4k":3.13,"1080p":0.79,"720p":0.34,"540p":0.2}, + "9s": {"4k":5.65,"1080p":1.42,"720p":0.61,"540p":0.36} + }, + "ray-2": { + "5s": {"4k":9.11,"1080p":2.27,"720p":1.02,"540p":0.57}, + "9s": {"4k":16.4,"1080p":4.1,"720p":1.83,"540p":1.03} + } + }; + + $m := widgets.model; + $d := widgets.duration; + $r := widgets.resolution; + + $modelKey := + $contains($m,"ray-flash-2") ? "ray-flash-2" : + $contains($m,"ray-2") ? 
"ray-2" : + $contains($m,"ray-1-6") ? "ray-1-6" : + "other"; + + $durKey := $contains($d,"5s") ? "5s" : $contains($d,"9s") ? "9s" : ""; + $resKey := + $contains($r,"4k") ? "4k" : + $contains($r,"1080p") ? "1080p" : + $contains($r,"720p") ? "720p" : + $contains($r,"540p") ? "540p" : ""; + + $modelPrices := $lookup($p, $modelKey); + $durPrices := $lookup($modelPrices, $durKey); + $v := $lookup($durPrices, $resKey); + + $price := + ($modelKey = "ray-1-6") ? 0.5 : + ($modelKey = "other") ? 0.79 : + ($exists($v) ? $v : 0.79); + + {"type":"usd","usd": $price} + ) + """, +) + + class LumaExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: diff --git a/comfy_api_nodes/nodes_meshy.py b/comfy_api_nodes/nodes_meshy.py new file mode 100644 index 000000000..740607983 --- /dev/null +++ b/comfy_api_nodes/nodes_meshy.py @@ -0,0 +1,790 @@ +import os + +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.meshy import ( + InputShouldRemesh, + InputShouldTexture, + MeshyAnimationRequest, + MeshyAnimationResult, + MeshyImageToModelRequest, + MeshyModelResult, + MeshyMultiImageToModelRequest, + MeshyRefineTask, + MeshyRiggedResult, + MeshyRiggingRequest, + MeshyTaskResponse, + MeshyTextToModelRequest, + MeshyTextureRequest, +) +from comfy_api_nodes.util import ( + ApiEndpoint, + download_url_to_bytesio, + poll_op, + sync_op, + upload_images_to_comfyapi, + validate_string, +) +from folder_paths import get_output_directory + + +class MeshyTextToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyTextToModelNode", + display_name="Meshy: Text to Model", + category="api node/3d/Meshy", + inputs=[ + IO.Combo.Input("model", options=["latest"]), + IO.String.Input("prompt", multiline=True, default=""), + IO.Combo.Input("style", options=["realistic", "sculpture"]), + IO.DynamicCombo.Input( + "should_remesh", + options=[ + IO.DynamicCombo.Option( + "true", + [ + IO.Combo.Input("topology", options=["triangle", "quad"]), + IO.Int.Input( + "target_polycount", + default=300000, + min=100, + max=300000, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + IO.DynamicCombo.Option("false", []), + ], + tooltip="When set to false, returns an unprocessed triangular mesh.", + ), + IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]), + IO.Combo.Input( + "pose_mode", + options=["", "A-pose", "T-pose"], + tooltip="Specify the pose mode for the generated model.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.8}""", + ), + ) + + @classmethod + async def execute( + cls, + model: str, + prompt: str, + style: str, + should_remesh: InputShouldRemesh, + symmetry_mode: str, + pose_mode: str, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, field_name="prompt", min_length=1, max_length=600) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/meshy/openapi/v2/text-to-3d", method="POST"), + 
response_model=MeshyTaskResponse, + data=MeshyTextToModelRequest( + prompt=prompt, + art_style=style, + ai_model=model, + topology=should_remesh.get("topology", None), + target_polycount=should_remesh.get("target_polycount", None), + should_remesh=should_remesh["should_remesh"] == "true", + symmetry_mode=symmetry_mode, + pose_mode=pose_mode.lower(), + seed=seed, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{response.result}"), + response_model=MeshyModelResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) + return IO.NodeOutput(model_file, response.result) + + +class MeshyRefineNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyRefineNode", + display_name="Meshy: Refine Draft Model", + category="api node/3d/Meshy", + description="Refine a previously created draft model.", + inputs=[ + IO.Combo.Input("model", options=["latest"]), + IO.Custom("MESHY_TASK_ID").Input("meshy_task_id"), + IO.Boolean.Input( + "enable_pbr", + default=False, + tooltip="Generate PBR Maps (metallic, roughness, normal) in addition to the base color. " + "Note: this should be set to false when using Sculpture style, " + "as Sculpture style generates its own set of PBR maps.", + ), + IO.String.Input( + "texture_prompt", + default="", + multiline=True, + tooltip="Provide a text prompt to guide the texturing process. " + "Maximum 600 characters. Cannot be used at the same time as 'texture_image'.", + ), + IO.Image.Input( + "texture_image", + tooltip="Only one of 'texture_image' or 'texture_prompt' may be used at the same time.", + optional=True, + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), + ) + + @classmethod + async def execute( + cls, + model: str, + meshy_task_id: str, + enable_pbr: bool, + texture_prompt: str, + texture_image: Input.Image | None = None, + ) -> IO.NodeOutput: + if texture_prompt and texture_image is not None: + raise ValueError("texture_prompt and texture_image cannot be used at the same time") + texture_image_url = None + if texture_prompt: + validate_string(texture_prompt, field_name="texture_prompt", max_length=600) + if texture_image is not None: + texture_image_url = (await upload_images_to_comfyapi(cls, texture_image, wait_label="Uploading texture"))[0] + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/meshy/openapi/v2/text-to-3d", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyRefineTask( + preview_task_id=meshy_task_id, + enable_pbr=enable_pbr, + texture_prompt=texture_prompt if texture_prompt else None, + texture_image_url=texture_image_url, + ai_model=model, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{response.result}"), + response_model=MeshyModelResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) + 
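# Hand back the refined model's filename together with the new task id so it can feed further Meshy nodes (texture, rig). +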
return IO.NodeOutput(model_file, response.result) + + +class MeshyImageToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyImageToModelNode", + display_name="Meshy: Image to Model", + category="api node/3d/Meshy", + inputs=[ + IO.Combo.Input("model", options=["latest"]), + IO.Image.Input("image"), + IO.DynamicCombo.Input( + "should_remesh", + options=[ + IO.DynamicCombo.Option( + "true", + [ + IO.Combo.Input("topology", options=["triangle", "quad"]), + IO.Int.Input( + "target_polycount", + default=300000, + min=100, + max=300000, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + IO.DynamicCombo.Option("false", []), + ], + tooltip="When set to false, returns an unprocessed triangular mesh.", + ), + IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]), + IO.DynamicCombo.Input( + "should_texture", + options=[ + IO.DynamicCombo.Option( + "true", + [ + IO.Boolean.Input( + "enable_pbr", + default=False, + tooltip="Generate PBR Maps (metallic, roughness, normal) " + "in addition to the base color.", + ), + IO.String.Input( + "texture_prompt", + default="", + multiline=True, + tooltip="Provide a text prompt to guide the texturing process. " + "Maximum 600 characters. Cannot be used at the same time as 'texture_image'.", + ), + IO.Image.Input( + "texture_image", + tooltip="Only one of 'texture_image' or 'texture_prompt' " + "may be used at the same time.", + optional=True, + ), + ], + ), + IO.DynamicCombo.Option("false", []), + ], + tooltip="Determines whether textures are generated. " + "Setting it to false skips the texture phase and returns a mesh without textures.", + ), + IO.Combo.Input( + "pose_mode", + options=["", "A-pose", "T-pose"], + tooltip="Specify the pose mode for the generated model.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["should_texture"]), + expr=""" + ( + $prices := {"true": 1.2, "false": 0.8}; + {"type":"usd","usd": $lookup($prices, widgets.should_texture)} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + image: Input.Image, + should_remesh: InputShouldRemesh, + symmetry_mode: str, + should_texture: InputShouldTexture, + pose_mode: str, + seed: int, + ) -> IO.NodeOutput: + texture = should_texture["should_texture"] == "true" + texture_image_url = texture_prompt = None + if texture: + if should_texture["texture_prompt"] and should_texture["texture_image"] is not None: + raise ValueError("texture_prompt and texture_image cannot be used at the same time") + if should_texture["texture_prompt"]: + validate_string(should_texture["texture_prompt"], field_name="texture_prompt", max_length=600) + texture_prompt = should_texture["texture_prompt"] + if should_texture["texture_image"] is not None: + texture_image_url = ( + await upload_images_to_comfyapi( + cls, should_texture["texture_image"], wait_label="Uploading texture" + ) + )[0] + response = await sync_op( + cls, + 
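# Note: image-to-3D goes through Meshy's v1 endpoint, while text-to-3D above uses v2. +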
ApiEndpoint(path="/proxy/meshy/openapi/v1/image-to-3d", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyImageToModelRequest( + image_url=(await upload_images_to_comfyapi(cls, image, wait_label="Uploading base image"))[0], + ai_model=model, + topology=should_remesh.get("topology", None), + target_polycount=should_remesh.get("target_polycount", None), + symmetry_mode=symmetry_mode, + should_remesh=should_remesh["should_remesh"] == "true", + should_texture=texture, + enable_pbr=should_texture.get("enable_pbr", None), + pose_mode=pose_mode.lower(), + texture_prompt=texture_prompt, + texture_image_url=texture_image_url, + seed=seed, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/image-to-3d/{response.result}"), + response_model=MeshyModelResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) + return IO.NodeOutput(model_file, response.result) + + +class MeshyMultiImageToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyMultiImageToModelNode", + display_name="Meshy: Multi-Image to Model", + category="api node/3d/Meshy", + inputs=[ + IO.Combo.Input("model", options=["latest"]), + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplatePrefix(IO.Image.Input("image"), prefix="image", min=2, max=4), + ), + IO.DynamicCombo.Input( + "should_remesh", + options=[ + IO.DynamicCombo.Option( + "true", + [ + IO.Combo.Input("topology", options=["triangle", "quad"]), + IO.Int.Input( + "target_polycount", + default=300000, + min=100, + max=300000, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + IO.DynamicCombo.Option("false", []), + ], + tooltip="When set to false, returns an unprocessed triangular mesh.", + ), + IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]), + IO.DynamicCombo.Input( + "should_texture", + options=[ + IO.DynamicCombo.Option( + "true", + [ + IO.Boolean.Input( + "enable_pbr", + default=False, + tooltip="Generate PBR Maps (metallic, roughness, normal) " + "in addition to the base color.", + ), + IO.String.Input( + "texture_prompt", + default="", + multiline=True, + tooltip="Provide a text prompt to guide the texturing process. " + "Maximum 600 characters. Cannot be used at the same time as 'texture_image'.", + ), + IO.Image.Input( + "texture_image", + tooltip="Only one of 'texture_image' or 'texture_prompt' " + "may be used at the same time.", + optional=True, + ), + ], + ), + IO.DynamicCombo.Option("false", []), + ], + tooltip="Determines whether textures are generated. 
" + "Setting it to false skips the texture phase and returns a mesh without textures.", + ), + IO.Combo.Input( + "pose_mode", + options=["", "A-pose", "T-pose"], + tooltip="Specify the pose mode for the generated model.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["should_texture"]), + expr=""" + ( + $prices := {"true": 0.6, "false": 0.2}; + {"type":"usd","usd": $lookup($prices, widgets.should_texture)} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + images: IO.Autogrow.Type, + should_remesh: InputShouldRemesh, + symmetry_mode: str, + should_texture: InputShouldTexture, + pose_mode: str, + seed: int, + ) -> IO.NodeOutput: + texture = should_texture["should_texture"] == "true" + texture_image_url = texture_prompt = None + if texture: + if should_texture["texture_prompt"] and should_texture["texture_image"] is not None: + raise ValueError("texture_prompt and texture_image cannot be used at the same time") + if should_texture["texture_prompt"]: + validate_string(should_texture["texture_prompt"], field_name="texture_prompt", max_length=600) + texture_prompt = should_texture["texture_prompt"] + if should_texture["texture_image"] is not None: + texture_image_url = ( + await upload_images_to_comfyapi( + cls, should_texture["texture_image"], wait_label="Uploading texture" + ) + )[0] + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/meshy/openapi/v1/multi-image-to-3d", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyMultiImageToModelRequest( + image_urls=await upload_images_to_comfyapi( + cls, list(images.values()), wait_label="Uploading base images" + ), + ai_model=model, + topology=should_remesh.get("topology", None), + target_polycount=should_remesh.get("target_polycount", None), + symmetry_mode=symmetry_mode, + should_remesh=should_remesh["should_remesh"] == "true", + should_texture=texture, + enable_pbr=should_texture.get("enable_pbr", None), + pose_mode=pose_mode.lower(), + texture_prompt=texture_prompt, + texture_image_url=texture_image_url, + seed=seed, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/multi-image-to-3d/{response.result}"), + response_model=MeshyModelResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) + return IO.NodeOutput(model_file, response.result) + + +class MeshyRigModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyRigModelNode", + display_name="Meshy: Rig Model", + category="api node/3d/Meshy", + description="Provides a rigged character in standard formats. 
" + "Auto-rigging is currently not suitable for untextured meshes, non-humanoid assets, " + "or humanoid assets with unclear limb and body structure.", + inputs=[ + IO.Custom("MESHY_TASK_ID").Input("meshy_task_id"), + IO.Float.Input( + "height_meters", + min=0.1, + max=15.0, + default=1.7, + tooltip="The approximate height of the character model in meters. " + "This aids in scaling and rigging accuracy.", + ), + IO.Image.Input( + "texture_image", + tooltip="The model's UV-unwrapped base color texture image.", + optional=True, + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_RIGGED_TASK_ID").Output(display_name="rig_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.2}""", + ), + ) + + @classmethod + async def execute( + cls, + meshy_task_id: str, + height_meters: float, + texture_image: Input.Image | None = None, + ) -> IO.NodeOutput: + texture_image_url = None + if texture_image is not None: + texture_image_url = (await upload_images_to_comfyapi(cls, texture_image, wait_label="Uploading texture"))[0] + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/meshy/openapi/v1/rigging", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyRiggingRequest( + input_task_id=meshy_task_id, + height_meters=height_meters, + texture_image_url=texture_image_url, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/rigging/{response.result}"), + response_model=MeshyRiggedResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio( + result.result.rigged_character_glb_url, os.path.join(get_output_directory(), model_file) + ) + return IO.NodeOutput(model_file, response.result) + + +class MeshyAnimateModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyAnimateModelNode", + display_name="Meshy: Animate Model", + category="api node/3d/Meshy", + description="Apply a specific animation action to a previously rigged character.", + inputs=[ + IO.Custom("MESHY_RIGGED_TASK_ID").Input("rig_task_id"), + IO.Int.Input( + "action_id", + default=0, + min=0, + max=696, + tooltip="Visit https://docs.meshy.ai/en/api/animation-library for a list of available values.", + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.12}""", + ), + ) + + @classmethod + async def execute( + cls, + rig_task_id: str, + action_id: int, + ) -> IO.NodeOutput: + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/meshy/openapi/v1/animations", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyAnimationRequest( + rig_task_id=rig_task_id, + action_id=action_id, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/animations/{response.result}"), + response_model=MeshyAnimationResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.result.animation_glb_url, os.path.join(get_output_directory(), 
model_file)) + return IO.NodeOutput(model_file, response.result) + + +class MeshyTextureNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="MeshyTextureNode", + display_name="Meshy: Texture Model", + category="api node/3d/Meshy", + inputs=[ + IO.Combo.Input("model", options=["latest"]), + IO.Custom("MESHY_TASK_ID").Input("meshy_task_id"), + IO.Boolean.Input( + "enable_original_uv", + default=True, + tooltip="Use the original UV of the model instead of generating new UVs. " + "When enabled, Meshy preserves existing textures from the uploaded model. " + "If the model has no original UV, the quality of the output might not be as good.", + ), + IO.Boolean.Input("pbr", default=False), + IO.String.Input( + "text_style_prompt", + default="", + multiline=True, + tooltip="Describe your desired texture style of the object using text. " + "Maximum 600 characters. Cannot be used at the same time as 'image_style'.", + ), + IO.Image.Input( + "image_style", + optional=True, + tooltip="A 2D image to guide the texturing process. " + "Cannot be used at the same time as 'text_style_prompt'.", + ), + ], + outputs=[ + IO.String.Output(display_name="model_file"), + IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), + ) + + @classmethod + async def execute( + cls, + model: str, + meshy_task_id: str, + enable_original_uv: bool, + pbr: bool, + text_style_prompt: str, + image_style: Input.Image | None = None, + ) -> IO.NodeOutput: + if text_style_prompt and image_style is not None: + raise ValueError("text_style_prompt and image_style cannot be used at the same time") + if not text_style_prompt and image_style is None: + raise ValueError("Either text_style_prompt or image_style is required") + image_style_url = None + if image_style is not None: + image_style_url = (await upload_images_to_comfyapi(cls, image_style, wait_label="Uploading style"))[0] + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/meshy/openapi/v1/retexture", method="POST"), + response_model=MeshyTaskResponse, + data=MeshyTextureRequest( + input_task_id=meshy_task_id, + ai_model=model, + enable_original_uv=enable_original_uv, + enable_pbr=pbr, + text_style_prompt=text_style_prompt if text_style_prompt else None, + image_style_url=image_style_url, + ), + ) + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/retexture/{response.result}"), + response_model=MeshyModelResult, + status_extractor=lambda r: r.status, + progress_extractor=lambda r: r.progress, + ) + model_file = f"meshy_model_{response.result}.glb" + await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) + return IO.NodeOutput(model_file, response.result) + + +class MeshyExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + MeshyTextToModelNode, + MeshyRefineNode, + MeshyImageToModelNode, + MeshyMultiImageToModelNode, + MeshyRigModelNode, + MeshyAnimateModelNode, + MeshyTextureNode, + ] + + +async def comfy_entrypoint() -> MeshyExtension: + return MeshyExtension() diff --git a/comfy_api_nodes/nodes_minimax.py b/comfy_api_nodes/nodes_minimax.py index 05cbb700f..43a15d50d 100644 --- a/comfy_api_nodes/nodes_minimax.py +++ 
b/comfy_api_nodes/nodes_minimax.py @@ -134,6 +134,9 @@ class MinimaxTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.43}""", + ), ) @classmethod @@ -197,6 +200,9 @@ class MinimaxImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.43}""", + ), ) @classmethod @@ -340,6 +346,20 @@ class MinimaxHailuoVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["resolution", "duration"]), + expr=""" + ( + $prices := { + "768p": {"6": 0.28, "10": 0.56}, + "1080p": {"6": 0.49} + }; + $resPrices := $lookup($prices, $lowercase(widgets.resolution)); + $price := $lookup($resPrices, $string(widgets.duration)); + {"type":"usd","usd": $price ? $price : 0.43} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py index 2771e4790..769b171b7 100644 --- a/comfy_api_nodes/nodes_moonvalley.py +++ b/comfy_api_nodes/nodes_moonvalley.py @@ -233,6 +233,10 @@ class MoonvalleyImg2VideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(), + expr="""{"type":"usd","usd": 1.5}""", + ), ) @classmethod @@ -351,6 +355,10 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(), + expr="""{"type":"usd","usd": 2.25}""", + ), ) @classmethod @@ -471,6 +479,10 @@ class MoonvalleyTxt2VideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(), + expr="""{"type":"usd","usd": 1.5}""", + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index a6205a34f..2f144c5c3 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -160,6 +160,23 @@ class OpenAIDalle2(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["size", "n"]), + expr=""" + ( + $size := widgets.size; + $nRaw := widgets.n; + $n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1; + + $base := + $contains($size, "256x256") ? 0.016 : + $contains($size, "512x512") ? 0.018 : + 0.02; + + {"type":"usd","usd": $round($base * $n, 3)} + ) + """, + ), ) @classmethod @@ -287,6 +304,25 @@ class OpenAIDalle3(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["size", "quality"]), + expr=""" + ( + $size := widgets.size; + $q := widgets.quality; + $hd := $contains($q, "hd"); + + $price := + $contains($size, "1024x1024") + ? ($hd ? 0.08 : 0.04) + : (($contains($size, "1792x1024") or $contains($size, "1024x1792")) + ? ($hd ? 0.12 : 0.08) + : 0.04); + + {"type":"usd","usd": $price} + ) + """, + ), ) @classmethod @@ -411,6 +447,28 @@ class OpenAIGPTImage1(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["quality", "n"]), + expr=""" + ( + $ranges := { + "low": [0.011, 0.02], + "medium": [0.046, 0.07], + "high": [0.167, 0.3] + }; + $range := $lookup($ranges, widgets.quality); + $n := widgets.n; + ($n = 1) + ? 
{"type":"range_usd","min_usd": $range[0], "max_usd": $range[1]} + : { + "type":"range_usd", + "min_usd": $range[0], + "max_usd": $range[1], + "format": { "suffix": " x " & $string($n) & "/Run" } + } + ) + """, + ), ) @classmethod @@ -566,6 +624,75 @@ class OpenAIChatNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m, "o4-mini") ? { + "type": "list_usd", + "usd": [0.0011, 0.0044], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "o1-pro") ? { + "type": "list_usd", + "usd": [0.15, 0.6], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "o1") ? { + "type": "list_usd", + "usd": [0.015, 0.06], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "o3-mini") ? { + "type": "list_usd", + "usd": [0.0011, 0.0044], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "o3") ? { + "type": "list_usd", + "usd": [0.01, 0.04], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-4o") ? { + "type": "list_usd", + "usd": [0.0025, 0.01], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-4.1-nano") ? { + "type": "list_usd", + "usd": [0.0001, 0.0004], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-4.1-mini") ? { + "type": "list_usd", + "usd": [0.0004, 0.0016], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-4.1") ? { + "type": "list_usd", + "usd": [0.002, 0.008], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-5-nano") ? { + "type": "list_usd", + "usd": [0.00005, 0.0004], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-5-mini") ? { + "type": "list_usd", + "usd": [0.00025, 0.002], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : $contains($m, "gpt-5") ? 
{ + "type": "list_usd", + "usd": [0.00125, 0.01], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + : {"type": "text", "text": "Token-based"} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_pixverse.py b/comfy_api_nodes/nodes_pixverse.py index 6e1686af0..86ddb3ab9 100644 --- a/comfy_api_nodes/nodes_pixverse.py +++ b/comfy_api_nodes/nodes_pixverse.py @@ -128,6 +128,7 @@ class PixverseTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -242,6 +243,7 @@ class PixverseImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -355,6 +357,7 @@ class PixverseTransitionVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=PRICE_BADGE_VIDEO, ) @classmethod @@ -416,6 +419,33 @@ class PixverseTransitionVideoNode(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(response_poll.Resp.url)) +PRICE_BADGE_VIDEO = IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration_seconds", "quality", "motion_mode"]), + expr=""" + ( + $prices := { + "5": { + "1080p": {"normal": 1.2, "fast": 1.2}, + "720p": {"normal": 0.6, "fast": 1.2}, + "540p": {"normal": 0.45, "fast": 0.9}, + "360p": {"normal": 0.45, "fast": 0.9} + }, + "8": { + "1080p": {"normal": 1.2, "fast": 1.2}, + "720p": {"normal": 1.2, "fast": 1.2}, + "540p": {"normal": 0.9, "fast": 1.2}, + "360p": {"normal": 0.9, "fast": 1.2} + } + }; + $durPrices := $lookup($prices, $string(widgets.duration_seconds)); + $qualityPrices := $lookup($durPrices, widgets.quality); + $price := $lookup($qualityPrices, widgets.motion_mode); + {"type":"usd","usd": $price ? $price : 0.9} + ) + """, +) + + class PixVerseExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py index e3440b946..05dc151ad 100644 --- a/comfy_api_nodes/nodes_recraft.py +++ b/comfy_api_nodes/nodes_recraft.py @@ -378,6 +378,10 @@ class RecraftTextToImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["n"]), + expr="""{"type":"usd","usd": $round(0.04 * widgets.n, 2)}""", + ), ) @classmethod @@ -490,6 +494,10 @@ class RecraftImageToImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["n"]), + expr="""{"type":"usd","usd": $round(0.04 * widgets.n, 2)}""", + ), ) @classmethod @@ -591,6 +599,10 @@ class RecraftImageInpaintingNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["n"]), + expr="""{"type":"usd","usd": $round(0.04 * widgets.n, 2)}""", + ), ) @classmethod @@ -692,6 +704,10 @@ class RecraftTextToVectorNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["n"]), + expr="""{"type":"usd","usd": $round(0.08 * widgets.n, 2)}""", + ), ) @classmethod @@ -759,6 +775,10 @@ class RecraftVectorizeImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(), + expr="""{"type":"usd","usd": 0.01}""", + ), ) @classmethod @@ -817,6 +837,9 @@ class RecraftReplaceBackgroundNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + 
price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.04}""", + ), ) @classmethod @@ -883,6 +906,9 @@ class RecraftRemoveBackgroundNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.01}""", + ), ) @classmethod @@ -929,6 +955,9 @@ class RecraftCrispUpscaleNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.004}""", + ), ) @classmethod @@ -972,6 +1001,9 @@ class RecraftCreativeUpscaleNode(RecraftCrispUpscaleNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.25}""", + ), ) diff --git a/comfy_api_nodes/nodes_rodin.py b/comfy_api_nodes/nodes_rodin.py index e60e7a6d6..b4420cb93 100644 --- a/comfy_api_nodes/nodes_rodin.py +++ b/comfy_api_nodes/nodes_rodin.py @@ -241,6 +241,9 @@ class Rodin3D_Regular(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod @@ -294,6 +297,9 @@ class Rodin3D_Detail(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod @@ -347,6 +353,9 @@ class Rodin3D_Smooth(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod @@ -406,6 +415,9 @@ class Rodin3D_Sketch(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_runway.py b/comfy_api_nodes/nodes_runway.py index 3c55039c9..d19fdb365 100644 --- a/comfy_api_nodes/nodes_runway.py +++ b/comfy_api_nodes/nodes_runway.py @@ -184,6 +184,10 @@ class RunwayImageToVideoNodeGen3a(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration"]), + expr="""{"type":"usd","usd": 0.0715 * widgets.duration}""", + ), ) @classmethod @@ -274,6 +278,10 @@ class RunwayImageToVideoNodeGen4(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration"]), + expr="""{"type":"usd","usd": 0.0715 * widgets.duration}""", + ), ) @classmethod @@ -372,6 +380,10 @@ class RunwayFirstLastFrameNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration"]), + expr="""{"type":"usd","usd": 0.0715 * widgets.duration}""", + ), ) @classmethod @@ -457,6 +469,9 @@ class RunwayTextToImageNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.11}""", + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_sora.py b/comfy_api_nodes/nodes_sora.py index 92b225d40..87e663845 100644 --- a/comfy_api_nodes/nodes_sora.py +++ b/comfy_api_nodes/nodes_sora.py @@ -89,6 +89,24 @@ class OpenAIVideoSora2(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "size", "duration"]), + expr=""" + ( + $m := widgets.model; + $size := widgets.size; + $dur := widgets.duration; + $isPro := $contains($m, "sora-2-pro"); + $isSora2 := $contains($m, "sora-2"); + $isProSize := ($size = "1024x1792" or $size = "1792x1024"); + $perSec := + $isPro ? ($isProSize ? 0.5 : 0.3) : + $isSora2 ? 0.1 : + ($isProSize ? 
0.5 : 0.1); + {"type":"usd","usd": $round($perSec * $dur, 2)} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_stability.py b/comfy_api_nodes/nodes_stability.py index bb7ceed78..5c48c1f1e 100644 --- a/comfy_api_nodes/nodes_stability.py +++ b/comfy_api_nodes/nodes_stability.py @@ -127,6 +127,9 @@ class StabilityStableImageUltraNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.08}""", + ), ) @classmethod @@ -264,6 +267,16 @@ class StabilityStableImageSD_3_5Node(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $contains(widgets.model,"large") + ? {"type":"usd","usd":0.065} + : {"type":"usd","usd":0.035} + ) + """, + ), ) @classmethod @@ -382,6 +395,9 @@ class StabilityUpscaleConservativeNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.25}""", + ), ) @classmethod @@ -486,6 +502,9 @@ class StabilityUpscaleCreativeNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.25}""", + ), ) @classmethod @@ -566,6 +585,9 @@ class StabilityUpscaleFastNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.01}""", + ), ) @classmethod @@ -648,6 +670,9 @@ class StabilityTextToAudio(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.2}""", + ), ) @classmethod @@ -732,6 +757,9 @@ class StabilityAudioToAudio(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.2}""", + ), ) @classmethod @@ -828,6 +856,9 @@ class StabilityAudioInpaint(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.2}""", + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_topaz.py b/comfy_api_nodes/nodes_topaz.py index b04575ad8..9dc5f45bc 100644 --- a/comfy_api_nodes/nodes_topaz.py +++ b/comfy_api_nodes/nodes_topaz.py @@ -2,7 +2,6 @@ import builtins from io import BytesIO import aiohttp -import torch from typing_extensions import override from comfy_api.latest import IO, ComfyExtension, Input @@ -138,7 +137,7 @@ class TopazImageEnhance(IO.ComfyNode): async def execute( cls, model: str, - image: torch.Tensor, + image: Input.Image, prompt: str = "", subject_detection: str = "All", face_enhancement: bool = True, @@ -153,7 +152,9 @@ class TopazImageEnhance(IO.ComfyNode): ) -> IO.NodeOutput: if get_number_of_images(image) != 1: raise ValueError("Only one input image is supported.") - download_url = await upload_images_to_comfyapi(cls, image, max_images=1, mime_type="image/png") + download_url = await upload_images_to_comfyapi( + cls, image, max_images=1, mime_type="image/png", total_pixels=4096*4096 + ) initial_response = await sync_op( cls, ApiEndpoint(path="/proxy/topaz/image/v1/enhance-gen/async", method="POST"), diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py index e72f8e96a..aa790143d 100644 --- a/comfy_api_nodes/nodes_tripo.py +++ b/comfy_api_nodes/nodes_tripo.py @@ -117,6 +117,38 @@ class TripoTextToModelNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "model_version", + "style", + "texture", + "pbr", + "quad", + 
"texture_quality", + "geometry_quality", + ], + ), + expr=""" + ( + $isV14 := $contains(widgets.model_version,"v1.4"); + $style := widgets.style; + $hasStyle := ($style != "" and $style != "none"); + $withTexture := widgets.texture or widgets.pbr; + $isHdTexture := (widgets.texture_quality = "detailed"); + $isDetailedGeometry := (widgets.geometry_quality = "detailed"); + $baseCredits := + $isV14 ? 20 : ($withTexture ? 20 : 10); + $credits := + $baseCredits + + ($hasStyle ? 5 : 0) + + (widgets.quad ? 5 : 0) + + ($isHdTexture ? 10 : 0) + + ($isDetailedGeometry ? 20 : 0); + {"type":"usd","usd": $round($credits * 0.01, 2)} + ) + """, + ), ) @classmethod @@ -210,6 +242,38 @@ class TripoImageToModelNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "model_version", + "style", + "texture", + "pbr", + "quad", + "texture_quality", + "geometry_quality", + ], + ), + expr=""" + ( + $isV14 := $contains(widgets.model_version,"v1.4"); + $style := widgets.style; + $hasStyle := ($style != "" and $style != "none"); + $withTexture := widgets.texture or widgets.pbr; + $isHdTexture := (widgets.texture_quality = "detailed"); + $isDetailedGeometry := (widgets.geometry_quality = "detailed"); + $baseCredits := + $isV14 ? 30 : ($withTexture ? 30 : 20); + $credits := + $baseCredits + + ($hasStyle ? 5 : 0) + + (widgets.quad ? 5 : 0) + + ($isHdTexture ? 10 : 0) + + ($isDetailedGeometry ? 20 : 0); + {"type":"usd","usd": $round($credits * 0.01, 2)} + ) + """, + ), ) @classmethod @@ -314,6 +378,34 @@ class TripoMultiviewToModelNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "model_version", + "texture", + "pbr", + "quad", + "texture_quality", + "geometry_quality", + ], + ), + expr=""" + ( + $isV14 := $contains(widgets.model_version,"v1.4"); + $withTexture := widgets.texture or widgets.pbr; + $isHdTexture := (widgets.texture_quality = "detailed"); + $isDetailedGeometry := (widgets.geometry_quality = "detailed"); + $baseCredits := + $isV14 ? 30 : ($withTexture ? 30 : 20); + $credits := + $baseCredits + + (widgets.quad ? 5 : 0) + + ($isHdTexture ? 10 : 0) + + ($isDetailedGeometry ? 20 : 0); + {"type":"usd","usd": $round($credits * 0.01, 2)} + ) + """, + ), ) @classmethod @@ -405,6 +497,15 @@ class TripoTextureNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["texture_quality"]), + expr=""" + ( + $tq := widgets.texture_quality; + {"type":"usd","usd": ($contains($tq,"detailed") ? 
0.2 : 0.1)} + ) + """, + ), ) @classmethod @@ -456,6 +557,9 @@ class TripoRefineNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.3}""", + ), ) @classmethod @@ -489,6 +593,9 @@ class TripoRigNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.25}""", + ), ) @classmethod @@ -545,6 +652,9 @@ class TripoRetargetNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.1}""", + ), ) @classmethod @@ -638,6 +748,60 @@ class TripoConversionNode(IO.ComfyNode): ], is_api_node=True, is_output_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "quad", + "face_limit", + "texture_size", + "texture_format", + "force_symmetry", + "flatten_bottom", + "flatten_bottom_threshold", + "pivot_to_center_bottom", + "scale_factor", + "with_animation", + "pack_uv", + "bake", + "part_names", + "fbx_preset", + "export_vertex_colors", + "export_orientation", + "animate_in_place", + ], + ), + expr=""" + ( + $face := (widgets.face_limit != null) ? widgets.face_limit : -1; + $texSize := (widgets.texture_size != null) ? widgets.texture_size : 4096; + $flatThresh := (widgets.flatten_bottom_threshold != null) ? widgets.flatten_bottom_threshold : 0; + $scale := (widgets.scale_factor != null) ? widgets.scale_factor : 1; + $texFmt := (widgets.texture_format != "" ? widgets.texture_format : "jpeg"); + $part := widgets.part_names; + $fbx := (widgets.fbx_preset != "" ? widgets.fbx_preset : "blender"); + $orient := (widgets.export_orientation != "" ? widgets.export_orientation : "default"); + $advanced := + widgets.quad or + widgets.force_symmetry or + widgets.flatten_bottom or + widgets.pivot_to_center_bottom or + widgets.with_animation or + widgets.pack_uv or + widgets.bake or + widgets.export_vertex_colors or + widgets.animate_in_place or + ($face != -1) or + ($texSize != 4096) or + ($flatThresh != 0) or + ($scale != 1) or + ($texFmt != "jpeg") or + ($part != "") or + ($fbx != "blender") or + ($orient != "default"); + {"type":"usd","usd": ($advanced ? 0.1 : 0.05)} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py index 13a6bfd91..c14d6ad68 100644 --- a/comfy_api_nodes/nodes_veo2.py +++ b/comfy_api_nodes/nodes_veo2.py @@ -122,6 +122,10 @@ class VeoVideoGenerationNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration_seconds"]), + expr="""{"type":"usd","usd": 0.5 * widgets.duration_seconds}""", + ), ) @classmethod @@ -347,6 +351,20 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio"]), + expr=""" + ( + $m := widgets.model; + $a := widgets.generate_audio; + ($contains($m,"veo-3.0-fast-generate-001") or $contains($m,"veo-3.1-fast-generate")) + ? {"type":"usd","usd": ($a ? 1.2 : 0.8)} + : ($contains($m,"veo-3.0-generate-001") or $contains($m,"veo-3.1-generate")) + ? {"type":"usd","usd": ($a ? 
3.2 : 1.6)} + : {"type":"range_usd","min_usd":0.8,"max_usd":3.2} + ) + """, + ), ) @@ -420,6 +438,30 @@ class Veo3FirstLastFrameNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "duration"]), + expr=""" + ( + $prices := { + "veo-3.1-fast-generate": { "audio": 0.15, "no_audio": 0.10 }, + "veo-3.1-generate": { "audio": 0.40, "no_audio": 0.20 } + }; + $m := widgets.model; + $ga := (widgets.generate_audio = "true"); + $seconds := widgets.duration; + $modelKey := + $contains($m, "veo-3.1-fast-generate") ? "veo-3.1-fast-generate" : + $contains($m, "veo-3.1-generate") ? "veo-3.1-generate" : + ""; + $audioKey := $ga ? "audio" : "no_audio"; + $modelPrices := $lookup($prices, $modelKey); + $pps := $lookup($modelPrices, $audioKey); + ($pps != null) + ? {"type":"usd","usd": $pps * $seconds} + : {"type":"range_usd","min_usd": 0.4, "max_usd": 3.2} + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py index 7a679f0d9..8edb02f39 100644 --- a/comfy_api_nodes/nodes_vidu.py +++ b/comfy_api_nodes/nodes_vidu.py @@ -1,12 +1,13 @@ -import logging -from enum import Enum -from typing import Literal, Optional, TypeVar - -import torch -from pydantic import BaseModel, Field from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.vidu import ( + SubjectReference, + TaskCreationRequest, + TaskCreationResponse, + TaskResult, + TaskStatusResponse, +) from comfy_api_nodes.util import ( ApiEndpoint, download_url_to_video_output, @@ -17,6 +18,7 @@ from comfy_api_nodes.util import ( validate_image_aspect_ratio, validate_image_dimensions, validate_images_aspect_ratio_closeness, + validate_string, ) VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video" @@ -25,98 +27,33 @@ VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video" VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video" VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations" -R = TypeVar("R") - - -class VideoModelName(str, Enum): - vidu_q1 = "viduq1" - - -class AspectRatio(str, Enum): - r_16_9 = "16:9" - r_9_16 = "9:16" - r_1_1 = "1:1" - - -class Resolution(str, Enum): - r_1080p = "1080p" - - -class MovementAmplitude(str, Enum): - auto = "auto" - small = "small" - medium = "medium" - large = "large" - - -class TaskCreationRequest(BaseModel): - model: VideoModelName = VideoModelName.vidu_q1 - prompt: Optional[str] = Field(None, max_length=1500) - duration: Optional[Literal[5]] = 5 - seed: Optional[int] = Field(0, ge=0, le=2147483647) - aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9 - resolution: Optional[Resolution] = Resolution.r_1080p - movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto - images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL") - - -class TaskCreationResponse(BaseModel): - task_id: str = Field(...) - state: str = Field(...) - created_at: str = Field(...) - code: Optional[int] = Field(None, description="Error code") - - -class TaskResult(BaseModel): - id: str = Field(..., description="Creation id") - url: str = Field(..., description="The URL of the generated results, valid for one hour") - cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour") - - -class TaskStatusResponse(BaseModel): - state: str = Field(...) 
- err_code: Optional[str] = Field(None) - creations: list[TaskResult] = Field(..., description="Generated results") - - -def get_video_url_from_response(response) -> Optional[str]: - if response.creations: - return response.creations[0].url - return None - - -def get_video_from_response(response) -> TaskResult: - if not response.creations: - error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}" - logging.info(error_msg) - raise RuntimeError(error_msg) - logging.info("Vidu task %s succeeded. Video URL: %s", response.creations[0].id, response.creations[0].url) - return response.creations[0] - async def execute_task( cls: type[IO.ComfyNode], vidu_endpoint: str, payload: TaskCreationRequest, - estimated_duration: int, -) -> R: - response = await sync_op( +) -> list[TaskResult]: + task_creation_response = await sync_op( cls, endpoint=ApiEndpoint(path=vidu_endpoint, method="POST"), response_model=TaskCreationResponse, data=payload, ) - if response.state == "failed": - error_msg = f"Vidu request failed. Code: {response.code}" - logging.error(error_msg) - raise RuntimeError(error_msg) - return await poll_op( + if task_creation_response.state == "failed": + raise RuntimeError(f"Vidu request failed. Code: {task_creation_response.code}") + response = await poll_op( cls, - ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % response.task_id), + ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % task_creation_response.task_id), response_model=TaskStatusResponse, status_extractor=lambda r: r.state, - estimated_duration=estimated_duration, + progress_extractor=lambda r: r.progress, + max_poll_attempts=320, ) + if not response.creations: + raise RuntimeError( + f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}" + ) + return response.creations class ViduTextToVideoNode(IO.ComfyNode): @@ -127,14 +64,9 @@ class ViduTextToVideoNode(IO.ComfyNode): node_id="ViduTextToVideoNode", display_name="Vidu Text To Video Generation", category="api node/video/Vidu", - description="Generate video from text prompt", + description="Generate video from a text prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.String.Input( "prompt", multiline=True, @@ -163,22 +95,19 @@ class ViduTextToVideoNode(IO.ComfyNode): ), IO.Combo.Input( "aspect_ratio", - options=AspectRatio, - default=AspectRatio.r_16_9, + options=["16:9", "9:16", "1:1"], tooltip="The aspect ratio of the output video", optional=True, ), IO.Combo.Input( "resolution", - options=Resolution, - default=Resolution.r_1080p, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=MovementAmplitude, - default=MovementAmplitude.auto, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -192,6 +121,9 @@ class ViduTextToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod @@ -208,7 +140,7 @@ class ViduTextToVideoNode(IO.ComfyNode): if not prompt: raise ValueError("The prompt field is required and cannot be empty.") payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -216,8 +148,8 @@ class 
ViduTextToVideoNode(IO.ComfyNode): resolution=resolution, movement_amplitude=movement_amplitude, ) - results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload, 320) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduImageToVideoNode(IO.ComfyNode): @@ -230,12 +162,7 @@ class ViduImageToVideoNode(IO.ComfyNode): category="api node/video/Vidu", description="Generate video from image and optional prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.Image.Input( "image", tooltip="An image to be used as the start frame of the generated video", @@ -270,15 +197,13 @@ class ViduImageToVideoNode(IO.ComfyNode): ), IO.Combo.Input( "resolution", - options=Resolution, - default=Resolution.r_1080p, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=MovementAmplitude, - default=MovementAmplitude.auto.value, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -292,13 +217,16 @@ class ViduImageToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod async def execute( cls, model: str, - image: torch.Tensor, + image: Input.Image, prompt: str, duration: int, seed: int, @@ -309,7 +237,7 @@ class ViduImageToVideoNode(IO.ComfyNode): raise ValueError("Only one input image is allowed.") validate_image_aspect_ratio(image, (1, 4), (4, 1)) payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -322,8 +250,8 @@ class ViduImageToVideoNode(IO.ComfyNode): max_images=1, mime_type="image/png", ) - results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload, 120) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduReferenceVideoNode(IO.ComfyNode): @@ -334,14 +262,9 @@ class ViduReferenceVideoNode(IO.ComfyNode): node_id="ViduReferenceVideoNode", display_name="Vidu Reference To Video Generation", category="api node/video/Vidu", - description="Generate video from multiple images and prompt", + description="Generate video from multiple images and a prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.Image.Input( "images", tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).", @@ -374,22 +297,19 @@ class ViduReferenceVideoNode(IO.ComfyNode): ), IO.Combo.Input( "aspect_ratio", - options=AspectRatio, - default=AspectRatio.r_16_9, + options=["16:9", "9:16", "1:1"], tooltip="The aspect ratio of the output video", optional=True, ), IO.Combo.Input( "resolution", - options=[model.value for model in Resolution], - default=Resolution.r_1080p.value, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( 
"movement_amplitude", - options=[model.value for model in MovementAmplitude], - default=MovementAmplitude.auto.value, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -403,13 +323,16 @@ class ViduReferenceVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod async def execute( cls, model: str, - images: torch.Tensor, + images: Input.Image, prompt: str, duration: int, seed: int, @@ -426,7 +349,7 @@ class ViduReferenceVideoNode(IO.ComfyNode): validate_image_aspect_ratio(image, (1, 4), (4, 1)) validate_image_dimensions(image, min_width=128, min_height=128) payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -440,8 +363,8 @@ class ViduReferenceVideoNode(IO.ComfyNode): max_images=7, mime_type="image/png", ) - results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload, 120) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduStartEndToVideoNode(IO.ComfyNode): @@ -454,12 +377,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode): category="api node/video/Vidu", description="Generate a video from start and end frames and a prompt", inputs=[ - IO.Combo.Input( - "model", - options=[model.value for model in VideoModelName], - default=VideoModelName.vidu_q1.value, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.Image.Input( "first_frame", tooltip="Start frame", @@ -497,15 +415,13 @@ class ViduStartEndToVideoNode(IO.ComfyNode): ), IO.Combo.Input( "resolution", - options=[model.value for model in Resolution], - default=Resolution.r_1080p.value, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=[model.value for model in MovementAmplitude], - default=MovementAmplitude.auto.value, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -519,14 +435,17 @@ class ViduStartEndToVideoNode(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod async def execute( cls, model: str, - first_frame: torch.Tensor, - end_frame: torch.Tensor, + first_frame: Input.Image, + end_frame: Input.Image, prompt: str, duration: int, seed: int, @@ -535,7 +454,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode): ) -> IO.NodeOutput: validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False) payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -546,8 +465,479 @@ class ViduStartEndToVideoNode(IO.ComfyNode): (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0] for frame in (first_frame, end_frame) ] - results = await execute_task(cls, VIDU_START_END_VIDEO, payload, 96) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_START_END_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + +class Vidu2TextToVideoNode(IO.ComfyNode): + + @classmethod + def 
define_schema(cls): + return IO.Schema( + node_id="Vidu2TextToVideoNode", + display_name="Vidu2 Text-to-Video Generation", + category="api node/video/Vidu", + description="Generate video from a text prompt", + inputs=[ + IO.Combo.Input("model", options=["viduq2"]), + IO.String.Input( + "prompt", + multiline=True, + tooltip="A textual description for video generation, with a maximum length of 2000 characters.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=10, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "3:4", "4:3", "1:1"]), + IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Boolean.Input( + "background_music", + default=False, + tooltip="Whether to add background music to the generated video.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $is1080 := widgets.resolution = "1080p"; + $base := $is1080 ? 0.1 : 0.075; + $perSec := $is1080 ? 0.05 : 0.025; + {"type":"usd","usd": $base + $perSec * (widgets.duration - 1)} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + prompt: str, + duration: int, + seed: int, + aspect_ratio: str, + resolution: str, + background_music: bool, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=2000) + results = await execute_task( + cls, + VIDU_TEXT_TO_VIDEO, + TaskCreationRequest( + model=model, + prompt=prompt, + duration=duration, + seed=seed, + aspect_ratio=aspect_ratio, + resolution=resolution, + bgm=background_music, + ), + ) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + +class Vidu2ImageToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Vidu2ImageToVideoNode", + display_name="Vidu2 Image-to-Video Generation", + category="api node/video/Vidu", + description="Generate a video from an image and an optional prompt.", + inputs=[ + IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]), + IO.Image.Input( + "image", + tooltip="An image to be used as the start frame of the generated video.", + ), + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="An optional text prompt for video generation (max 2000 characters).", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=10, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input( + "resolution", + options=["720p", "1080p"], + ), + IO.Combo.Input( + "movement_amplitude", + options=["auto", "small", "medium", "large"], + tooltip="The movement amplitude of objects in the frame.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "duration", "resolution"]), + expr=""" + ( + $m := widgets.model; + $d := widgets.duration; + $is1080 := widgets.resolution = "1080p"; + $contains($m, 
"pro-fast") + ? ( + $base := $is1080 ? 0.08 : 0.04; + $perSec := $is1080 ? 0.02 : 0.01; + {"type":"usd","usd": $base + $perSec * ($d - 1)} + ) + : $contains($m, "pro") + ? ( + $base := $is1080 ? 0.275 : 0.075; + $perSec := $is1080 ? 0.075 : 0.05; + {"type":"usd","usd": $base + $perSec * ($d - 1)} + ) + : $contains($m, "turbo") + ? ( + $is1080 + ? {"type":"usd","usd": 0.175 + 0.05 * ($d - 1)} + : ( + $d <= 1 ? {"type":"usd","usd": 0.04} + : $d <= 2 ? {"type":"usd","usd": 0.05} + : {"type":"usd","usd": 0.05 + 0.05 * ($d - 2)} + ) + ) + : {"type":"usd","usd": 0.04} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + image: Input.Image, + prompt: str, + duration: int, + seed: int, + resolution: str, + movement_amplitude: str, + ) -> IO.NodeOutput: + if get_number_of_images(image) > 1: + raise ValueError("Only one input image is allowed.") + validate_image_aspect_ratio(image, (1, 4), (4, 1)) + validate_string(prompt, max_length=2000) + results = await execute_task( + cls, + VIDU_IMAGE_TO_VIDEO, + TaskCreationRequest( + model=model, + prompt=prompt, + duration=duration, + seed=seed, + resolution=resolution, + movement_amplitude=movement_amplitude, + images=await upload_images_to_comfyapi( + cls, + image, + max_images=1, + mime_type="image/png", + ), + ), + ) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + +class Vidu2ReferenceVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Vidu2ReferenceVideoNode", + display_name="Vidu2 Reference-to-Video Generation", + category="api node/video/Vidu", + description="Generate a video from multiple reference images and a prompt.", + inputs=[ + IO.Combo.Input("model", options=["viduq2"]), + IO.Autogrow.Input( + "subjects", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_images"), + names=["subject1", "subject2", "subject3"], + min=1, + ), + tooltip="For each subject, provide up to 3 reference images (7 images total across all subjects). " + "Reference them in prompts via @subject{subject_id}.", + ), + IO.String.Input( + "prompt", + multiline=True, + tooltip="When enabled, the video will include generated speech and background music " + "based on the prompt.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled video will contain generated speech and background music based on the prompt.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=10, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "4:3", "3:4", "1:1"]), + IO.Combo.Input("resolution", options=["720p"]), + IO.Combo.Input( + "movement_amplitude", + options=["auto", "small", "medium", "large"], + tooltip="The movement amplitude of objects in the frame.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["audio", "duration", "resolution"]), + expr=""" + ( + $is1080 := widgets.resolution = "1080p"; + $base := $is1080 ? 0.375 : 0.125; + $perSec := $is1080 ? 0.05 : 0.025; + $audioCost := widgets.audio = true ? 
0.075 : 0; + {"type":"usd","usd": $base + $perSec * (widgets.duration - 1) + $audioCost} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + subjects: IO.Autogrow.Type, + prompt: str, + audio: bool, + duration: int, + seed: int, + aspect_ratio: str, + resolution: str, + movement_amplitude: str, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=2000) + total_images = 0 + for i in subjects: + if get_number_of_images(subjects[i]) > 3: + raise ValueError("Maximum number of images per subject is 3.") + for im in subjects[i]: + total_images += 1 + validate_image_aspect_ratio(im, (1, 4), (4, 1)) + validate_image_dimensions(im, min_width=128, min_height=128) + if total_images > 7: + raise ValueError("Too many reference images; the maximum allowed is 7.") + subjects_param: list[SubjectReference] = [] + for i in subjects: + subjects_param.append( + SubjectReference( + id=i, + images=await upload_images_to_comfyapi( + cls, + subjects[i], + max_images=3, + mime_type="image/png", + wait_label=f"Uploading reference images for {i}", + ), + ), + ) + payload = TaskCreationRequest( + model=model, + prompt=prompt, + audio=audio, + duration=duration, + seed=seed, + aspect_ratio=aspect_ratio, + resolution=resolution, + movement_amplitude=movement_amplitude, + subjects=subjects_param, + ) + results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + +class Vidu2StartEndToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Vidu2StartEndToVideoNode", + display_name="Vidu2 Start/End Frame-to-Video Generation", + category="api node/video/Vidu", + description="Generate a video from a start frame, an end frame, and a prompt.", + inputs=[ + IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]), + IO.Image.Input("first_frame"), + IO.Image.Input("end_frame"), + IO.String.Input( + "prompt", + multiline=True, + tooltip="Prompt description (max 2000 characters).", + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=8, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input( + "movement_amplitude", + options=["auto", "small", "medium", "large"], + tooltip="The movement amplitude of objects in the frame.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "duration", "resolution"]), + expr=""" + ( + $m := widgets.model; + $d := widgets.duration; + $is1080 := widgets.resolution = "1080p"; + $contains($m, "pro-fast") + ? ( + $base := $is1080 ? 0.08 : 0.04; + $perSec := $is1080 ? 0.02 : 0.01; + {"type":"usd","usd": $base + $perSec * ($d - 1)} + ) + : $contains($m, "pro") + ? ( + $base := $is1080 ? 0.275 : 0.075; + $perSec := $is1080 ? 0.075 : 0.05; + {"type":"usd","usd": $base + $perSec * ($d - 1)} + ) + : $contains($m, "turbo") + ? ( + $is1080 + ? {"type":"usd","usd": 0.175 + 0.05 * ($d - 1)} + : ( + $d <= 2 ? 
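/* 2-second 720p turbo clips are a flat rate */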
{"type":"usd","usd": 0.05} + : {"type":"usd","usd": 0.05 + 0.05 * ($d - 2)} + ) + ) + : {"type":"usd","usd": 0.04} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + first_frame: Input.Image, + end_frame: Input.Image, + prompt: str, + duration: int, + seed: int, + resolution: str, + movement_amplitude: str, + ) -> IO.NodeOutput: + validate_string(prompt, max_length=2000) + if get_number_of_images(first_frame) > 1: + raise ValueError("Only one input image is allowed for `first_frame`.") + if get_number_of_images(end_frame) > 1: + raise ValueError("Only one input image is allowed for `end_frame`.") + validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False) + payload = TaskCreationRequest( + model=model, + prompt=prompt, + duration=duration, + seed=seed, + resolution=resolution, + movement_amplitude=movement_amplitude, + images=[ + (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0] + for frame in (first_frame, end_frame) + ], + ) + results = await execute_task(cls, VIDU_START_END_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduExtension(ComfyExtension): @@ -558,6 +948,10 @@ class ViduExtension(ComfyExtension): ViduImageToVideoNode, ViduReferenceVideoNode, ViduStartEndToVideoNode, + Vidu2TextToVideoNode, + Vidu2ImageToVideoNode, + Vidu2ReferenceVideoNode, + Vidu2StartEndToVideoNode, ] diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index 3e04786a9..a1355d4f1 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -244,6 +244,9 @@ class WanTextToImageApi(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.03}""", + ), ) @classmethod @@ -363,6 +366,9 @@ class WanImageToImageApi(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.03}""", + ), ) @classmethod @@ -520,6 +526,17 @@ class WanTextToVideoApi(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "size"]), + expr=""" + ( + $ppsTable := { "480p": 0.05, "720p": 0.1, "1080p": 0.15 }; + $resKey := $substringBefore(widgets.size, ":"); + $pps := $lookup($ppsTable, $resKey); + { "type": "usd", "usd": $round($pps * widgets.duration, 2) } + ) + """, + ), ) @classmethod @@ -681,6 +698,16 @@ class WanImageToVideoApi(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + expr=""" + ( + $ppsTable := { "480p": 0.05, "720p": 0.1, "1080p": 0.15 }; + $pps := $lookup($ppsTable, widgets.resolution); + { "type": "usd", "usd": $round($pps * widgets.duration, 2) } + ) + """, + ), ) @classmethod @@ -828,6 +855,22 @@ class WanReferenceVideoApi(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["size", "duration"]), + expr=""" + ( + $rate := $contains(widgets.size, "1080p") ? 
0.15 : 0.10; + $inputMin := 2 * $rate; + $inputMax := 5 * $rate; + $outputPrice := widgets.duration * $rate; + { + "type": "range_usd", + "min_usd": $inputMin + $outputPrice, + "max_usd": $inputMax + $outputPrice + } + ) + """, + ), ) @classmethod diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index d64239c86..99c302a2a 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -55,7 +55,7 @@ def image_tensor_pair_to_batch(image1: torch.Tensor, image2: torch.Tensor) -> to def tensor_to_bytesio( image: torch.Tensor, - name: str | None = None, + *, total_pixels: int = 2048 * 2048, mime_type: str = "image/png", ) -> BytesIO: @@ -75,7 +75,7 @@ def tensor_to_bytesio( pil_image = tensor_to_pil(image, total_pixels=total_pixels) img_binary = pil_to_bytesio(pil_image, mime_type=mime_type) - img_binary.name = f"{name if name else uuid.uuid4()}.{mimetype_to_extension(mime_type)}" + img_binary.name = f"{uuid.uuid4()}.{mimetype_to_extension(mime_type)}" return img_binary diff --git a/comfy_api_nodes/util/upload_helpers.py b/comfy_api_nodes/util/upload_helpers.py index f1ed7fe9c..2794be35c 100644 --- a/comfy_api_nodes/util/upload_helpers.py +++ b/comfy_api_nodes/util/upload_helpers.py @@ -43,27 +43,41 @@ class UploadResponse(BaseModel): async def upload_images_to_comfyapi( cls: type[IO.ComfyNode], - image: torch.Tensor, + image: torch.Tensor | list[torch.Tensor], *, max_images: int = 8, mime_type: str | None = None, wait_label: str | None = "Uploading", show_batch_index: bool = True, + total_pixels: int = 2048 * 2048, ) -> list[str]: """ Uploads images to ComfyUI API and returns download URLs. To upload multiple images, stack them in the batch dimension first. """ + tensors: list[torch.Tensor] = [] + if isinstance(image, list): + for img in image: + is_batch = len(img.shape) > 3 + if is_batch: + tensors.extend(img[i] for i in range(img.shape[0])) + else: + tensors.append(img) + else: + is_batch = len(image.shape) > 3 + if is_batch: + tensors.extend(image[i] for i in range(image.shape[0])) + else: + tensors.append(image) + # if batched, try to upload each file if max_images is greater than 0 download_urls: list[str] = [] - is_batch = len(image.shape) > 3 - batch_len = image.shape[0] if is_batch else 1 - num_to_upload = min(batch_len, max_images) + num_to_upload = min(len(tensors), max_images) batch_start_ts = time.monotonic() for idx in range(num_to_upload): - tensor = image[idx] if is_batch else image - img_io = tensor_to_bytesio(tensor, mime_type=mime_type) + tensor = tensors[idx] + img_io = tensor_to_bytesio(tensor, total_pixels=total_pixels, mime_type=mime_type) effective_label = wait_label if wait_label and show_batch_index and num_to_upload > 1: @@ -81,7 +95,6 @@ async def upload_audio_to_comfyapi( container_format: str = "mp4", codec_name: str = "aac", mime_type: str = "audio/mp4", - filename: str = "uploaded_audio.mp4", ) -> str: """ Uploads a single audio input to ComfyUI API and returns its download URL. 
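
Reviewer note on the upload_images_to_comfyapi hunk above: the helper now accepts either a single image tensor or a list of tensors, and every 4-D tensor is flattened along its batch dimension before the per-image upload loop runs (still capped by max_images). A minimal sketch of that normalization step, assuming ComfyUI's [batch, height, width, channels] image layout; the standalone helper name flatten_image_inputs is hypothetical and not part of this patch:

    import torch

    def flatten_image_inputs(image: torch.Tensor | list[torch.Tensor]) -> list[torch.Tensor]:
        # Normalize the argument to a list, then split each batched tensor
        # into individual [H, W, C] images, preserving input order.
        images = image if isinstance(image, list) else [image]
        tensors: list[torch.Tensor] = []
        for img in images:
            if len(img.shape) > 3:  # batched: [B, H, W, C]
                tensors.extend(img[i] for i in range(img.shape[0]))
            else:
                tensors.append(img)
        return tensors

    # A 4-frame batch plus one extra image yields 5 uploads (subject to max_images), e.g.:
    # urls = await upload_images_to_comfyapi(cls, [frames, extra], max_images=8)
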
@@ -91,7 +104,7 @@ async def upload_audio_to_comfyapi( waveform: torch.Tensor = audio["waveform"] audio_data_np = audio_tensor_to_contiguous_ndarray(waveform) audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, sample_rate, container_format, codec_name) - return await upload_file_to_comfyapi(cls, audio_bytes_io, filename, mime_type) + return await upload_file_to_comfyapi(cls, audio_bytes_io, f"{uuid.uuid4()}.{container_format}", mime_type) async def upload_video_to_comfyapi( diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py index 59fb49357..97fd803b8 100644 --- a/comfy_execution/jobs.py +++ b/comfy_execution/jobs.py @@ -14,8 +14,9 @@ class JobStatus: IN_PROGRESS = 'in_progress' COMPLETED = 'completed' FAILED = 'failed' + CANCELLED = 'cancelled' - ALL = [PENDING, IN_PROGRESS, COMPLETED, FAILED] + ALL = [PENDING, IN_PROGRESS, COMPLETED, FAILED, CANCELLED] # Media types that can be previewed in the frontend @@ -94,12 +95,6 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs: status_info = history_item.get('status', {}) status_str = status_info.get('status_str') if status_info else None - if status_str == 'success': - status = JobStatus.COMPLETED - elif status_str == 'error': - status = JobStatus.FAILED - else: - status = JobStatus.COMPLETED outputs = history_item.get('outputs', {}) outputs_count, preview_output = get_outputs_summary(outputs) @@ -107,6 +102,7 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs: execution_error = None execution_start_time = None execution_end_time = None + was_interrupted = False if status_info: messages = status_info.get('messages', []) for entry in messages: @@ -119,6 +115,15 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs: execution_end_time = event_data.get('timestamp') if event_name == 'execution_error': execution_error = event_data + elif event_name == 'execution_interrupted': + was_interrupted = True + + if status_str == 'success': + status = JobStatus.COMPLETED + elif status_str == 'error': + status = JobStatus.CANCELLED if was_interrupted else JobStatus.FAILED + else: + status = JobStatus.COMPLETED job = prune_dict({ 'id': prompt_id, @@ -268,13 +273,13 @@ def get_all_jobs( for item in queued: jobs.append(normalize_queue_item(item, JobStatus.PENDING)) - include_completed = JobStatus.COMPLETED in status_filter - include_failed = JobStatus.FAILED in status_filter - if include_completed or include_failed: + history_statuses = {JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED} + requested_history_statuses = history_statuses & set(status_filter) + if requested_history_statuses: for prompt_id, history_item in history.items(): - is_failed = history_item.get('status', {}).get('status_str') == 'error' - if (is_failed and include_failed) or (not is_failed and include_completed): - jobs.append(normalize_history_item(prompt_id, history_item)) + job = normalize_history_item(prompt_id, history_item) + if job.get('status') in requested_history_statuses: + jobs.append(job) if workflow_id: jobs = [j for j in jobs if j.get('workflow_id') == workflow_id] diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 94ad5e8a8..15b3aa401 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -399,6 +399,58 @@ class SplitAudioChannels(IO.ComfyNode): separate = execute # TODO: remove +class JoinAudioChannels(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="JoinAudioChannels", + 
display_name="Join Audio Channels", + description="Joins left and right mono audio channels into a stereo audio.", + category="audio", + inputs=[ + IO.Audio.Input("audio_left"), + IO.Audio.Input("audio_right"), + ], + outputs=[ + IO.Audio.Output(display_name="audio"), + ], + ) + + @classmethod + def execute(cls, audio_left, audio_right) -> IO.NodeOutput: + waveform_left = audio_left["waveform"] + sample_rate_left = audio_left["sample_rate"] + waveform_right = audio_right["waveform"] + sample_rate_right = audio_right["sample_rate"] + + if waveform_left.shape[1] != 1 or waveform_right.shape[1] != 1: + raise ValueError("AudioJoin: Both input audios must be mono.") + + # Handle different sample rates by resampling to the higher rate + waveform_left, waveform_right, output_sample_rate = match_audio_sample_rates( + waveform_left, sample_rate_left, waveform_right, sample_rate_right + ) + + # Handle different lengths by trimming to the shorter length + length_left = waveform_left.shape[-1] + length_right = waveform_right.shape[-1] + + if length_left != length_right: + min_length = min(length_left, length_right) + if length_left > min_length: + logging.info(f"JoinAudioChannels: Trimming left channel from {length_left} to {min_length} samples.") + waveform_left = waveform_left[..., :min_length] + if length_right > min_length: + logging.info(f"JoinAudioChannels: Trimming right channel from {length_right} to {min_length} samples.") + waveform_right = waveform_right[..., :min_length] + + # Join the channels into stereo + left_channel = waveform_left[..., 0:1, :] + right_channel = waveform_right[..., 0:1, :] + stereo_waveform = torch.cat([left_channel, right_channel], dim=1) + + return IO.NodeOutput({"waveform": stereo_waveform, "sample_rate": output_sample_rate}) + def match_audio_sample_rates(waveform_1, sample_rate_1, waveform_2, sample_rate_2): if sample_rate_1 != sample_rate_2: @@ -616,6 +668,7 @@ class AudioExtension(ComfyExtension): RecordAudio, TrimAudioDuration, SplitAudioChannels, + JoinAudioChannels, AudioConcat, AudioMerge, AudioAdjustVolume, diff --git a/comfy_extras/nodes_image_compare.py b/comfy_extras/nodes_image_compare.py new file mode 100644 index 000000000..8e9f809e6 --- /dev/null +++ b/comfy_extras/nodes_image_compare.py @@ -0,0 +1,53 @@ +import nodes + +from typing_extensions import override +from comfy_api.latest import IO, ComfyExtension + + +class ImageCompare(IO.ComfyNode): + """Compares two images with a slider interface.""" + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ImageCompare", + display_name="Image Compare", + description="Compares two images side by side with a slider.", + category="image", + is_experimental=True, + is_output_node=True, + inputs=[ + IO.Image.Input("image_a", optional=True), + IO.Image.Input("image_b", optional=True), + IO.ImageCompare.Input("compare_view"), + ], + outputs=[], + ) + + @classmethod + def execute(cls, image_a=None, image_b=None, compare_view=None) -> IO.NodeOutput: + result = {"a_images": [], "b_images": []} + + preview_node = nodes.PreviewImage() + + if image_a is not None and len(image_a) > 0: + saved = preview_node.save_images(image_a, "comfy.compare.a") + result["a_images"] = saved["ui"]["images"] + + if image_b is not None and len(image_b) > 0: + saved = preview_node.save_images(image_b, "comfy.compare.b") + result["b_images"] = saved["ui"]["images"] + + return IO.NodeOutput(ui=result) + + +class ImageCompareExtension(ComfyExtension): + @override + async def get_node_list(self) -> 
list[type[IO.ComfyNode]]: + return [ + ImageCompare, + ] + + +async def comfy_entrypoint() -> ImageCompareExtension: + return ImageCompareExtension() diff --git a/comfy_extras/nodes_lt_audio.py b/comfy_extras/nodes_lt_audio.py index 26b0160d2..1966fd1bf 100644 --- a/comfy_extras/nodes_lt_audio.py +++ b/comfy_extras/nodes_lt_audio.py @@ -185,6 +185,10 @@ class LTXAVTextEncoderLoader(io.ComfyNode): io.Combo.Input( "ckpt_name", options=folder_paths.get_filename_list("checkpoints"), + ), + io.Combo.Input( + "device", + options=["default", "cpu"], ) ], outputs=[io.Clip.Output()], @@ -197,7 +201,11 @@ class LTXAVTextEncoderLoader(io.ComfyNode): clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", text_encoder) clip_path2 = folder_paths.get_full_path_or_raise("checkpoints", ckpt_name) - clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type) + model_options = {} + if device == "cpu": + model_options["load_device"] = model_options["offload_device"] = torch.device("cpu") + + clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2], embedding_directory=folder_paths.get_folder_paths("embeddings"), clip_type=clip_type, model_options=model_options) return io.NodeOutput(clip) diff --git a/comfy_extras/nodes_model_patch.py b/comfy_extras/nodes_model_patch.py index 1355b3c93..f66d28fc9 100644 --- a/comfy_extras/nodes_model_patch.py +++ b/comfy_extras/nodes_model_patch.py @@ -244,6 +244,10 @@ class ModelPatchLoader: elif 'control_all_x_embedder.2-1.weight' in sd: # alipai z image fun controlnet sd = z_image_convert(sd) config = {} + if 'control_layers.4.adaLN_modulation.0.weight' not in sd: + config['n_control_layers'] = 3 + config['additional_in_dim'] = 17 + config['refiner_control'] = True if 'control_layers.14.adaLN_modulation.0.weight' in sd: config['n_control_layers'] = 15 config['additional_in_dim'] = 17 diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 01afa13a1..2e559c35c 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -254,6 +254,7 @@ class ResizeType(str, Enum): SCALE_HEIGHT = "scale height" SCALE_TOTAL_PIXELS = "scale total pixels" MATCH_SIZE = "match size" + SCALE_TO_MULTIPLE = "scale to multiple" def is_image(input: torch.Tensor) -> bool: # images have 4 dimensions: [batch, height, width, channels] @@ -328,7 +329,7 @@ def scale_shorter_dimension(input: torch.Tensor, shorter_size: int, scale_method if height < width: width = round((width / height) * shorter_size) height = shorter_size - elif width > height: + elif width < height: height = round((height / width) * shorter_size) width = shorter_size else: @@ -363,6 +364,43 @@ def scale_match_size(input: torch.Tensor, match: torch.Tensor, scale_method: str input = finalize_image_mask_input(input, is_type_image) return input +def scale_to_multiple_cover(input: torch.Tensor, multiple: int, scale_method: str) -> torch.Tensor: + if multiple <= 1: + return input + is_type_image = is_image(input) + if is_type_image: + _, height, width, _ = input.shape + else: + _, height, width = input.shape + target_w = (width // multiple) * multiple + target_h = (height // multiple) * multiple + if target_w == 0 or target_h == 0: + return input + if target_w == width and target_h == height: + return input + s_w = target_w / width + s_h = target_h / height + if s_w >= s_h: + scaled_w = target_w + scaled_h = int(math.ceil(height * s_w)) + if scaled_h < target_h: + 
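# clamp: float rounding must not leave the cover-scaled height short of the crop target
+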
scaled_h = target_h + else: + scaled_h = target_h + scaled_w = int(math.ceil(width * s_h)) + if scaled_w < target_w: + scaled_w = target_w + input = init_image_mask_input(input, is_type_image) + input = comfy.utils.common_upscale(input, scaled_w, scaled_h, scale_method, "disabled") + input = finalize_image_mask_input(input, is_type_image) + x0 = (scaled_w - target_w) // 2 + y0 = (scaled_h - target_h) // 2 + x1 = x0 + target_w + y1 = y0 + target_h + if is_type_image: + return input[:, y0:y1, x0:x1, :] + return input[:, y0:y1, x0:x1] + class ResizeImageMaskNode(io.ComfyNode): scale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"] @@ -378,6 +416,7 @@ class ResizeImageMaskNode(io.ComfyNode): longer_size: int shorter_size: int megapixels: float + multiple: int @classmethod def define_schema(cls): @@ -417,6 +456,9 @@ class ResizeImageMaskNode(io.ComfyNode): io.MultiType.Input("match", [io.Image, io.Mask]), crop_combo, ]), + io.DynamicCombo.Option(ResizeType.SCALE_TO_MULTIPLE, [ + io.Int.Input("multiple", default=8, min=1, max=MAX_RESOLUTION, step=1), + ]), ]), io.Combo.Input("scale_method", options=cls.scale_methods, default="area"), ], @@ -442,6 +484,8 @@ class ResizeImageMaskNode(io.ComfyNode): return io.NodeOutput(scale_total_pixels(input, resize_type["megapixels"], scale_method)) elif selected_type == ResizeType.MATCH_SIZE: return io.NodeOutput(scale_match_size(input, resize_type["match"], scale_method, resize_type["crop"])) + elif selected_type == ResizeType.SCALE_TO_MULTIPLE: + return io.NodeOutput(scale_to_multiple_cover(input, resize_type["multiple"], scale_method)) raise ValueError(f"Unsupported resize type: {selected_type}") def batch_images(images: list[torch.Tensor]) -> torch.Tensor | None: diff --git a/comfyui_version.py b/comfyui_version.py index 750673f08..dbb57b4e5 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.8.0" +__version__ = "0.9.2" diff --git a/main.py b/main.py index 0e07a95da..37b06c1fa 100644 --- a/main.py +++ b/main.py @@ -7,6 +7,7 @@ import folder_paths import time from comfy.cli_args import args from app.logger import setup_logger +from app.assets.scanner import seed_assets import itertools import utils.extra_config import logging @@ -324,6 +325,8 @@ def setup_database(): from app.database.db import init_db, dependencies_available if dependencies_available(): init_db() + if not args.disable_assets_autoscan: + seed_assets(["models"], enable_logging=True) except Exception as e: logging.error(f"Failed to initialize database. Please ensure you have installed the latest requirements. 
If the error persists, please report this as in future the database will be required: {e}") diff --git a/manager_requirements.txt b/manager_requirements.txt index 6585b0c19..bea6d4927 100644 --- a/manager_requirements.txt +++ b/manager_requirements.txt @@ -1 +1 @@ -comfyui_manager==4.0.4 +comfyui_manager==4.0.5 diff --git a/nodes.py b/nodes.py index 56b74ebe3..f19d5fd1c 100644 --- a/nodes.py +++ b/nodes.py @@ -378,14 +378,15 @@ class VAEEncodeForInpaint: CATEGORY = "latent/inpaint" def encode(self, vae, pixels, mask, grow_mask_by=6): - x = (pixels.shape[1] // vae.downscale_ratio) * vae.downscale_ratio - y = (pixels.shape[2] // vae.downscale_ratio) * vae.downscale_ratio + downscale_ratio = vae.spacial_compression_encode() + x = (pixels.shape[1] // downscale_ratio) * downscale_ratio + y = (pixels.shape[2] // downscale_ratio) * downscale_ratio mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") pixels = pixels.clone() if pixels.shape[1] != x or pixels.shape[2] != y: - x_offset = (pixels.shape[1] % vae.downscale_ratio) // 2 - y_offset = (pixels.shape[2] % vae.downscale_ratio) // 2 + x_offset = (pixels.shape[1] % downscale_ratio) // 2 + y_offset = (pixels.shape[2] % downscale_ratio) // 2 pixels = pixels[:,x_offset:x + x_offset, y_offset:y + y_offset,:] mask = mask[:,:,x_offset:x + x_offset, y_offset:y + y_offset] @@ -787,6 +788,7 @@ class VAELoader: #TODO: scale factor? def load_vae(self, vae_name): + metadata = None if vae_name == "pixel_space": sd = {} sd["pixel_space_vae"] = torch.tensor(1.0) @@ -797,8 +799,8 @@ class VAELoader: vae_path = folder_paths.get_full_path_or_raise("vae_approx", vae_name) else: vae_path = folder_paths.get_full_path_or_raise("vae", vae_name) - sd = comfy.utils.load_torch_file(vae_path) - vae = comfy.sd.VAE(sd=sd) + sd, metadata = comfy.utils.load_torch_file(vae_path, return_metadata=True) + vae = comfy.sd.VAE(sd=sd, metadata=metadata) vae.throw_exception_if_invalid() return (vae,) @@ -2369,6 +2371,7 @@ async def init_builtin_extra_nodes(): "nodes_nop.py", "nodes_kandinsky5.py", "nodes_wanmove.py", + "nodes_image_compare.py", ] import_failed = [] @@ -2398,6 +2401,7 @@ async def init_builtin_api_nodes(): "nodes_sora.py", "nodes_topaz.py", "nodes_tripo.py", + "nodes_meshy.py", "nodes_moonvalley.py", "nodes_rodin.py", "nodes_gemini.py", diff --git a/pyproject.toml b/pyproject.toml index 951c2c978..9ea73da05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.8.0" +version = "0.9.2" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" diff --git a/requirements.txt b/requirements.txt index bc8346bcf..3876274f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -comfyui-frontend-package==1.35.9 -comfyui-workflow-templates==0.7.67 -comfyui-embedded-docs==0.3.1 +comfyui-frontend-package==1.36.14 +comfyui-workflow-templates==0.8.11 +comfyui-embedded-docs==0.4.0 torch torchsde torchvision @@ -21,7 +21,7 @@ psutil alembic SQLAlchemy av>=14.2.0 -comfy-kitchen>=0.2.3 +comfy-kitchen>=0.2.6 #non essential dependencies: kornia>=0.7.1 diff --git a/server.py b/server.py index 70c8b5e3b..04a577488 100644 --- a/server.py +++ b/server.py @@ -33,6 +33,8 @@ import node_helpers from comfyui_version import __version__ from app.frontend_management import FrontendManager, parse_version from comfy_api.internal import _ComfyNodeInternal +from app.assets.scanner import seed_assets +from 
app.assets.api.routes import register_assets_system from app.user_manager import UserManager from app.model_manager import ModelFileManager @@ -184,7 +186,7 @@ def create_block_external_middleware(): else: response = await handler(request) - response.headers['Content-Security-Policy'] = "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self'; frame-src 'self'; object-src 'self';" + response.headers['Content-Security-Policy'] = "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self' data:; frame-src 'self'; object-src 'self';" return response return block_external_middleware @@ -235,6 +237,7 @@ class PromptServer(): else args.front_end_root ) logging.info(f"[Prompt Server] web root: {self.web_root}") + register_assets_system(self.app, self.user_manager) routes = web.RouteTableDef() self.routes = routes self.last_node_id = None @@ -683,6 +686,10 @@ class PromptServer(): @routes.get("/object_info") async def get_object_info(request): + try: + seed_assets(["models"]) + except Exception as e: + logging.error(f"Failed to seed assets: {e}") with folder_paths.cache_helper: out = {} for x in nodes.NODE_CLASS_MAPPINGS: diff --git a/tests-unit/comfy_quant/test_mixed_precision.py b/tests-unit/comfy_quant/test_mixed_precision.py index 7b2eac940..7c740491d 100644 --- a/tests-unit/comfy_quant/test_mixed_precision.py +++ b/tests-unit/comfy_quant/test_mixed_precision.py @@ -153,9 +153,9 @@ class TestMixedPrecisionOps(unittest.TestCase): state_dict2 = model.state_dict() # Verify layer1.weight is a QuantizedTensor with scale preserved - self.assertIsInstance(state_dict2["layer1.weight"], QuantizedTensor) - self.assertEqual(state_dict2["layer1.weight"]._params.scale.item(), 3.0) - self.assertEqual(state_dict2["layer1.weight"]._layout_cls, "TensorCoreFP8E4M3Layout") + self.assertTrue(torch.equal(state_dict2["layer1.weight"].view(torch.uint8), fp8_weight.view(torch.uint8))) + self.assertEqual(state_dict2["layer1.weight_scale"].item(), 3.0) + self.assertEqual(model.layer1.weight._layout_cls, "TensorCoreFP8E4M3Layout") # Verify non-quantized layers are standard tensors self.assertNotIsInstance(state_dict2["layer2.weight"], QuantizedTensor) diff --git a/tests/execution/test_jobs.py b/tests/execution/test_jobs.py index 918c8080a..4d2f9ed36 100644 --- a/tests/execution/test_jobs.py +++ b/tests/execution/test_jobs.py @@ -19,6 +19,7 @@ class TestJobStatus: assert JobStatus.IN_PROGRESS == 'in_progress' assert JobStatus.COMPLETED == 'completed' assert JobStatus.FAILED == 'failed' + assert JobStatus.CANCELLED == 'cancelled' def test_all_contains_all_statuses(self): """ALL should contain all status values.""" @@ -26,7 +27,8 @@ class TestJobStatus: assert JobStatus.IN_PROGRESS in JobStatus.ALL assert JobStatus.COMPLETED in JobStatus.ALL assert JobStatus.FAILED in JobStatus.ALL - assert len(JobStatus.ALL) == 4 + assert JobStatus.CANCELLED in JobStatus.ALL + assert len(JobStatus.ALL) == 5 class TestIsPreviewable: @@ -336,6 +338,40 @@ class TestNormalizeHistoryItem: assert job['execution_error']['node_type'] == 'KSampler' assert job['execution_error']['exception_message'] == 'CUDA out of memory' + def test_cancelled_job(self): + """Cancelled/interrupted history item should have cancelled status.""" + history_item = { + 'prompt': ( + 5, + 'prompt-cancelled', + {'nodes': {}}, + {'create_time': 
1234567890000}, + ['node1'], + ), + 'status': { + 'status_str': 'error', + 'completed': False, + 'messages': [ + ('execution_start', {'prompt_id': 'prompt-cancelled', 'timestamp': 1234567890500}), + ('execution_interrupted', { + 'prompt_id': 'prompt-cancelled', + 'node_id': '5', + 'node_type': 'KSampler', + 'executed': ['1', '2', '3'], + 'timestamp': 1234567891000, + }) + ] + }, + 'outputs': {}, + } + + job = normalize_history_item('prompt-cancelled', history_item) + assert job['status'] == 'cancelled' + assert job['execution_start_time'] == 1234567890500 + assert job['execution_end_time'] == 1234567891000 + # Cancelled jobs should not have execution_error set + assert 'execution_error' not in job + def test_include_outputs(self): """When include_outputs=True, should include full output data.""" history_item = {