Fix #13: audio nodes now work and test correctly

doctorpangloss 2024-07-18 17:15:44 -07:00
parent cc99d89ac6
commit 0c34c2b99d
14 changed files with 224 additions and 45 deletions

View File

@@ -1,23 +0,0 @@
-name: Python Linting
-on: [push, pull_request]
-jobs:
-  pylint:
-    name: Run Pylint
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.x
-      - name: Install Pylint
-        run: pip install pylint
-      - name: Run Pylint
-        run: pylint --rcfile=.pylintrc $(find . -type f -name "*.py")

View File

@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
 name: Backend Tests
 #
@@ -9,6 +10,7 @@ on: [ push ]
 jobs:
   build_and_execute_macos:
+    if: false
     name: Installation, Unit and Workflow Tests for macOS
    runs-on: ${{ matrix.runner.labels }}
     strategy:
@@ -31,11 +33,13 @@ jobs:
         run: |
           source venv/bin/activate
           pytest -v tests/inference
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
       - name: Lint for errors
         run: |
           source venv/bin/activate
-          pylint comfy/
-          pylint comfy_extras/
+          pylint --rcfile=.pylintrc comfy/
+          pylint --rcfile=.pylintrc comfy_extras/
   build_and_execute_linux:
     name: Installation, Unit and Workflow Tests for Linux
     runs-on: ${{ matrix.runner.labels }}
@@ -46,10 +50,11 @@
         runner:
           - labels: [self-hosted, Linux, X64, cpu]
             container: "ubuntu"
-          - labels: [self-hosted, Linux, X64, ipex-a380-6gb]
-            container: "intel/intel-optimized-pytorch:2.1.20-xpu-pip-base"
-          - labels: [self-hosted, Linux, X64, rocm-7600-8gb]
-            container: "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2"
+          # todo: re-enable these when they actually work lol
+          # - labels: [self-hosted, Linux, X64, ipex-a380-6gb]
+          #   container: "intel/intel-optimized-pytorch:2.1.20-xpu-pip-base"
+          # - labels: [self-hosted, Linux, X64, rocm-7600-8gb]
+          #   container: "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2"
           - labels: [self-hosted, Linux, X64, cuda-3060-12gb]
             container: "nvcr.io/nvidia/pytorch:24.03-py3"
       steps:
@@ -75,7 +80,9 @@ jobs:
         run: |
           export HSA_OVERRIDE_GFX_VERSION=11.0.0
           pytest -v tests/inference
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
       - name: Lint for errors
         run: |
-          pylint comfy/
-          pylint comfy_extras/
+          pylint --rcfile=.pylintrc comfy/
+          pylint --rcfile=.pylintrc comfy_extras/

View File

@@ -172,6 +172,10 @@ These packages have been adapted to be installable with `pip` and download model
 - **LayerDiffuse**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-layerdiffuse.git`.
 - **BRIA Background Removal**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-bria-bg-removal.git`
 - **IP Adapter**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-ipadapter-plus`
+- **Video Frame Interpolation**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-video-frame-interpolation`
+- **Video Helper Suite**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-video-helper-suite`
+- **AnimateDiff Evolved**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-animatediff-evolved`
+- **Impact Pack**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-impact-pack`
 
 Custom nodes are generally supported by this fork. Use these for a bug-free experience.

View File

@@ -28,6 +28,7 @@ from typing_extensions import NamedTuple
 from .latent_preview_image_encoding import encode_preview_image
 from .. import interruption
 from .. import model_management
+from .. import node_helpers
 from .. import utils
 from ..app.frontend_management import FrontendManager
 from ..app.user_manager import UserManager
@@ -38,7 +39,6 @@ from ..cmd import folder_paths
 from ..component_model.abstract_prompt_queue import AbstractPromptQueue, AsyncAbstractPromptQueue
 from ..component_model.executor_types import ExecutorToClientProgress, StatusMessage, QueueInfo, ExecInfo
 from ..component_model.file_output_path import file_output_path
-from ..component_model.files import get_package_as_path
 from ..component_model.queue_types import QueueItem, HistoryEntry, BinaryEventTypes, TaskInvocation, ExecutionError, \
     ExecutionStatus
 from ..digest import digest

View File

@@ -38,7 +38,7 @@ def get_filename_list_with_downloadable(folder_name: str, known_files: Optional[
     return sorted(list(existing | downloadable))
 
-def get_or_download(folder_name: str, filename: str, known_files: Optional[List[Downloadable]] = None) -> Optional[str]:
+def get_or_download(folder_name: str, filename: str, known_files: Optional[List[Downloadable] | KnownDownloadables] = None) -> Optional[str]:
     if known_files is None:
         known_files = _get_known_models_for_folder_name(folder_name)
@@ -219,6 +219,8 @@ KNOWN_CHECKPOINTS: Final[KnownDownloadables] = KnownDownloadables([
     HuggingFile("stabilityai/stable-diffusion-3-medium", filename="sd3_medium_incl_clips.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", filename="sd3_medium_incl_clips_t5xxlfp8.safetensors"),
     HuggingFile("fal/AuraFlow", filename="aura_flow_0.1.safetensors"),
+    # stable audio, uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
+    HuggingFile("stabilityai/stable-audio-open-1.0", "model.safetensors", save_with_filename="stable_audio_open_1.0.safetensors")
 ], folder_name="checkpoints")
 
 KNOWN_UNCLIP_CHECKPOINTS: Final[KnownDownloadables] = KnownDownloadables([
@@ -382,6 +384,8 @@ KNOWN_CLIP_MODELS: Final[KnownDownloadables] = KnownDownloadables([
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp8_e4m3fn.safetensors", save_with_filename="t5xxl_fp8_e4m3fn.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors", save_with_filename="clip_g.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_l.safetensors", save_with_filename="clip_l.safetensors"),
+    # uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
+    HuggingFile("google-t5/t5-base", "model.safetensors", save_with_filename="t5_base.safetensors"),
 ], folder_name="clip")
 
 _known_models_db: list[KnownDownloadables] = [
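Both new entries feed the download-on-demand path whose signature change appears in the first hunk: a filename that matches a `save_with_filename` alias is fetched from Hugging Face on first use. A minimal sketch of the expected call path, assuming the defaulting behavior shown above (the call site itself is illustrative, not part of this commit):

```python
# Illustrative usage; when known_files is omitted, the registered
# KnownDownloadables for the folder (e.g. KNOWN_CHECKPOINTS) are consulted.
from comfy.model_downloader import get_or_download

# Downloads stabilityai/stable-audio-open-1.0's model.safetensors on first
# use and saves it under the aliased checkpoint name.
ckpt_path = get_or_download("checkpoints", "stable_audio_open_1.0.safetensors")

# The matching T5 text encoder resolves the same way from the "clip" folder.
clip_path = get_or_download("clip", "t5_base.safetensors")
```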

View File

@@ -6,7 +6,7 @@ from os.path import split
 from pathlib import PurePosixPath
 from typing import Optional, List, Sequence, Union
 
-from can_ada import parse, URL
+from can_ada import parse, URL # pylint: disable=no-name-in-module
 from typing_extensions import TypedDict, NotRequired
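The disable is needed because `can_ada` is a compiled extension, so pylint cannot see `parse` and `URL` statically. For context, a small usage sketch (the URL value is illustrative, and the WHATWG-style attribute names are an assumption about can_ada's API rather than something confirmed by this diff):

```python
from can_ada import parse  # pylint: disable=no-name-in-module

# can_ada implements the WHATWG URL model; host/pathname names here are
# assumed from that spec.
url = parse("https://huggingface.co/stabilityai/stable-audio-open-1.0/resolve/main/model.safetensors")
print(url.host)      # huggingface.co
print(url.pathname)  # /stabilityai/stable-audio-open-1.0/resolve/main/model.safetensors
```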

View File

@@ -105,7 +105,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         special_tokens = {"start": 49406, "end": 49407, "pad": 49407}
         assert layer in self.LAYERS
-        config = get_path_as_dict(textmodel_json_config, "sd1_clip_config.json")
+        config = get_path_as_dict(textmodel_json_config, "sd1_clip_config.json", package=__package__)
         self.transformer = model_class(config, dtype, device, ops.manual_cast)
         self.num_layers = self.transformer.num_layers
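This and the following hunks pass `package=__package__` so the fallback JSON config resolves relative to whichever package actually ships it, instead of a hard-coded module path. A sketch of what `get_path_as_dict` is assumed to do; the real helper lives in `comfy.component_model.files` and may differ:

```python
# Hypothetical reconstruction, not the shipped implementation.
import importlib.resources
import json


def get_path_as_dict(config, default_name: str, package: str) -> dict:
    # An already-parsed config passes through untouched.
    if isinstance(config, dict):
        return config
    # An explicit path wins; otherwise fall back to the JSON bundled
    # alongside the calling package.
    if isinstance(config, str):
        with open(config, "r", encoding="utf8") as f:
            return json.load(f)
    resource = importlib.resources.files(package).joinpath(default_name)
    return json.loads(resource.read_text(encoding="utf8"))
```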

View File

@@ -9,7 +9,7 @@ class SD2ClipHModel(sd1_clip.SDClipModel):
         layer = "hidden"
         layer_idx = -2
-        textmodel_json_config = get_path_as_dict(textmodel_json_config, "sd2_clip_config.json")
+        textmodel_json_config = get_path_as_dict(textmodel_json_config, "sd2_clip_config.json", package=__package__)
         super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0})

View File

@@ -8,7 +8,7 @@ from ..component_model.files import get_path_as_dict
 class PT5XlModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = get_path_as_dict(textmodel_json_config, "t5_pile_config_xl.json", package="comfy.text_encoders")
+        textmodel_json_config = get_path_as_dict(textmodel_json_config, "t5_pile_config_xl.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 2, "pad": 1}, model_class=t5.T5, enable_attention_masks=True, zero_out_masked=True)

View File

@@ -10,9 +10,9 @@ class LLAMATokenizer:
         self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path) # pylint: disable=unexpected-keyword-arg
         self.end = self.tokenizer.eos_id()
         self.eos_token_id = self.end
-        self.eos_token = self.tokenizer.id_to_piece(self.eos_token_id)
+        self.eos_token = self.tokenizer.id_to_piece(self.eos_token_id) # pylint: disable=no-member
         self._vocab = {
-            self.tokenizer.id_to_piece(i): i for i in range(self.tokenizer.get_piece_size())
+            self.tokenizer.id_to_piece(i): i for i in range(self.tokenizer.get_piece_size()) # pylint: disable=no-member
         }
 
     def get_vocab(self):

View File

@@ -4,15 +4,16 @@ import comfy.text_encoders.t5
 from comfy import sd1_clip
 from comfy.component_model import files
 
 class T5BaseModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_base.json")
+        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_base.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)
 
 class T5BaseTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None):
-        tokenizer_path = files.get_package_as_path("comfy.t5_tokenizer")
+        tokenizer_path = files.get_package_as_path("comfy.text_encoders.t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=768, embedding_key='t5base', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=128)
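The corrected dotted path matters because the tokenizer files live under `comfy/text_encoders/t5_tokenizer`, not `comfy/t5_tokenizer`. A minimal sketch of what `files.get_package_as_path` is assumed to do, valid for ordinary on-disk installs:

```python
# Hypothetical sketch; assumes a non-zipped install where the package
# resolves to a real directory.
import importlib.resources


def get_package_as_path(package: str) -> str:
    # files() returns a Traversable; for filesystem packages, str() of it
    # is the directory that holds the package's data files.
    return str(importlib.resources.files(package))
```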

View File

@@ -12,7 +12,7 @@ from comfy.component_model import files
 class T5XXLModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_xxl.json", package="comfy.text_encoders")
+        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_xxl.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5)

View File

@@ -1,5 +1,6 @@
 torch
 torchvision
+torchaudio
 torchdiffeq>=0.2.3
 torchsde>=0.2.6
 einops>=0.6.0
@@ -56,4 +57,5 @@ certifi
 spandrel
 numpy>=1.26.3,<2.0.0
 soundfile
-watchdog
+watchdog
+PySoundFile
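`torchaudio` backs the new audio nodes, while `soundfile`/`PySoundFile` handle reading and writing audio files. A quick sketch of checking a file written by `SaveAudio` (the path is illustrative):

```python
import soundfile as sf

# data is a float array of samples, sample_rate an int; stable-audio-open
# outputs are typically 44.1 kHz.
data, sample_rate = sf.read("output/audio/objectobject_00001_.flac")
print(data.shape, sample_rate)
```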

View File

@@ -6,6 +6,186 @@ from comfy.model_downloader import add_known_models, KNOWN_LORAS
 from comfy.model_downloader_types import CivitFile
 
 _workflows = {
+    "audio_1": {
+        "14": {
+            "inputs": {
+                "ckpt_name": "stable_audio_open_1.0.safetensors"
+            },
+            "class_type": "CheckpointLoaderSimple",
+            "_meta": {
+                "title": "Load Checkpoint"
+            }
+        },
+        "15": {
+            "inputs": {
+                "clip_name": "t5_base.safetensors",
+                "type": "stable_audio"
+            },
+            "class_type": "CLIPLoader",
+            "_meta": {
+                "title": "Load CLIP"
+            }
+        },
+        "16": {
+            "inputs": {
+                "text": "hard bop, upright bass, slappy bass, low frequencies, drum kit brushed hi-hat, snare with ghost notes, syncopated, groove",
+                "clip": [
+                    "15",
+                    0
+                ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+                "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "17": {
+            "inputs": {
+                "text": "",
+                "clip": [
+                    "15",
+                    0
+                ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+                "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "19": {
+            "inputs": {
+                "cfg": 7,
+                "model": [
+                    "14",
+                    0
+                ],
+                "positive": [
+                    "16",
+                    0
+                ],
+                "negative": [
+                    "17",
+                    0
+                ]
+            },
+            "class_type": "CFGGuider",
+            "_meta": {
+                "title": "CFGGuider"
+            }
+        },
+        "21": {
+            "inputs": {
+                "noise_seed": 600769511872395
+            },
+            "class_type": "RandomNoise",
+            "_meta": {
+                "title": "RandomNoise"
+            }
+        },
+        "22": {
+            "inputs": {
+                "noise": [
+                    "21",
+                    0
+                ],
+                "guider": [
+                    "19",
+                    0
+                ],
+                "sampler": [
+                    "29",
+                    0
+                ],
+                "sigmas": [
+                    "24",
+                    0
+                ],
+                "latent_image": [
+                    "30",
+                    0
+                ]
+            },
+            "class_type": "SamplerCustomAdvanced",
+            "_meta": {
+                "title": "SamplerCustomAdvanced"
+            }
+        },
+        "24": {
+            "inputs": {
+                "scheduler": "exponential",
+                "steps": 20,
+                "denoise": 1,
+                "model": [
+                    "14",
+                    0
+                ]
+            },
+            "class_type": "BasicScheduler",
+            "_meta": {
+                "title": "BasicScheduler"
+            }
+        },
+        "26": {
+            "inputs": {
+                "samples": [
+                    "22",
+                    1
+                ],
+                "vae": [
+                    "14",
+                    2
+                ]
+            },
+            "class_type": "VAEDecodeAudio",
+            "_meta": {
+                "title": "VAEDecodeAudio"
+            }
+        },
+        "27": {
+            "inputs": {
+                "filename_prefix": "audio/objectobject",
+                "audio": [
+                    "26",
+                    0
+                ]
+            },
+            "class_type": "SaveAudio",
+            "_meta": {
+                "title": "SaveAudio"
+            }
+        },
+        "29": {
+            "inputs": {
+                "version": "regular"
+            },
+            "class_type": "SamplerEulerCFGpp",
+            "_meta": {
+                "title": "SamplerEulerCFG++"
+            }
+        },
+        "30": {
+            "inputs": {
+                "seconds": 47.6
+            },
+            "class_type": "EmptyLatentAudio",
+            "_meta": {
+                "title": "EmptyLatentAudio"
+            }
+        },
+        "31": {
+            "inputs": {
+                "filename_prefix": "latents/ComfyUI",
+                "samples": [
+                    "22",
+                    1
+                ]
+            },
+            "class_type": "SaveLatent",
+            "_meta": {
+                "title": "SaveLatent"
+            }
+        }
+    },
     "auraflow_1": {
         "1": {
             "inputs": {
@@ -273,5 +453,9 @@ async def test_workflow(workflow_name: str, workflow: dict, has_gpu: bool, clien
     # todo: add all the models we want to test a bit more elegantly
     outputs = await client.queue_prompt(prompt)
-    save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveImage")
-    assert outputs[save_image_node_id]["images"][0]["abs_path"] is not None
+    if any(v.class_type == "SaveImage" for v in prompt.values()):
+        save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveImage")
+        assert outputs[save_image_node_id]["images"][0]["abs_path"] is not None
+    elif any(v.class_type == "SaveAudio" for v in prompt.values()):
+        save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveAudio")
+        assert outputs[save_image_node_id]["audio"][0]["filename"] is not None
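A hedged sketch of the new audio branch in isolation, mirroring the assertions above (the output shape is inferred from the test itself, not from a documented API), written with a dedicated `save_audio_node_id` name rather than reusing `save_image_node_id`:

```python
async def check_audio_output(client, prompt) -> None:
    # Mirrors the SaveAudio branch above; for the audio_1 workflow this
    # resolves node "27" and asserts that an audio file was written.
    outputs = await client.queue_prompt(prompt)
    save_audio_node_id = next(k for k, v in prompt.items() if v.class_type == "SaveAudio")
    assert outputs[save_audio_node_id]["audio"][0]["filename"] is not None
```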