Fix #13: audio nodes now work and test correctly

doctorpangloss 2024-07-18 17:15:44 -07:00
parent cc99d89ac6
commit 0c34c2b99d
14 changed files with 224 additions and 45 deletions

View File

@@ -1,23 +0,0 @@
-name: Python Linting
-on: [push, pull_request]
-jobs:
-  pylint:
-    name: Run Pylint
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.x
-      - name: Install Pylint
-        run: pip install pylint
-      - name: Run Pylint
-        run: pylint --rcfile=.pylintrc $(find . -type f -name "*.py")

View File

@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
 name: Backend Tests
 #
@@ -9,6 +10,7 @@ on: [ push ]
 jobs:
   build_and_execute_macos:
+    if: false
     name: Installation, Unit and Workflow Tests for macOS
    runs-on: ${{ matrix.runner.labels }}
     strategy:
@@ -31,11 +33,13 @@ jobs:
         run: |
           source venv/bin/activate
           pytest -v tests/inference
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
       - name: Lint for errors
         run: |
           source venv/bin/activate
-          pylint comfy/
-          pylint comfy_extras/
+          pylint --rcfile=.pylintrc comfy/
+          pylint --rcfile=.pylintrc comfy_extras/
   build_and_execute_linux:
     name: Installation, Unit and Workflow Tests for Linux
     runs-on: ${{ matrix.runner.labels }}
@@ -46,10 +50,11 @@
         runner:
           - labels: [self-hosted, Linux, X64, cpu]
             container: "ubuntu"
-          - labels: [self-hosted, Linux, X64, ipex-a380-6gb]
-            container: "intel/intel-optimized-pytorch:2.1.20-xpu-pip-base"
-          - labels: [self-hosted, Linux, X64, rocm-7600-8gb]
-            container: "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2"
+          # todo: re-enable these when they actually work lol
+          # - labels: [self-hosted, Linux, X64, ipex-a380-6gb]
+          #   container: "intel/intel-optimized-pytorch:2.1.20-xpu-pip-base"
+          # - labels: [self-hosted, Linux, X64, rocm-7600-8gb]
+          #   container: "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2"
           - labels: [self-hosted, Linux, X64, cuda-3060-12gb]
             container: "nvcr.io/nvidia/pytorch:24.03-py3"
       steps:
@@ -75,7 +80,9 @@ jobs:
         run: |
           export HSA_OVERRIDE_GFX_VERSION=11.0.0
           pytest -v tests/inference
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
       - name: Lint for errors
         run: |
-          pylint comfy/
-          pylint comfy_extras/
+          pylint --rcfile=.pylintrc comfy/
+          pylint --rcfile=.pylintrc comfy_extras/

View File

@@ -172,6 +172,10 @@ These packages have been adapted to be installable with `pip` and download model
 - **LayerDiffuse**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-layerdiffuse.git`.
 - **BRIA Background Removal**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-bria-bg-removal.git`
 - **IP Adapter**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-ipadapter-plus`
+- **Video Frame Interpolation**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-video-frame-interpolation`
+- **Video Helper Suite**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-video-helper-suite`
+- **AnimateDiff Evolved**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-animatediff-evolved`
+- **Impact Pack**: `pip install git+https://github.com/AppMana/appmana-comfyui-nodes-impact-pack`
 
 Custom nodes are generally supported by this fork. Use these for a bug-free experience.

View File

@@ -28,6 +28,7 @@ from typing_extensions import NamedTuple
 from .latent_preview_image_encoding import encode_preview_image
 from .. import interruption
 from .. import model_management
+from .. import node_helpers
 from .. import utils
 from ..app.frontend_management import FrontendManager
 from ..app.user_manager import UserManager
@@ -38,7 +39,6 @@ from ..cmd import folder_paths
 from ..component_model.abstract_prompt_queue import AbstractPromptQueue, AsyncAbstractPromptQueue
 from ..component_model.executor_types import ExecutorToClientProgress, StatusMessage, QueueInfo, ExecInfo
 from ..component_model.file_output_path import file_output_path
-from ..component_model.files import get_package_as_path
 from ..component_model.queue_types import QueueItem, HistoryEntry, BinaryEventTypes, TaskInvocation, ExecutionError, \
     ExecutionStatus
 from ..digest import digest

View File

@@ -38,7 +38,7 @@ def get_filename_list_with_downloadable(folder_name: str, known_files: Optional[
     return sorted(list(existing | downloadable))
 
-def get_or_download(folder_name: str, filename: str, known_files: Optional[List[Downloadable]] = None) -> Optional[str]:
+def get_or_download(folder_name: str, filename: str, known_files: Optional[List[Downloadable] | KnownDownloadables] = None) -> Optional[str]:
     if known_files is None:
         known_files = _get_known_models_for_folder_name(folder_name)
@@ -219,6 +219,8 @@ KNOWN_CHECKPOINTS: Final[KnownDownloadables] = KnownDownloadables([
     HuggingFile("stabilityai/stable-diffusion-3-medium", filename="sd3_medium_incl_clips.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", filename="sd3_medium_incl_clips_t5xxlfp8.safetensors"),
     HuggingFile("fal/AuraFlow", filename="aura_flow_0.1.safetensors"),
+    # stable audio, uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
+    HuggingFile("stabilityai/stable-audio-open-1.0", "model.safetensors", save_with_filename="stable_audio_open_1.0.safetensors")
 ], folder_name="checkpoints")
 
 KNOWN_UNCLIP_CHECKPOINTS: Final[KnownDownloadables] = KnownDownloadables([
@@ -382,6 +384,8 @@ KNOWN_CLIP_MODELS: Final[KnownDownloadables] = KnownDownloadables([
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp8_e4m3fn.safetensors", save_with_filename="t5xxl_fp8_e4m3fn.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors", save_with_filename="clip_g.safetensors"),
     HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_l.safetensors", save_with_filename="clip_l.safetensors"),
+    # uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
+    HuggingFile("google-t5/t5-base", "model.safetensors", save_with_filename="t5_base.safetensors"),
 ], folder_name="clip")
 
 _known_models_db: list[KnownDownloadables] = [
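Both new entries feed the download-on-demand path whose signature change appears in the first hunk: a filename that matches a `save_with_filename` alias is fetched from Hugging Face on first use. A minimal sketch of the expected call path, assuming the defaulting behavior shown above (the call site itself is illustrative, not part of this commit):

```python
# Illustrative usage; when known_files is omitted, the registered
# KnownDownloadables for the folder (e.g. KNOWN_CHECKPOINTS) are consulted.
from comfy.model_downloader import get_or_download

# Downloads stabilityai/stable-audio-open-1.0's model.safetensors on first
# use and saves it under the aliased checkpoint name.
ckpt_path = get_or_download("checkpoints", "stable_audio_open_1.0.safetensors")

# The matching T5 text encoder resolves the same way from the "clip" folder.
clip_path = get_or_download("clip", "t5_base.safetensors")
```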

View File

@@ -6,7 +6,7 @@ from os.path import split
 from pathlib import PurePosixPath
 from typing import Optional, List, Sequence, Union
 
-from can_ada import parse, URL
+from can_ada import parse, URL # pylint: disable=no-name-in-module
 from typing_extensions import TypedDict, NotRequired
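The disable is needed because `can_ada` is a compiled extension, so pylint cannot see `parse` and `URL` statically. For context, a small usage sketch (the URL value is illustrative, and the WHATWG-style attribute names are an assumption about can_ada's API rather than something confirmed by this diff):

```python
from can_ada import parse  # pylint: disable=no-name-in-module

# can_ada implements the WHATWG URL model; host/pathname names here are
# assumed from that spec.
url = parse("https://huggingface.co/stabilityai/stable-audio-open-1.0/resolve/main/model.safetensors")
print(url.host)      # huggingface.co
print(url.pathname)  # /stabilityai/stable-audio-open-1.0/resolve/main/model.safetensors
```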

View File

@@ -105,7 +105,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         special_tokens = {"start": 49406, "end": 49407, "pad": 49407}
         assert layer in self.LAYERS
-        config = get_path_as_dict(textmodel_json_config, "sd1_clip_config.json")
+        config = get_path_as_dict(textmodel_json_config, "sd1_clip_config.json", package=__package__)
         self.transformer = model_class(config, dtype, device, ops.manual_cast)
         self.num_layers = self.transformer.num_layers
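This and the following hunks pass `package=__package__` so the fallback JSON config resolves relative to whichever package actually ships it, instead of a hard-coded module path. A sketch of what `get_path_as_dict` is assumed to do; the real helper lives in `comfy.component_model.files` and may differ:

```python
# Hypothetical reconstruction, not the shipped implementation.
import importlib.resources
import json


def get_path_as_dict(config, default_name: str, package: str) -> dict:
    # An already-parsed config passes through untouched.
    if isinstance(config, dict):
        return config
    # An explicit path wins; otherwise fall back to the JSON bundled
    # alongside the calling package.
    if isinstance(config, str):
        with open(config, "r", encoding="utf8") as f:
            return json.load(f)
    resource = importlib.resources.files(package).joinpath(default_name)
    return json.loads(resource.read_text(encoding="utf8"))
```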

View File

@@ -9,7 +9,7 @@ class SD2ClipHModel(sd1_clip.SDClipModel):
         layer = "hidden"
         layer_idx = -2
-        textmodel_json_config = get_path_as_dict(textmodel_json_config, "sd2_clip_config.json")
+        textmodel_json_config = get_path_as_dict(textmodel_json_config, "sd2_clip_config.json", package=__package__)
         super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0})

View File

@@ -8,7 +8,7 @@ from ..component_model.files import get_path_as_dict
 class PT5XlModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = get_path_as_dict(textmodel_json_config, "t5_pile_config_xl.json", package="comfy.text_encoders")
+        textmodel_json_config = get_path_as_dict(textmodel_json_config, "t5_pile_config_xl.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 2, "pad": 1}, model_class=t5.T5, enable_attention_masks=True, zero_out_masked=True)

View File

@@ -10,9 +10,9 @@ class LLAMATokenizer:
         self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path) # pylint: disable=unexpected-keyword-arg
         self.end = self.tokenizer.eos_id()
         self.eos_token_id = self.end
-        self.eos_token = self.tokenizer.id_to_piece(self.eos_token_id)
+        self.eos_token = self.tokenizer.id_to_piece(self.eos_token_id) # pylint: disable=no-member
         self._vocab = {
-            self.tokenizer.id_to_piece(i): i for i in range(self.tokenizer.get_piece_size())
+            self.tokenizer.id_to_piece(i): i for i in range(self.tokenizer.get_piece_size()) # pylint: disable=no-member
         }
 
     def get_vocab(self):

View File

@@ -4,15 +4,16 @@ import comfy.text_encoders.t5
 from comfy import sd1_clip
 from comfy.component_model import files
 
 class T5BaseModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_base.json")
+        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_base.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True)
 
 class T5BaseTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None):
-        tokenizer_path = files.get_package_as_path("comfy.t5_tokenizer")
+        tokenizer_path = files.get_package_as_path("comfy.text_encoders.t5_tokenizer")
         super().__init__(tokenizer_path, pad_with_end=False, embedding_size=768, embedding_key='t5base', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=128)
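The corrected dotted path matters because the tokenizer files live under `comfy/text_encoders/t5_tokenizer`, not `comfy/t5_tokenizer`. A minimal sketch of what `files.get_package_as_path` is assumed to do, valid for ordinary on-disk installs:

```python
# Hypothetical sketch; assumes a non-zipped install where the package
# resolves to a real directory.
import importlib.resources


def get_package_as_path(package: str) -> str:
    # files() returns a Traversable; for filesystem packages, str() of it
    # is the directory that holds the package's data files.
    return str(importlib.resources.files(package))
```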

View File

@@ -12,7 +12,7 @@ from comfy.component_model import files
 class T5XXLModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, textmodel_json_config=None):
-        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_xxl.json", package="comfy.text_encoders")
+        textmodel_json_config = files.get_path_as_dict(textmodel_json_config, "t5_config_xxl.json", package=__package__)
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5)

View File

@@ -1,5 +1,6 @@
 torch
 torchvision
+torchaudio
 torchdiffeq>=0.2.3
 torchsde>=0.2.6
 einops>=0.6.0
@@ -56,4 +57,5 @@ certifi
 spandrel
 numpy>=1.26.3,<2.0.0
 soundfile
-watchdog
+watchdog
+PySoundFile
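`torchaudio` backs the new audio nodes, while `soundfile`/`PySoundFile` handle reading and writing audio files. A quick sketch of checking a file written by `SaveAudio` (the path is illustrative):

```python
import soundfile as sf

# data is a float array of samples, sample_rate an int; stable-audio-open
# outputs are typically 44.1 kHz.
data, sample_rate = sf.read("output/audio/objectobject_00001_.flac")
print(data.shape, sample_rate)
```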

View File

@@ -6,6 +6,186 @@ from comfy.model_downloader import add_known_models, KNOWN_LORAS
 from comfy.model_downloader_types import CivitFile
 
 _workflows = {
+    "audio_1": {
+        "14": {
+            "inputs": {
+                "ckpt_name": "stable_audio_open_1.0.safetensors"
+            },
+            "class_type": "CheckpointLoaderSimple",
+            "_meta": {
+                "title": "Load Checkpoint"
+            }
+        },
+        "15": {
+            "inputs": {
+                "clip_name": "t5_base.safetensors",
+                "type": "stable_audio"
+            },
+            "class_type": "CLIPLoader",
+            "_meta": {
+                "title": "Load CLIP"
+            }
+        },
+        "16": {
+            "inputs": {
+                "text": "hard bop, upright bass, slappy bass, low frequencies, drum kit brushed hi-hat, snare with ghost notes, syncopated, groove",
+                "clip": [
+                    "15",
+                    0
+                ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+                "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "17": {
+            "inputs": {
+                "text": "",
+                "clip": [
+                    "15",
+                    0
+                ]
+            },
+            "class_type": "CLIPTextEncode",
+            "_meta": {
+                "title": "CLIP Text Encode (Prompt)"
+            }
+        },
+        "19": {
+            "inputs": {
+                "cfg": 7,
+                "model": [
+                    "14",
+                    0
+                ],
+                "positive": [
+                    "16",
+                    0
+                ],
+                "negative": [
+                    "17",
+                    0
+                ]
+            },
+            "class_type": "CFGGuider",
+            "_meta": {
+                "title": "CFGGuider"
+            }
+        },
+        "21": {
+            "inputs": {
+                "noise_seed": 600769511872395
+            },
+            "class_type": "RandomNoise",
+            "_meta": {
+                "title": "RandomNoise"
+            }
+        },
+        "22": {
+            "inputs": {
+                "noise": [
+                    "21",
+                    0
+                ],
+                "guider": [
+                    "19",
+                    0
+                ],
+                "sampler": [
+                    "29",
+                    0
+                ],
+                "sigmas": [
+                    "24",
+                    0
+                ],
+                "latent_image": [
+                    "30",
+                    0
+                ]
+            },
+            "class_type": "SamplerCustomAdvanced",
+            "_meta": {
+                "title": "SamplerCustomAdvanced"
+            }
+        },
+        "24": {
+            "inputs": {
+                "scheduler": "exponential",
+                "steps": 20,
+                "denoise": 1,
+                "model": [
+                    "14",
+                    0
+                ]
+            },
+            "class_type": "BasicScheduler",
+            "_meta": {
+                "title": "BasicScheduler"
+            }
+        },
+        "26": {
+            "inputs": {
+                "samples": [
+                    "22",
+                    1
+                ],
+                "vae": [
+                    "14",
+                    2
+                ]
+            },
+            "class_type": "VAEDecodeAudio",
+            "_meta": {
+                "title": "VAEDecodeAudio"
+            }
+        },
+        "27": {
+            "inputs": {
+                "filename_prefix": "audio/objectobject",
+                "audio": [
+                    "26",
+                    0
+                ]
+            },
+            "class_type": "SaveAudio",
+            "_meta": {
+                "title": "SaveAudio"
+            }
+        },
+        "29": {
+            "inputs": {
+                "version": "regular"
+            },
+            "class_type": "SamplerEulerCFGpp",
+            "_meta": {
+                "title": "SamplerEulerCFG++"
+            }
+        },
+        "30": {
+            "inputs": {
+                "seconds": 47.6
+            },
+            "class_type": "EmptyLatentAudio",
+            "_meta": {
+                "title": "EmptyLatentAudio"
+            }
+        },
+        "31": {
+            "inputs": {
+                "filename_prefix": "latents/ComfyUI",
+                "samples": [
+                    "22",
+                    1
+                ]
+            },
+            "class_type": "SaveLatent",
+            "_meta": {
+                "title": "SaveLatent"
+            }
+        }
+    },
     "auraflow_1": {
         "1": {
             "inputs": {
@@ -273,5 +453,9 @@ async def test_workflow(workflow_name: str, workflow: dict, has_gpu: bool, clien
     # todo: add all the models we want to test a bit more elegantly
     outputs = await client.queue_prompt(prompt)
-    save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveImage")
-    assert outputs[save_image_node_id]["images"][0]["abs_path"] is not None
+    if any(v.class_type == "SaveImage" for v in prompt.values()):
+        save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveImage")
+        assert outputs[save_image_node_id]["images"][0]["abs_path"] is not None
+    elif any(v.class_type == "SaveAudio" for v in prompt.values()):
+        save_image_node_id = next(key for key in prompt if prompt[key].class_type == "SaveAudio")
+        assert outputs[save_image_node_id]["audio"][0]["filename"] is not None
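A hedged sketch of the new audio branch in isolation, mirroring the assertions above (the output shape is inferred from the test itself, not from a documented API), written with a dedicated `save_audio_node_id` name rather than reusing `save_image_node_id`:

```python
async def check_audio_output(client, prompt) -> None:
    # Mirrors the SaveAudio branch above; for the audio_1 workflow this
    # resolves node "27" and asserts that an audio file was written.
    outputs = await client.queue_prompt(prompt)
    save_audio_node_id = next(k for k, v in prompt.items() if v.class_type == "SaveAudio")
    assert outputs[save_audio_node_id]["audio"][0]["filename"] is not None
```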