ComfyUI/tests/inference/test_workflows.py

import importlib.resources
import json
import logging
import time
from importlib.abc import Traversable
from typing import Any, AsyncGenerator
import threading
import psutil

try:
    import pynvml
except ImportError:
    pynvml = None

import pytest

from comfy.api.components.schema.prompt import Prompt
from comfy.client.embedded_comfy_client import Comfy
from comfy.distributed.process_pool_executor import ProcessPoolExecutor
from comfy.model_downloader import add_known_models, KNOWN_LORAS
from comfy.model_downloader_types import CivitFile, HuggingFile
from comfy_extras.nodes.nodes_audio import TorchAudioNotFoundError
from . import workflows
import itertools
from comfy.cli_args import default_configuration
from comfy.cli_args_types import PerformanceFeature

logger = logging.getLogger(__name__)


class ResourceMonitor:
    """Track peak RAM and GPU memory of this process tree inside a ``with`` block.

    Entering the context starts a daemon thread that, every ``interval``
    seconds, sums ``memory_info().rss`` over the current process and all of
    its recursive children and, when ``pynvml`` was imported and initialised,
    sums the ``usedGpuMemory`` NVML reports for those PIDs on every detected
    GPU. The largest totals seen are kept in ``peak_cpu_ram`` and
    ``peak_gpu_vram``. Leaving the context stops the thread and shuts NVML
    down.

    Sampling is best effort: failures are logged at debug level and never
    propagate to the code being measured. The peaks are not reset when the
    same instance is entered again, so they accumulate across uses.
    """

    def __init__(self, interval: float = 0.1):
        # Seconds to wait between two samples.
        self.interval = interval
        # Largest summed RSS observed so far, as reported by psutil.
        self.peak_cpu_ram = 0
        # Largest summed per-process GPU memory observed so far, as reported by NVML.
        self.peak_gpu_vram = 0
        self._stop_event = threading.Event()
        self._thread = None
        # True between a successful nvmlInit() and the matching nvmlShutdown().
        self._pynvml_available = False
        self._gpu_handles = []

    @staticmethod
    def _tree_rss(processes) -> int:
        """Return the summed RSS of ``processes``, skipping ones that vanished or are inaccessible."""
        total_rss = 0
        for proc in processes:
            try:
                total_rss += proc.memory_info().rss
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Children may exit between enumeration and the query.
                continue
        return total_rss

    @staticmethod
    def _tree_vram(gpu_procs, pids) -> int:
        """Return the summed ``usedGpuMemory`` of the entries in ``gpu_procs`` whose pid is in ``pids``.

        pynvml reports ``usedGpuMemory`` as ``None`` when the driver cannot
        provide the value; such entries are skipped instead of aborting the
        whole sample with a ``TypeError``.
        """
        total_vram = 0
        for proc in gpu_procs:
            if proc.pid not in pids:
                continue
            used = getattr(proc, "usedGpuMemory", None)
            if used is not None:
                total_vram += used
        return total_vram

    def _sample_vram(self, pids) -> int:
        """Return the GPU memory attributed to ``pids`` across all known GPU handles."""
        total_vram = 0
        for handle in self._gpu_handles:
            try:
                # Both compute and graphics contexts are considered.
                gpu_procs = (
                    pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
                    + pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle)
                )
            except Exception:
                # Skip errors for specific GPU queries; other GPUs still count.
                logger.debug("GPU process query failed; skipping this device", exc_info=True)
                continue
            total_vram += self._tree_vram(gpu_procs, pids)
        return total_vram

    def _monitor(self):
        """Thread target: sample until the stop event is set, updating the peaks."""
        current_process = psutil.Process()
        while not self._stop_event.is_set():
            try:
                # Monitor CPU RAM (RSS) for process tree
                processes = [current_process] + current_process.children(recursive=True)
                pids = {p.pid for p in processes}
                self.peak_cpu_ram = max(self.peak_cpu_ram, self._tree_rss(processes))

                # Monitor GPU VRAM if available
                if self._pynvml_available and self._gpu_handles:
                    self.peak_gpu_vram = max(self.peak_gpu_vram, self._sample_vram(pids))
            except Exception:
                # Monitoring must never break the measured code; keep sampling.
                logger.debug("Resource sample failed", exc_info=True)

            # Unlike time.sleep, this returns immediately once __exit__ sets the event.
            self._stop_event.wait(self.interval)

    def __enter__(self):
        """Initialise NVML when possible, start the sampling thread and return ``self``."""
        # Allow the same instance to be entered again after a previous exit.
        self._stop_event.clear()
        self._gpu_handles = []
        if pynvml:
            try:
                pynvml.nvmlInit()
                # Flag right after init so __exit__ shuts NVML down even if
                # enumerating the devices below fails.
                self._pynvml_available = True
                device_count = pynvml.nvmlDeviceGetCount()
                self._gpu_handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(device_count)]
            except Exception as e:
                # Without handles the monitor simply skips VRAM sampling.
                self._gpu_handles = []
                logger.warning(f"Failed to initialize pynvml for VRAM monitoring: {e}")

        self._thread = threading.Thread(target=self._monitor, daemon=True)
        self._thread.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the sampling thread and shut NVML down; exceptions from the block are not suppressed."""
        self._stop_event.set()
        if self._thread:
            self._thread.join()
            self._thread = None
        if self._pynvml_available:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                logger.debug("nvmlShutdown failed", exc_info=True)
            self._pynvml_available = False
            self._gpu_handles = []


def _generate_config_params():
    """Yield one configuration-override dict per combination of tested options.

    Four independent axes are combined with ``itertools.product``: the
    attention implementation, async offload on/off, pinned memory on/off and
    the set of ``fast`` performance features. Each yielded dict holds the keys
    of all four axes, in that order.
    """
    attention_flags = [
        "use_pytorch_cross_attention",
        # Currently disabled alternatives:
        # "use_split_cross_attention",
        # "use_quad_cross_attention",
        # "use_sage_attention",
        # "use_flash_attention"
    ]
    # One dict per attention flag, with exactly that flag set to True.
    attention_choices = []
    for enabled in attention_flags:
        attention_choices.append({flag: flag == enabled for flag in attention_flags})

    offload_choices = [{"disable_async_offload": disabled} for disabled in (False, True)]
    pinned_choices = [{"disable_pinned_memory": disabled} for disabled in (False, True)]
    fast_choices = [
        {"fast": set()},
        # Currently disabled alternatives:
        # {"fast": {PerformanceFeature.Fp16Accumulation}},
        # {"fast": {PerformanceFeature.Fp8MatrixMultiplication}},
        {"fast": {PerformanceFeature.CublasOps}},
    ]

    axes = (attention_choices, offload_choices, pinned_choices, fast_choices)
    for combination in itertools.product(*axes):
        merged = {}
        for partial in combination:
            merged.update(partial)
        yield merged


def _config_param_id(param) -> str:
    """Render one configuration-override dict as a ``key=value,...`` pytest id."""
    return ",".join(f"{k}={v}" for k, v in param.items())


@pytest.fixture(
    scope="function",
    autouse=False,
    params=_generate_config_params(),
    ids=_config_param_id,
)
async def client(tmp_path_factory, request) -> AsyncGenerator[Any, Any]:
    """Yield an embedded ``Comfy`` client configured with the current parameter set.

    The fixture is function scoped and parametrized over every dict produced
    by ``_generate_config_params``; ``request.param`` is applied on top of the
    default configuration. ``tmp_path_factory`` is requested but not used in
    the body.
    """
    configuration = default_configuration()
    # this should help things go a little faster
    configuration.disable_all_custom_nodes = True
    configuration.update(request.param)
    # use ProcessPoolExecutor to respect various config settings
    with ProcessPoolExecutor(max_workers=1) as pool:
        async with Comfy(configuration=configuration, executor=pool) as comfy:
            yield comfy


def _prepare_for_workflows() -> dict[str, Traversable]:
    """Register the extra models and return the workflow files keyed by file name.

    Calls ``add_known_models`` for one LoRA and one checkpoint, then collects
    every regular file ending in ``.json`` found directly inside the
    ``workflows`` package.
    """
    add_known_models("loras", HuggingFile("artificialguybr/pixelartredmond-1-5v-pixel-art-loras-for-sd-1-5", "PixelArtRedmond15V-PixelArt-PIXARFK.safetensors"))
    add_known_models("checkpoints", HuggingFile("autismanon/modeldump", "cardosAnime_v20.safetensors"))

    discovered: dict[str, Traversable] = {}
    for entry in importlib.resources.files(workflows).iterdir():
        if not entry.is_file():
            continue
        if entry.name.endswith(".json"):
            discovered[entry.name] = entry
    return discovered


def _first_node_id(prompt, class_type: str):
    """Return the id of the first node in ``prompt`` with the given ``class_type``, or ``None``."""
    return next((key for key in prompt if prompt[key].class_type == class_type), None)


@pytest.mark.asyncio
@pytest.mark.parametrize("workflow_name, workflow_file", _prepare_for_workflows().items())
async def test_workflow(workflow_name: str, workflow_file: Traversable, has_gpu: bool, client: Comfy):
    """Queue one workflow on the embedded client and check that it produced output.

    The test is skipped without a GPU, for workflows whose name contains
    ``compile``, and when the run raises ``TorchAudioNotFoundError``. Duration
    and peak RAM/VRAM are logged whether the run succeeds or fails. The output
    check uses the first node type found, in this order: ``SaveImage``,
    ``SaveAudio``, ``SaveAnimatedWEBP``, ``PreviewString``; if none is present
    the outputs only have to be non-empty.
    """
    if not has_gpu:
        pytest.skip("requires gpu")

    if "compile" in workflow_name:
        # pytest.skip raises, so no explicit return is needed afterwards.
        pytest.skip("compilation has regressed in 0.4.0 and later because upcast weights are now permitted to be compiled, causing OOM errors in most cases")

    workflow = json.loads(workflow_file.read_text(encoding="utf8"))

    prompt = Prompt.validate(workflow)
    # todo: add all the models we want to test a bit more elegantly

    monitor = ResourceMonitor()
    # perf_counter is monotonic, so the duration is unaffected by clock adjustments.
    start_time = time.perf_counter()
    try:
        with monitor:
            outputs = await client.queue_prompt(prompt)
    except TorchAudioNotFoundError:
        pytest.skip("requires torchaudio")
    finally:
        duration = time.perf_counter() - start_time
        ram_gb = monitor.peak_cpu_ram / (1024**3)
        vram_gb = monitor.peak_gpu_vram / (1024**3)
        logger.info(f"Test {workflow_name} with client {client} took {duration:.4f}s | Peak RAM: {ram_gb:.2f} GB | Peak VRAM: {vram_gb:.2f} GB")

    # (node class, key in that node's outputs, field that must be set on the first entry)
    file_output_checks = (
        ("SaveImage", "images", "abs_path"),
        ("SaveAudio", "audio", "filename"),
        ("SaveAnimatedWEBP", "images", "filename"),
    )
    for class_type, output_key, field in file_output_checks:
        node_id = _first_node_id(prompt, class_type)
        if node_id is not None:
            assert outputs[node_id][output_key][0][field] is not None
            return

    preview_node_id = _first_node_id(prompt, "PreviewString")
    if preview_node_id is not None:
        output_str = outputs[preview_node_id]["string"][0]
        assert output_str is not None
        assert len(output_str) > 0
        return

    assert len(outputs) > 0
    logger.warning(f"test {workflow_name} did not have a node that could be checked for output")