Fix distributed previews, add sageattention to the docker container

This commit is contained in:
doctorpangloss 2025-09-12 11:48:25 -07:00
parent 421c9b88ae
commit 7cd6383110
6 changed files with 86 additions and 46 deletions

View File

@ -1,4 +1,5 @@
*
!comfy*
!pyproject.toml
!README.md
!README.md
!pkg/*

View File

@ -10,6 +10,7 @@ ENV UV_BREAK_SYSTEM_PACKAGES=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV PIP_NO_CACHE_DIR=1
ENV DEBIAN_FRONTEND=noninteractive
ENV UV_OVERRIDE=/workspace/overrides.txt
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
@ -17,20 +18,22 @@ ENV LC_ALL=C.UTF-8
# mitigates
# RuntimeError: Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
# numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
RUN pip freeze | grep numpy > numpy-override.txt
# Build the uv override file at /workspace/overrides.txt.
# Entries are appended in this order: a pinned onnxruntime-gpu, every nvidia* distribution
# already present in the image (as reported by pip freeze), a pinned torch build, the
# installed numpy, and the opencv / sentry-sdk entries.
# NOTE(review): the `python_version < '0'` marker can never be satisfied; presumably it is
# used so uv treats those packages as not required - confirm against uv's override semantics.
# The commands are separated with `;`, so a grep that matches nothing does not fail the build.
RUN { \
        echo "onnxruntime-gpu==1.22.0"; \
        pip freeze | grep nvidia; \
        echo "torch==2.7.0a0+7c8ec84dab.nv25.3"; \
        pip freeze | grep numpy; \
        echo "opencv-python; python_version < '0'"; \
        echo "opencv-contrib-python; python_version < '0'"; \
        echo "opencv-python-headless; python_version < '0'"; \
        echo "opencv-contrib-python-headless!=4.11.0.86"; \
        echo "sentry-sdk; python_version < '0'"; \
    } >> /workspace/overrides.txt
# mitigates https://stackoverflow.com/questions/55313610/importerror-libgl-so-1-cannot-open-shared-object-file-no-such-file-or-directo
# mitigates AttributeError: module 'cv2.dnn' has no attribute 'DictValue'
# see https://github.com/facebookresearch/nougat/issues/40
RUN apt-get update && \
apt-get install --no-install-recommends -y ffmpeg libsm6 libxext6 && \
pip install uv && uv --version && \
apt-get purge -y && \
# One layer that, in order:
#   1. installs uv with pip and prints its version;
#   2. installs the ffmpeg, libsm6 and libxext6 system packages without recommended extras;
#   3. uninstalls every opencv distribution pip currently lists and deletes the leftover
#      cv2 directory under the Python 3.12 dist-packages path;
#   4. installs wheel, then opencv-contrib-python-headless (any version except 4.11.0.86)
#      without build isolation;
#   5. deletes the apt package lists in the same RUN that created them.
RUN pip install uv && uv --version && \
apt-get update && apt-get install --no-install-recommends ffmpeg libsm6 libxext6 -y && \
uv pip uninstall --system $(pip list --format=freeze | grep opencv) && \
rm -rf /usr/local/lib/python3.12/dist-packages/cv2/ && \
uv pip install wheel && \
uv pip install --no-build-isolation "opencv-contrib-python-headless!=4.11.0.86" && \
rm -rf /var/lib/apt/lists/*
RUN uv pip uninstall --system $(pip list --format=freeze | grep opencv) && \
rm -rf /usr/local/lib/python3.12/dist-packages/cv2/ && \
uv pip install --no-build-isolation opencv-python-headless
# install sageattention from the prebuilt wheel shipped in the build context under pkg/
# COPY is used instead of ADD: this is a plain local file and none of ADD's extra
# behaviors (tar extraction, remote fetch) are wanted.
COPY pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl /workspace/pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl
# Requirement specifiers are quoted on purpose: in shell-form RUN an unquoted `>=` is an
# output redirection (`spandrel>=0.3.4` installs an unconstrained `spandrel` and sends
# stdout to a file named `=0.3.4`), which silently drops the version constraint.
# NOTE(review): the relative `file:./pkg/...` path assumes the working directory at this
# point is /workspace - confirm against the WORKDIR set earlier in the file.
RUN uv pip install --no-deps --no-build-isolation \
    "spandrel>=0.3.4" \
    "timm>=1.0.19" \
    "tensorboard>=2.17.0" \
    poetry \
    flash-attn \
    "xformers==0.0.31.post1" \
    file:./pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl
# this exotic command will determine the correct torchaudio to install for the image
RUN <<-EOF
python -c 'import torch, re, subprocess
@ -41,7 +44,7 @@ if not torch_ver_match:
torch_ver = torch_ver_match.group(1)
cuda_ver_tag = f"cu{torch.version.cuda.replace(".", "")}"
command = [
"uv", "pip", "install", "--no-deps", "--overrides=numpy-override.txt",
"uv", "pip", "install", "--no-deps",
f"torchaudio=={torch_ver}+{cuda_ver_tag}",
"--extra-index-url", f"https://download.pytorch.org/whl/{cuda_ver_tag}",
]
@ -50,17 +53,18 @@ EOF
# sources for building this dockerfile
# use these lines to build from the local fs
# ADD . /src
# ARG SOURCES=/src
ADD . /workspace/src
ARG SOURCES="comfyui[attention,comfyui_manager]@./src"
# this builds from github
ARG SOURCES="comfyui[attention,comfyui_manager]@git+https://github.com/hiddenswitch/ComfyUI.git"
#ARG SOURCES="comfyui[attention,comfyui_manager]@git+https://github.com/hiddenswitch/ComfyUI.git"
ENV SOURCES=$SOURCES
RUN uv pip install --overrides=numpy-override.txt $SOURCES
RUN uv pip install $SOURCES
WORKDIR /workspace
# addresses https://github.com/pytorch/pytorch/issues/104801
# and issues reported by importing nodes_canny
RUN python -c "import torch; import xformers; import sageattention; import cv2"
RUN comfyui --quick-test-for-ci --cpu --cwd /workspace
EXPOSE 8188

View File

@ -27,7 +27,7 @@ class LogInterceptor(io.TextIOWrapper):
# Simple handling for cr to overwrite the last output if it isnt a full line
# else logs just get full of progress messages
if isinstance(data, str) and data.startswith("\r") and not logs[-1]["m"].endswith("\n"):
if isinstance(data, str) and data.startswith("\r") and len(logs) > 0 and not logs[-1]["m"].endswith("\n"):
logs.pop()
logs.append(entry)
super().write(data)

View File

@ -2,25 +2,35 @@ from __future__ import annotations
import asyncio
import base64
import pickle
from asyncio import AbstractEventLoop
from enum import Enum
from functools import partial
from typing import Optional, Dict, Any, Union
from typing import Optional, Dict, Any, TypeVar, NewType
from aio_pika import DeliveryMode
from aio_pika.patterns import RPC
from ..component_model.executor_types import SendSyncEvent, SendSyncData, ExecutorToClientProgress, \
UnencodedPreviewImageMessage, StatusMessage, QueueInfo, ExecInfo
from ..component_model.queue_types import BinaryEventTypes
StatusMessage, QueueInfo, ExecInfo
T = TypeVar('T')
Base64Pickled = NewType('Base64Pickled', str)
async def _progress(event: SendSyncEvent, data: SendSyncData, user_id: Optional[str] = None,
def obj2base64(obj: T) -> Base64Pickled:
    """Pickle ``obj`` and return the pickled bytes as base64 text.

    The result is a ``str`` (wrapped in ``Base64Pickled``) and is the inverse of
    ``base642obj``.
    """
    pickled = pickle.dumps(obj)
    encoded = base64.b64encode(pickled)
    return Base64Pickled(encoded.decode())
def base642obj(data: Base64Pickled) -> T:
    """Decode base64 text and unpickle the resulting bytes.

    Inverse of ``obj2base64``.

    NOTE(review): ``pickle.loads`` can execute arbitrary code while loading. In this
    module the input is what ``_progress`` receives as its ``event``/``data`` arguments,
    so this is only safe if every sender on that channel is trusted - confirm.
    """
    raw = base64.b64decode(data)
    return pickle.loads(raw)
async def _progress(event: Base64Pickled, data: Base64Pickled, user_id: Optional[str] = None,
caller_server: Optional[ExecutorToClientProgress] = None) -> None:
assert caller_server is not None
assert user_id is not None
if event == BinaryEventTypes.PREVIEW_IMAGE or event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE or isinstance(data, str):
data: bytes = base64.b64decode(data)
caller_server.send_sync(event, data, sid=user_id)
caller_server.send_sync(base642obj(event), base642obj(data), sid=user_id)
def _get_name(queue_name: str, user_id: str) -> str:
@ -43,27 +53,10 @@ class DistributedExecutorToClientProgress(ExecutorToClientProgress):
return True
async def send(self, event: SendSyncEvent, data: SendSyncData, user_id: Optional[str]) -> None:
if event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE:
from ..cmd.latent_preview_image_encoding import encode_preview_image
# encode preview image
event = BinaryEventTypes.PREVIEW_IMAGE.value
data: UnencodedPreviewImageMessage
format, pil_image, max_size, node_id, task_id = data
data: bytes = encode_preview_image(pil_image, format, max_size, node_id, task_id)
if isinstance(data, bytes) or isinstance(data, bytearray):
if isinstance(event, Enum):
event: int = event.value
data: str = base64.b64encode(data).decode()
if user_id is None:
# todo: user_id should never be none here
return
assert user_id is not None, f"event={event} data={data}"
try:
# we don't need to await this coroutine
_ = asyncio.create_task(self._rpc.call(_get_name(self._queue_name, user_id), {"event": event, "data": data}, expiration=1000))
_ = asyncio.create_task(self._rpc.call(_get_name(self._queue_name, user_id), {"event": obj2base64(event), "data": obj2base64(data)}, expiration=1000, delivery_mode=DeliveryMode.NOT_PERSISTENT))
except asyncio.TimeoutError:
# these can gracefully expire
pass

View File

@ -5,7 +5,14 @@ services:
context: .
dockerfile: Dockerfile
volumes:
# USING DOCKER MANAGED VOLUMES
- workspace_data:/workspace
# OR: USE LOCAL DIRECTORIES
# Comment out the `- workspace_data...` line, then uncomment:
# - models:/workspace/models
# - custom_nodes:/workspace/custom_nodes
# - output:/workspace/output
# - input:/workspace/input
deploy:
replicas: 1
resources:
@ -16,7 +23,6 @@ services:
capabilities: [ gpu ]
environment:
- COMFYUI_DISTRIBUTED_QUEUE_CONNECTION_URI=amqp://guest:guest@rabbitmq:5672
- COMFYUI_EXECUTOR_FACTORY=ProcessPoolExecutor
- COMFYUI_PANIC_WHEN=torch.cuda.OutOfMemoryError
- COMFYUI_LOGGING_LEVEL=ERROR
command:
@ -25,7 +31,7 @@ services:
test: curl -f http://localhost:9090/health
interval: 10s
timeout: 5s
retries: 3
retries: 1
start_period: 10s
restart: unless-stopped
frontend:
@ -35,7 +41,14 @@ services:
deploy:
replicas: 1
volumes:
# USING DOCKER MANAGED VOLUMES
- workspace_data:/workspace
# OR: USE LOCAL DIRECTORIES
# Comment out the `- workspace_data...` line, then uncomment:
# - models:/workspace/models
# - custom_nodes:/workspace/custom_nodes
# - output:/workspace/output
# - input:/workspace/input
environment:
- COMFYUI_DISTRIBUTED_QUEUE_CONNECTION_URI=amqp://guest:guest@rabbitmq:5672
- COMFYUI_DISTRIBUTED_QUEUE_FRONTEND=1
@ -54,5 +67,34 @@ services:
restart: unless-stopped
rabbitmq:
image: rabbitmq:3
command: >
sh -c "echo 'log.default.level = error' > /etc/rabbitmq/rabbitmq.conf &&
docker-entrypoint.sh rabbitmq-server"
volumes:
workspace_data: {}
# USING DOCKER MANAGED VOLUMES
workspace_data: {}
# OR: USE LOCAL DIRECTORIES
# models:
# driver: local
# driver_opts:
# type: 'none'
# o: 'bind'
# device: './models'
# custom_nodes:
# driver: local
# driver_opts:
# type: 'none'
# o: 'bind'
# device: './custom_nodes'
# output:
# driver: local
# driver_opts:
# type: 'none'
# o: 'bind'
# device: './output'
# input:
# driver: local
# driver_opts:
# type: 'none'
# o: 'bind'
# device: './input'

Binary file not shown.