diff --git a/.dockerignore b/.dockerignore
index e787aefe2..02f506a25 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,5 @@
 *
 !comfy*
 !pyproject.toml
-!README.md
\ No newline at end of file
+!README.md
+!pkg/*
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 1d3f03ccc..cc2404af2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,6 +10,7 @@ ENV UV_BREAK_SYSTEM_PACKAGES=1
 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
 ENV PIP_NO_CACHE_DIR=1
 ENV DEBIAN_FRONTEND=noninteractive
+ENV UV_OVERRIDE=/workspace/overrides.txt
 
 ENV LANG=C.UTF-8
 ENV LC_ALL=C.UTF-8
@@ -17,20 +18,22 @@ ENV LC_ALL=C.UTF-8
 # mitigates
 # RuntimeError: Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
 # numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
-RUN pip freeze | grep numpy > numpy-override.txt
+RUN echo "onnxruntime-gpu==1.22.0" >> /workspace/overrides.txt; pip freeze | grep nvidia >> /workspace/overrides.txt; echo "torch==2.7.0a0+7c8ec84dab.nv25.3" >> /workspace/overrides.txt; pip freeze | grep numpy >> /workspace/overrides.txt; echo "opencv-python; python_version < '0'" >> /workspace/overrides.txt; echo "opencv-contrib-python; python_version < '0'" >> /workspace/overrides.txt; echo "opencv-python-headless; python_version < '0'" >> /workspace/overrides.txt; echo "opencv-contrib-python-headless!=4.11.0.86" >> /workspace/overrides.txt; echo "sentry-sdk; python_version < '0'" >> /workspace/overrides.txt
 
+# mitigates https://stackoverflow.com/questions/55313610/importerror-libgl-so-1-cannot-open-shared-object-file-no-such-file-or-directo
 # mitigates AttributeError: module 'cv2.dnn' has no attribute 'DictValue' \
 # see https://github.com/facebookresearch/nougat/issues/40
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y ffmpeg libsm6 libxext6 && \
-    pip install uv && uv --version && \
-    apt-get purge -y && \
+RUN pip install uv && uv --version && \
+    apt-get update && apt-get install --no-install-recommends ffmpeg libsm6 libxext6 -y && \
+    uv pip uninstall --system $(pip list --format=freeze | grep opencv) && \
+    rm -rf /usr/local/lib/python3.12/dist-packages/cv2/ && \
+    uv pip install wheel && \
+    uv pip install --no-build-isolation "opencv-contrib-python-headless!=4.11.0.86" && \
     rm -rf /var/lib/apt/lists/*
 
-RUN uv pip uninstall --system $(pip list --format=freeze | grep opencv) && \
-    rm -rf /usr/local/lib/python3.12/dist-packages/cv2/ && \
-    uv pip install --no-build-isolation opencv-python-headless
-
+# install sageattention
+ADD pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl /workspace/pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl
+RUN uv pip install --no-deps --no-build-isolation spandrel>=0.3.4 timm>=1.0.19 tensorboard>=2.17.0 poetry flash-attn xformers==0.0.31.post1 file:./pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl
 # this exotic command will determine the correct torchaudio to install for the image
 RUN <<-EOF
 python -c 'import torch, re, subprocess
@@ -41,7 +44,7 @@ if not torch_ver_match:
 torch_ver = torch_ver_match.group(1)
 cuda_ver_tag = f"cu{torch.version.cuda.replace(".", "")}"
 command = [
-    "uv", "pip", "install", "--no-deps", "--overrides=numpy-override.txt",
+    "uv", "pip", "install", "--no-deps",
     f"torchaudio=={torch_ver}+{cuda_ver_tag}",
     "--extra-index-url", f"https://download.pytorch.org/whl/{cuda_ver_tag}",
 ]
@@ -50,17 +53,18 @@ EOF
 
 # sources for building this dockerfile
 # use these lines to build from the local fs
-# ADD . /src
-# ARG SOURCES=/src
+ADD . /workspace/src
+ARG SOURCES="comfyui[attention,comfyui_manager]@./src"
 # this builds from github
-ARG SOURCES="comfyui[attention,comfyui_manager]@git+https://github.com/hiddenswitch/ComfyUI.git"
+#ARG SOURCES="comfyui[attention,comfyui_manager]@git+https://github.com/hiddenswitch/ComfyUI.git"
 ENV SOURCES=$SOURCES
 
-RUN uv pip install --overrides=numpy-override.txt $SOURCES
+RUN uv pip install $SOURCES
 
 WORKDIR /workspace
 # addresses https://github.com/pytorch/pytorch/issues/104801
 # and issues reported by importing nodes_canny
+RUN python -c "import torch; import xformers; import sageattention; import cv2"
 RUN comfyui --quick-test-for-ci --cpu --cwd /workspace
 
 EXPOSE 8188
diff --git a/comfy/app/logger.py b/comfy/app/logger.py
index 095b174b9..5bd6bd781 100644
--- a/comfy/app/logger.py
+++ b/comfy/app/logger.py
@@ -27,7 +27,7 @@ class LogInterceptor(io.TextIOWrapper):
 
             # Simple handling for cr to overwrite the last output if it isnt a full line
             # else logs just get full of progress messages
-            if isinstance(data, str) and data.startswith("\r") and not logs[-1]["m"].endswith("\n"):
+            if isinstance(data, str) and data.startswith("\r") and len(logs) > 0 and not logs[-1]["m"].endswith("\n"):
                 logs.pop()
             logs.append(entry)
         super().write(data)
diff --git a/comfy/distributed/distributed_progress.py b/comfy/distributed/distributed_progress.py
index 692714ed7..62c648611 100644
--- a/comfy/distributed/distributed_progress.py
+++ b/comfy/distributed/distributed_progress.py
@@ -2,25 +2,35 @@ from __future__ import annotations
 
 import asyncio
 import base64
+import pickle
 from asyncio import AbstractEventLoop
-from enum import Enum
 from functools import partial
-from typing import Optional, Dict, Any, Union
+from typing import Optional, Dict, Any, TypeVar, NewType
 
+from aio_pika import DeliveryMode
 from aio_pika.patterns import RPC
 
 from ..component_model.executor_types import SendSyncEvent, SendSyncData, ExecutorToClientProgress, \
-    UnencodedPreviewImageMessage, StatusMessage, QueueInfo, ExecInfo
-from ..component_model.queue_types import BinaryEventTypes
+    StatusMessage, QueueInfo, ExecInfo
+
+T = TypeVar('T')
+Base64Pickled = NewType('Base64Pickled', str)
 
 
-async def _progress(event: SendSyncEvent, data: SendSyncData, user_id: Optional[str] = None,
+def obj2base64(obj: T) -> Base64Pickled:
+    return Base64Pickled(base64.b64encode(pickle.dumps(obj)).decode())
+
+
+def base642obj(data: Base64Pickled) -> T:
+    return pickle.loads(base64.b64decode(data))
+
+
+async def _progress(event: Base64Pickled, data: Base64Pickled, user_id: Optional[str] = None,
                     caller_server: Optional[ExecutorToClientProgress] = None) -> None:
     assert caller_server is not None
     assert user_id is not None
-    if event == BinaryEventTypes.PREVIEW_IMAGE or event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE or isinstance(data, str):
-        data: bytes = base64.b64decode(data)
-    caller_server.send_sync(event, data, sid=user_id)
+
+    caller_server.send_sync(base642obj(event), base642obj(data), sid=user_id)
 
 
 def _get_name(queue_name: str, user_id: str) -> str:
@@ -43,27 +53,10 @@ class DistributedExecutorToClientProgress(ExecutorToClientProgress):
         return True
 
     async def send(self, event: SendSyncEvent, data: SendSyncData, user_id: Optional[str]) -> None:
-        if event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE:
-            from ..cmd.latent_preview_image_encoding import encode_preview_image
-
-            # encode preview image
-            event = BinaryEventTypes.PREVIEW_IMAGE.value
-            data: UnencodedPreviewImageMessage
-            format, pil_image, max_size, node_id, task_id = data
-            data: bytes = encode_preview_image(pil_image, format, max_size, node_id, task_id)
-
-        if isinstance(data, bytes) or isinstance(data, bytearray):
-            if isinstance(event, Enum):
-                event: int = event.value
-            data: str = base64.b64encode(data).decode()
-
-        if user_id is None:
-            # todo: user_id should never be none here
-            return
-
+        assert user_id is not None, f"event={event} data={data}"
         try:
             # we don't need to await this coroutine
-            _ = asyncio.create_task(self._rpc.call(_get_name(self._queue_name, user_id), {"event": event, "data": data}, expiration=1000))
+            _ = asyncio.create_task(self._rpc.call(_get_name(self._queue_name, user_id), {"event": obj2base64(event), "data": obj2base64(data)}, expiration=1000, delivery_mode=DeliveryMode.NOT_PERSISTENT))
         except asyncio.TimeoutError:
             # these can gracefully expire
             pass
diff --git a/docker-compose.yml b/docker-compose.yml
index 37fd9038f..55dcbb6e2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,7 +5,14 @@ services:
       context: .
       dockerfile: Dockerfile
     volumes:
+      # USING DOCKER MANAGED VOLUMES
       - workspace_data:/workspace
+      # OR: USE LOCAL DIRECTORIES
+      # Comment out the `- workspace_data...` line, then uncomment:
+      # - models:/workspace/models
+      # - custom_nodes:/workspace/custom_nodes
+      # - output:/workspace/output
+      # - input:/workspace/input
     deploy:
       replicas: 1
       resources:
@@ -16,7 +23,6 @@ services:
               capabilities: [ gpu ]
     environment:
       - COMFYUI_DISTRIBUTED_QUEUE_CONNECTION_URI=amqp://guest:guest@rabbitmq:5672
-      - COMFYUI_EXECUTOR_FACTORY=ProcessPoolExecutor
       - COMFYUI_PANIC_WHEN=torch.cuda.OutOfMemoryError
       - COMFYUI_LOGGING_LEVEL=ERROR
     command:
@@ -25,7 +31,7 @@ services:
       test: curl -f http://localhost:9090/health
       interval: 10s
       timeout: 5s
-      retries: 3
+      retries: 1
       start_period: 10s
     restart: unless-stopped
   frontend:
@@ -35,7 +41,14 @@ services:
     deploy:
       replicas: 1
     volumes:
+      # USING DOCKER MANAGED VOLUMES
       - workspace_data:/workspace
+      # OR: USE LOCAL DIRECTORIES
+      # Comment out the `- workspace_data...` line, then uncomment:
+      # - models:/workspace/models
+      # - custom_nodes:/workspace/custom_nodes
+      # - output:/workspace/output
+      # - input:/workspace/input
     environment:
       - COMFYUI_DISTRIBUTED_QUEUE_CONNECTION_URI=amqp://guest:guest@rabbitmq:5672
       - COMFYUI_DISTRIBUTED_QUEUE_FRONTEND=1
@@ -54,5 +67,34 @@ services:
     restart: unless-stopped
   rabbitmq:
     image: rabbitmq:3
+    command: >
+      sh -c "echo 'log.default.level = error' > /etc/rabbitmq/rabbitmq.conf && 
+             docker-entrypoint.sh rabbitmq-server"
 volumes:
-  workspace_data: {}
\ No newline at end of file
+  # USING DOCKER MANAGED VOLUMES
+  workspace_data: {}
+  # OR: USE LOCAL DIRECTORIES
+  # models:
+  #   driver: local
+  #   driver_opts:
+  #     type: 'none'
+  #     o: 'bind'
+  #     device: './models'
+  # custom_nodes:
+  #   driver: local
+  #   driver_opts:
+  #     type: 'none'
+  #     o: 'bind'
+  #     device: './custom_nodes'
+  # output:
+  #   driver: local
+  #   driver_opts:
+  #     type: 'none'
+  #     o: 'bind'
+  #     device: './output'
+  # input:
+  #   driver: local
+  #   driver_opts:
+  #     type: 'none'
+  #     o: 'bind'
+  #     device: './input'
\ No newline at end of file
diff --git a/pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl b/pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl
new file mode 100644
index 000000000..ea9febea9
Binary files /dev/null and b/pkg/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl differ