mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-16 22:58:19 +08:00
Compare commits
14 Commits
ea3e083a5f
...
2962184920
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2962184920 | ||
|
|
593be209a4 | ||
|
|
3814bf4454 | ||
|
|
d062becb33 | ||
|
|
e84a200a3c | ||
|
|
192cb8eeb9 | ||
|
|
0904cc3fe5 | ||
|
|
4941cd046e | ||
|
|
c711b8f437 | ||
|
|
1c5db7397d | ||
|
|
e0982a7174 | ||
|
|
4c4be1bba5 | ||
|
|
dac0710c88 | ||
|
|
33f01eb0df |
8
.dockerignore
Normal file
8
.dockerignore
Normal file
@ -0,0 +1,8 @@
|
||||
.*
|
||||
!.git
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
input
|
||||
models
|
||||
notebooks
|
||||
output
|
||||
101
.github/workflows/docker.yml
vendored
Normal file
101
.github/workflows/docker.yml
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
name: Build and publish Docker images
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- id: cu118
|
||||
name: CUDA 11.8
|
||||
pytorch_install_args: "--index-url https://download.pytorch.org/whl/cu118"
|
||||
- id: cu121
|
||||
name: CUDA 12.1
|
||||
pytorch_install_args: "--index-url https://download.pytorch.org/whl/cu121"
|
||||
- id: cu124
|
||||
name: CUDA 12.4
|
||||
pytorch_install_args: "--index-url https://download.pytorch.org/whl/cu124"
|
||||
- id: rocm6.2
|
||||
name: ROCm 6.2
|
||||
pytorch_install_args: "--index-url https://download.pytorch.org/whl/rocm6.2"
|
||||
- id: cpu
|
||||
name: CPU only
|
||||
pytorch_install_args: "--index-url https://download.pytorch.org/whl/cpu"
|
||||
extra_args: --cpu
|
||||
|
||||
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Check which repositories to use
|
||||
id: repositories
|
||||
run: |
|
||||
echo "GHCR_IMAGE_NAME=ghcr.io/${GITHUB_REPOSITORY_OWNER}/comfyui" >> "$GITHUB_ENV"
|
||||
if [[ -n "${DOCKERHUB_USERNAME}" ]]; then
|
||||
echo "DOCKERHUB_IMAGE_NAME=${DOCKERHUB_USERNAME}/comfyui" >> "$GITHUB_ENV"
|
||||
else
|
||||
echo "DOCKERHUB_IMAGE_NAME=" >> "$GITHUB_ENV"
|
||||
echo "No Docker Hub username set, only deploying to GitHub Container Repository"
|
||||
fi
|
||||
env:
|
||||
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
GITHUB_REPOSITORY_OWNER: ${{ github.repository_owner }}
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
# list of Docker images to use as base name for tags
|
||||
images: |
|
||||
${{ env.DOCKERHUB_IMAGE_NAME }}
|
||||
${{ env.GHCR_IMAGE_NAME }}
|
||||
flavor: |
|
||||
suffix=-${{ matrix.id }},onlatest=true
|
||||
# generate Docker tags based on the following events/attributes
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=ref,event=pr
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
type=sha
|
||||
# set latest tag for default branch
|
||||
type=raw,value=latest,enable=${{ github.event_name == 'release' }}
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to Docker Hub
|
||||
if: github.event_name != 'pull_request' && env.DOCKERHUB_IMAGE_NAME != ''
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Login to GHCR
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: |
|
||||
PYTORCH_INSTALL_ARGS=${{ matrix.pytorch_install_args }}
|
||||
EXTRA_ARGS=${{ matrix.extra_args }}
|
||||
cache-from: type=gha,scope=${{ github.ref_name }}-${{ matrix.id }}
|
||||
cache-to: type=gha,mode=max,scope=${{ github.ref_name }}-${{ matrix.id }}
|
||||
79
Dockerfile
Normal file
79
Dockerfile
Normal file
@ -0,0 +1,79 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
ARG PYTHON_VERSION=3.12
|
||||
|
||||
FROM python:${PYTHON_VERSION}-slim
|
||||
|
||||
ARG PYTORCH_INSTALL_ARGS=""
|
||||
ARG EXTRA_ARGS=""
|
||||
ARG USERNAME=comfyui
|
||||
ARG USER_UID=1000
|
||||
ARG USER_GID=${USER_UID}
|
||||
|
||||
# Fail fast on errors or unset variables
|
||||
SHELL ["/bin/bash", "-eux", "-o", "pipefail", "-c"]
|
||||
|
||||
RUN <<EOF
|
||||
groupadd --gid ${USER_GID} ${USERNAME}
|
||||
useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME}
|
||||
EOF
|
||||
|
||||
RUN <<EOF
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
git-lfs \
|
||||
rsync \
|
||||
fonts-recommended
|
||||
EOF
|
||||
|
||||
# run instructions as user
|
||||
USER ${USER_UID}:${USER_GID}
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV XDG_CACHE_HOME=/cache
|
||||
ENV PIP_CACHE_DIR=/cache/pip
|
||||
ENV VIRTUAL_ENV=/app/venv
|
||||
ENV VIRTUAL_ENV_CUSTOM=/app/custom_venv
|
||||
|
||||
# create cache directory. During build we will use a cache mount,
|
||||
# but later this is useful for custom node installs
|
||||
RUN --mount=type=cache,target=/cache/,uid=${USER_UID},gid=${USER_GID} \
|
||||
mkdir -p ${PIP_CACHE_DIR}
|
||||
|
||||
# create virtual environment to manage packages
|
||||
RUN python -m venv ${VIRTUAL_ENV}
|
||||
|
||||
# run python from venv (prefer custom_venv over baked-in one)
|
||||
ENV PATH="${VIRTUAL_ENV_CUSTOM}/bin:${VIRTUAL_ENV}/bin:${PATH}"
|
||||
|
||||
RUN --mount=type=cache,target=/cache/,uid=${USER_UID},gid=${USER_GID} \
|
||||
pip install torch torchvision torchaudio ${PYTORCH_INSTALL_ARGS}
|
||||
|
||||
# copy requirements files first so packages can be cached separately
|
||||
COPY --chown=${USER_UID}:${USER_GID} requirements.txt .
|
||||
RUN --mount=type=cache,target=/cache/,uid=${USER_UID},gid=${USER_GID} \
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Not strictly required for comfyui, but prevents non-working variants of
|
||||
# cv2 being pulled in by custom nodes
|
||||
RUN --mount=type=cache,target=/cache/,uid=${USER_UID},gid=${USER_GID} \
|
||||
pip install opencv-python-headless
|
||||
|
||||
COPY --chown=${USER_UID}:${USER_GID} . .
|
||||
|
||||
COPY --chown=nobody:${USER_GID} .git .git
|
||||
|
||||
# default environment variables
|
||||
ENV COMFYUI_ADDRESS=0.0.0.0
|
||||
ENV COMFYUI_PORT=8188
|
||||
ENV COMFYUI_EXTRA_BUILD_ARGS="${EXTRA_ARGS}"
|
||||
ENV COMFYUI_EXTRA_ARGS=""
|
||||
# default start command
|
||||
CMD \
|
||||
if [ -d "${VIRTUAL_ENV_CUSTOM}" ]; then \
|
||||
rsync -aP "${VIRTUAL_ENV}/" "${VIRTUAL_ENV_CUSTOM}/" ;\
|
||||
sed -i "s!${VIRTUAL_ENV}!${VIRTUAL_ENV_CUSTOM}!g" "${VIRTUAL_ENV_CUSTOM}/pyvenv.cfg" ;\
|
||||
fi ;\
|
||||
python -u main.py --listen "${COMFYUI_ADDRESS}" --port "${COMFYUI_PORT}" ${COMFYUI_EXTRA_BUILD_ARGS} ${COMFYUI_EXTRA_ARGS}
|
||||
39
README.md
39
README.md
@ -293,6 +293,45 @@ Install the dependencies by opening your terminal inside the ComfyUI folder and:
|
||||
|
||||
After this you should have everything installed and can proceed to running ComfyUI.
|
||||
|
||||
|
||||
## Docker
|
||||
|
||||
There are prebuilt docker images for AMD and NVIDIA GPUs on [GitHub Packages](https://ghcr.io/comfyanonymous/comfyui).
|
||||
|
||||
You can pull them to your local docker registry with:
|
||||
|
||||
```shell
|
||||
# For NVIDIA GPUs
|
||||
docker pull ghcr.io/comfyanonymous/comfyui:latest-cu124
|
||||
# For AMD GPUs
|
||||
docker pull ghcr.io/comfyanonymous/comfyui:latest-rocm6.2
|
||||
|
||||
# For CPU only
|
||||
docker pull ghcr.io/comfyanonymous/comfyui:latest-cpu
|
||||
```
|
||||
|
||||
### Building images manually
|
||||
|
||||
You can build a docker image with the `Dockerfile` in this repo.
|
||||
Specify, `PYTORCH_INSTALL_ARGS` build arg with one of the PyTorch commands above to build for AMD or NVIDIA GPUs.
|
||||
```shell
|
||||
docker build --build-arg PYTORCH_INSTALL_ARGS="--index-url https://download.pytorch.org/whl/cu122" .
|
||||
```
|
||||
```shell
|
||||
docker build --build-arg PYTORCH_INSTALL_ARGS="--index-url https://download.pytorch.org/whl/rocm6.2" .
|
||||
```
|
||||
The `Dockerfile` requires BuildKit to be enabled. If your Docker does not support the buildx command, you can
|
||||
enable BuildKit by setting the `DOCKER_BUILDKIT` environment variable.
|
||||
```shell
|
||||
DOCKER_BUILDKIT=1 docker build --build-arg PYTORCH_INSTALL_ARGS="--index-url https://download.pytorch.org/whl/cu124" .
|
||||
```
|
||||
> [!NOTE]
|
||||
> For building the CPU-only image, it is recommended that you add the --cpu flag to the EXTRA_ARGS build arg:
|
||||
>
|
||||
> ```shell
|
||||
> docker build --build-arg PYTORCH_INSTALL_ARGS="--index-url https://download.pytorch.org/whl/cpu" --build-arg EXTRA_ARGS=--cpu .
|
||||
> ```
|
||||
|
||||
### Others:
|
||||
|
||||
#### Apple Mac silicon
|
||||
|
||||
@ -83,6 +83,8 @@ fpte_group.add_argument("--fp16-text-enc", action="store_true", help="Store text
|
||||
fpte_group.add_argument("--fp32-text-enc", action="store_true", help="Store text encoder weights in fp32.")
|
||||
fpte_group.add_argument("--bf16-text-enc", action="store_true", help="Store text encoder weights in bf16.")
|
||||
|
||||
parser.add_argument("--fp16-intermediates", action="store_true", help="Experimental: Use fp16 for intermediate tensors between nodes instead of fp32.")
|
||||
|
||||
parser.add_argument("--force-channels-last", action="store_true", help="Force channels last format when inferencing the models.")
|
||||
|
||||
parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")
|
||||
|
||||
@ -209,3 +209,39 @@ def stochastic_round_quantize_nvfp4_by_block(x, per_tensor_scale, pad_16x, seed=
|
||||
output_block[i:i + slice_size].copy_(block)
|
||||
|
||||
return output_fp4, to_blocked(output_block, flatten=False)
|
||||
|
||||
|
||||
def stochastic_round_quantize_mxfp8_by_block(x, pad_32x, seed=0):
|
||||
def roundup(x_val, multiple):
|
||||
return ((x_val + multiple - 1) // multiple) * multiple
|
||||
|
||||
if pad_32x:
|
||||
rows, cols = x.shape
|
||||
padded_rows = roundup(rows, 32)
|
||||
padded_cols = roundup(cols, 32)
|
||||
if padded_rows != rows or padded_cols != cols:
|
||||
x = torch.nn.functional.pad(x, (0, padded_cols - cols, 0, padded_rows - rows))
|
||||
|
||||
F8_E4M3_MAX = 448.0
|
||||
E8M0_BIAS = 127
|
||||
BLOCK_SIZE = 32
|
||||
|
||||
rows, cols = x.shape
|
||||
x_blocked = x.reshape(rows, -1, BLOCK_SIZE)
|
||||
max_abs = torch.amax(torch.abs(x_blocked), dim=-1)
|
||||
|
||||
# E8M0 block scales (power-of-2 exponents)
|
||||
scale_needed = torch.clamp(max_abs.float() / F8_E4M3_MAX, min=2**(-127))
|
||||
exp_biased = torch.clamp(torch.ceil(torch.log2(scale_needed)).to(torch.int32) + E8M0_BIAS, 0, 254)
|
||||
block_scales_e8m0 = exp_biased.to(torch.uint8)
|
||||
|
||||
zero_mask = (max_abs == 0)
|
||||
block_scales_f32 = (block_scales_e8m0.to(torch.int32) << 23).view(torch.float32)
|
||||
block_scales_f32 = torch.where(zero_mask, torch.ones_like(block_scales_f32), block_scales_f32)
|
||||
|
||||
# Scale per-block then stochastic round
|
||||
data_scaled = (x_blocked.float() / block_scales_f32.unsqueeze(-1)).reshape(rows, cols)
|
||||
output_fp8 = stochastic_rounding(data_scaled, torch.float8_e4m3fn, seed=seed)
|
||||
|
||||
block_scales_e8m0 = torch.where(zero_mask, torch.zeros_like(block_scales_e8m0), block_scales_e8m0)
|
||||
return output_fp8, to_blocked(block_scales_e8m0, flatten=False).view(torch.float8_e8m0fnu)
|
||||
|
||||
@ -11,6 +11,7 @@ from .causal_conv3d import CausalConv3d
|
||||
from .pixel_norm import PixelNorm
|
||||
from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
|
||||
import comfy.ops
|
||||
import comfy.model_management
|
||||
from comfy.ldm.modules.diffusionmodules.model import torch_cat_if_needed
|
||||
|
||||
ops = comfy.ops.disable_weight_init
|
||||
@ -536,7 +537,7 @@ class Decoder(nn.Module):
|
||||
mark_conv3d_ended(self.conv_out)
|
||||
sample = self.conv_out(sample, causal=self.causal)
|
||||
if sample is not None and sample.shape[2] > 0:
|
||||
output.append(sample)
|
||||
output.append(sample.to(comfy.model_management.intermediate_device()))
|
||||
return
|
||||
|
||||
up_block = self.up_blocks[idx]
|
||||
|
||||
@ -400,7 +400,7 @@ try:
|
||||
if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
|
||||
if aotriton_supported(arch): # AMD efficient attention implementation depends on aotriton.
|
||||
if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much
|
||||
if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950
|
||||
if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1150", "gfx1151"]): # TODO: more arches, TODO: gfx950
|
||||
ENABLE_PYTORCH_ATTENTION = True
|
||||
if rocm_version >= (7, 0):
|
||||
if any((a in arch) for a in ["gfx1200", "gfx1201"]):
|
||||
@ -1050,6 +1050,12 @@ def intermediate_device():
|
||||
else:
|
||||
return torch.device("cpu")
|
||||
|
||||
def intermediate_dtype():
|
||||
if args.fp16_intermediates:
|
||||
return torch.float16
|
||||
else:
|
||||
return torch.float32
|
||||
|
||||
def vae_device():
|
||||
if args.cpu_vae:
|
||||
return torch.device("cpu")
|
||||
@ -1712,6 +1718,19 @@ def supports_nvfp4_compute(device=None):
|
||||
|
||||
return True
|
||||
|
||||
def supports_mxfp8_compute(device=None):
|
||||
if not is_nvidia():
|
||||
return False
|
||||
|
||||
if torch_version_numeric < (2, 10):
|
||||
return False
|
||||
|
||||
props = torch.cuda.get_device_properties(device)
|
||||
if props.major < 10:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def extended_fp16_support():
|
||||
# TODO: check why some models work with fp16 on newer torch versions but not on older
|
||||
if torch_version_numeric < (2, 7):
|
||||
|
||||
37
comfy/ops.py
37
comfy/ops.py
@ -336,7 +336,10 @@ class disable_weight_init:
|
||||
class Linear(torch.nn.Linear, CastWeightBiasOp):
|
||||
|
||||
def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
|
||||
if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
|
||||
# don't trust subclasses that BYO state dict loader to call us.
|
||||
if (not comfy.model_management.WINDOWS
|
||||
or not comfy.memory_management.aimdo_enabled
|
||||
or type(self)._load_from_state_dict is not disable_weight_init.Linear._load_from_state_dict):
|
||||
super().__init__(in_features, out_features, bias, device, dtype)
|
||||
return
|
||||
|
||||
@ -357,7 +360,9 @@ class disable_weight_init:
|
||||
def _load_from_state_dict(self, state_dict, prefix, local_metadata,
|
||||
strict, missing_keys, unexpected_keys, error_msgs):
|
||||
|
||||
if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
|
||||
if (not comfy.model_management.WINDOWS
|
||||
or not comfy.memory_management.aimdo_enabled
|
||||
or type(self)._load_from_state_dict is not disable_weight_init.Linear._load_from_state_dict):
|
||||
return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
|
||||
missing_keys, unexpected_keys, error_msgs)
|
||||
disable_weight_init._lazy_load_from_state_dict(
|
||||
@ -564,7 +569,10 @@ class disable_weight_init:
|
||||
def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None,
|
||||
norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None,
|
||||
_freeze=False, device=None, dtype=None):
|
||||
if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
|
||||
# don't trust subclasses that BYO state dict loader to call us.
|
||||
if (not comfy.model_management.WINDOWS
|
||||
or not comfy.memory_management.aimdo_enabled
|
||||
or type(self)._load_from_state_dict is not disable_weight_init.Embedding._load_from_state_dict):
|
||||
super().__init__(num_embeddings, embedding_dim, padding_idx, max_norm,
|
||||
norm_type, scale_grad_by_freq, sparse, _weight,
|
||||
_freeze, device, dtype)
|
||||
@ -590,7 +598,9 @@ class disable_weight_init:
|
||||
def _load_from_state_dict(self, state_dict, prefix, local_metadata,
|
||||
strict, missing_keys, unexpected_keys, error_msgs):
|
||||
|
||||
if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
|
||||
if (not comfy.model_management.WINDOWS
|
||||
or not comfy.memory_management.aimdo_enabled
|
||||
or type(self)._load_from_state_dict is not disable_weight_init.Embedding._load_from_state_dict):
|
||||
return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
|
||||
missing_keys, unexpected_keys, error_msgs)
|
||||
disable_weight_init._lazy_load_from_state_dict(
|
||||
@ -857,6 +867,22 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
|
||||
orig_shape=(self.out_features, self.in_features),
|
||||
)
|
||||
|
||||
elif self.quant_format == "mxfp8":
|
||||
# MXFP8: E8M0 block scales stored as uint8 in safetensors
|
||||
block_scale = self._load_scale_param(state_dict, prefix, "weight_scale", device, manually_loaded_keys,
|
||||
dtype=torch.uint8)
|
||||
|
||||
if block_scale is None:
|
||||
raise ValueError(f"Missing MXFP8 block scales for layer {layer_name}")
|
||||
|
||||
block_scale = block_scale.view(torch.float8_e8m0fnu)
|
||||
|
||||
params = layout_cls.Params(
|
||||
scale=block_scale,
|
||||
orig_dtype=MixedPrecisionOps._compute_dtype,
|
||||
orig_shape=(self.out_features, self.in_features),
|
||||
)
|
||||
|
||||
elif self.quant_format == "nvfp4":
|
||||
# NVFP4: tensor_scale (weight_scale_2) + block_scale (weight_scale)
|
||||
tensor_scale = self._load_scale_param(state_dict, prefix, "weight_scale_2", device, manually_loaded_keys)
|
||||
@ -1006,12 +1032,15 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
|
||||
def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
|
||||
fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
|
||||
nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)
|
||||
mxfp8_compute = comfy.model_management.supports_mxfp8_compute(load_device)
|
||||
|
||||
if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
|
||||
logging.info("Using mixed precision operations")
|
||||
disabled = set()
|
||||
if not nvfp4_compute:
|
||||
disabled.add("nvfp4")
|
||||
if not mxfp8_compute:
|
||||
disabled.add("mxfp8")
|
||||
if not fp8_compute:
|
||||
disabled.add("float8_e4m3fn")
|
||||
disabled.add("float8_e5m2")
|
||||
|
||||
@ -43,6 +43,18 @@ except ImportError as e:
|
||||
def get_layout_class(name):
|
||||
return None
|
||||
|
||||
_CK_MXFP8_AVAILABLE = False
|
||||
if _CK_AVAILABLE:
|
||||
try:
|
||||
from comfy_kitchen.tensor import TensorCoreMXFP8Layout as _CKMxfp8Layout
|
||||
_CK_MXFP8_AVAILABLE = True
|
||||
except ImportError:
|
||||
logging.warning("comfy_kitchen does not support MXFP8, please update comfy_kitchen.")
|
||||
|
||||
if not _CK_MXFP8_AVAILABLE:
|
||||
class _CKMxfp8Layout:
|
||||
pass
|
||||
|
||||
import comfy.float
|
||||
|
||||
# ==============================================================================
|
||||
@ -84,6 +96,31 @@ class _TensorCoreFP8LayoutBase(_CKFp8Layout):
|
||||
return qdata, params
|
||||
|
||||
|
||||
class TensorCoreMXFP8Layout(_CKMxfp8Layout):
|
||||
@classmethod
|
||||
def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
|
||||
if tensor.dim() != 2:
|
||||
raise ValueError(f"MXFP8 requires 2D tensor, got {tensor.dim()}D")
|
||||
|
||||
orig_dtype = tensor.dtype
|
||||
orig_shape = tuple(tensor.shape)
|
||||
|
||||
padded_shape = cls.get_padded_shape(orig_shape)
|
||||
needs_padding = padded_shape != orig_shape
|
||||
|
||||
if stochastic_rounding > 0:
|
||||
qdata, block_scale = comfy.float.stochastic_round_quantize_mxfp8_by_block(tensor, pad_32x=needs_padding, seed=stochastic_rounding)
|
||||
else:
|
||||
qdata, block_scale = ck.quantize_mxfp8(tensor, pad_32x=needs_padding)
|
||||
|
||||
params = cls.Params(
|
||||
scale=block_scale,
|
||||
orig_dtype=orig_dtype,
|
||||
orig_shape=orig_shape,
|
||||
)
|
||||
return qdata, params
|
||||
|
||||
|
||||
class TensorCoreNVFP4Layout(_CKNvfp4Layout):
|
||||
@classmethod
|
||||
def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
|
||||
@ -137,6 +174,8 @@ register_layout_class("TensorCoreFP8Layout", TensorCoreFP8Layout)
|
||||
register_layout_class("TensorCoreFP8E4M3Layout", TensorCoreFP8E4M3Layout)
|
||||
register_layout_class("TensorCoreFP8E5M2Layout", TensorCoreFP8E5M2Layout)
|
||||
register_layout_class("TensorCoreNVFP4Layout", TensorCoreNVFP4Layout)
|
||||
if _CK_MXFP8_AVAILABLE:
|
||||
register_layout_class("TensorCoreMXFP8Layout", TensorCoreMXFP8Layout)
|
||||
|
||||
QUANT_ALGOS = {
|
||||
"float8_e4m3fn": {
|
||||
@ -157,6 +196,14 @@ QUANT_ALGOS = {
|
||||
},
|
||||
}
|
||||
|
||||
if _CK_MXFP8_AVAILABLE:
|
||||
QUANT_ALGOS["mxfp8"] = {
|
||||
"storage_t": torch.float8_e4m3fn,
|
||||
"parameters": {"weight_scale", "input_scale"},
|
||||
"comfy_tensor_layout": "TensorCoreMXFP8Layout",
|
||||
"group_size": 32,
|
||||
}
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# Re-exports for backward compatibility
|
||||
|
||||
27
comfy/sd.py
27
comfy/sd.py
@ -871,13 +871,16 @@ class VAE:
|
||||
pixels = torch.nn.functional.pad(pixels, (0, self.output_channels - pixels.shape[-1]), mode=mode, value=value)
|
||||
return pixels
|
||||
|
||||
def vae_output_dtype(self):
|
||||
return model_management.intermediate_dtype()
|
||||
|
||||
def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
|
||||
steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
|
||||
steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
|
||||
steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
|
||||
pbar = comfy.utils.ProgressBar(steps)
|
||||
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
output = self.process_output(
|
||||
(comfy.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
|
||||
comfy.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
|
||||
@ -887,16 +890,16 @@ class VAE:
|
||||
|
||||
def decode_tiled_1d(self, samples, tile_x=256, overlap=32):
|
||||
if samples.ndim == 3:
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
else:
|
||||
og_shape = samples.shape
|
||||
samples = samples.reshape((og_shape[0], og_shape[1] * og_shape[2], -1))
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).float()
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
|
||||
return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device))
|
||||
|
||||
def decode_tiled_3d(self, samples, tile_t=999, tile_x=32, tile_y=32, overlap=(1, 8, 8)):
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
|
||||
decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, index_formulas=self.upscale_index_formula, output_device=self.output_device))
|
||||
|
||||
def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
|
||||
@ -905,7 +908,7 @@ class VAE:
|
||||
steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
|
||||
pbar = comfy.utils.ProgressBar(steps)
|
||||
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
samples = comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
|
||||
samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
|
||||
samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
|
||||
@ -914,7 +917,7 @@ class VAE:
|
||||
|
||||
def encode_tiled_1d(self, samples, tile_x=256 * 2048, overlap=64 * 2048):
|
||||
if self.latent_dim == 1:
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
out_channels = self.latent_channels
|
||||
upscale_amount = 1 / self.downscale_ratio
|
||||
else:
|
||||
@ -923,7 +926,7 @@ class VAE:
|
||||
tile_x = tile_x // extra_channel_size
|
||||
overlap = overlap // extra_channel_size
|
||||
upscale_amount = 1 / self.downscale_ratio
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).float()
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).to(dtype=self.vae_output_dtype())
|
||||
|
||||
out = comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=self.output_device)
|
||||
if self.latent_dim == 1:
|
||||
@ -932,7 +935,7 @@ class VAE:
|
||||
return out.reshape(samples.shape[0], self.latent_channels, extra_channel_size, -1)
|
||||
|
||||
def encode_tiled_3d(self, samples, tile_t=9999, tile_x=512, tile_y=512, overlap=(1, 64, 64)):
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
|
||||
encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
|
||||
return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.downscale_ratio, out_channels=self.latent_channels, downscale=True, index_formulas=self.downscale_index_formula, output_device=self.output_device)
|
||||
|
||||
def decode(self, samples_in, vae_options={}):
|
||||
@ -950,9 +953,9 @@ class VAE:
|
||||
|
||||
for x in range(0, samples_in.shape[0], batch_number):
|
||||
samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device)
|
||||
out = self.process_output(self.first_stage_model.decode(samples, **vae_options).to(self.output_device).float())
|
||||
out = self.process_output(self.first_stage_model.decode(samples, **vae_options).to(self.output_device).to(dtype=self.vae_output_dtype()))
|
||||
if pixel_samples is None:
|
||||
pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
|
||||
pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device, dtype=self.vae_output_dtype())
|
||||
pixel_samples[x:x+batch_number] = out
|
||||
except Exception as e:
|
||||
model_management.raise_non_oom(e)
|
||||
@ -1025,9 +1028,9 @@ class VAE:
|
||||
samples = None
|
||||
for x in range(0, pixel_samples.shape[0], batch_number):
|
||||
pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype).to(self.device)
|
||||
out = self.first_stage_model.encode(pixels_in).to(self.output_device).float()
|
||||
out = self.first_stage_model.encode(pixels_in).to(self.output_device).to(dtype=self.vae_output_dtype())
|
||||
if samples is None:
|
||||
samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
|
||||
samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device, dtype=self.vae_output_dtype())
|
||||
samples[x:x + batch_number] = out
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@ -1459,6 +1459,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
node_id="KlingOmniProEditVideoNode",
|
||||
display_name="Kling 3.0 Omni Edit Video",
|
||||
category="api node/video/Kling",
|
||||
essentials_category="Video Generation",
|
||||
description="Edit an existing video with the latest model from Kling.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
|
||||
|
||||
@ -833,6 +833,7 @@ class RecraftVectorizeImageNode(IO.ComfyNode):
|
||||
node_id="RecraftVectorizeImageNode",
|
||||
display_name="Recraft Vectorize Image",
|
||||
category="api node/image/Recraft",
|
||||
essentials_category="Image Tools",
|
||||
description="Generates SVG synchronously from an input image.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
|
||||
@ -19,6 +19,7 @@ class EmptyLatentAudio(IO.ComfyNode):
|
||||
node_id="EmptyLatentAudio",
|
||||
display_name="Empty Latent Audio",
|
||||
category="latent/audio",
|
||||
essentials_category="Audio",
|
||||
inputs=[
|
||||
IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1),
|
||||
IO.Int.Input(
|
||||
@ -185,6 +186,7 @@ class SaveAudioMP3(IO.ComfyNode):
|
||||
search_aliases=["export mp3"],
|
||||
display_name="Save Audio (MP3)",
|
||||
category="audio",
|
||||
essentials_category="Audio",
|
||||
inputs=[
|
||||
IO.Audio.Input("audio"),
|
||||
IO.String.Input("filename_prefix", default="audio/ComfyUI"),
|
||||
|
||||
@ -14,6 +14,7 @@ class ImageCompare(IO.ComfyNode):
|
||||
display_name="Image Compare",
|
||||
description="Compares two images side by side with a slider.",
|
||||
category="image",
|
||||
essentials_category="Image Tools",
|
||||
is_experimental=True,
|
||||
is_output_node=True,
|
||||
inputs=[
|
||||
|
||||
@ -58,6 +58,7 @@ class ImageCropV2(IO.ComfyNode):
|
||||
search_aliases=["trim"],
|
||||
display_name="Image Crop",
|
||||
category="image/transform",
|
||||
essentials_category="Image Tools",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
IO.BoundingBox.Input("crop_region", component="ImageCrop"),
|
||||
|
||||
@ -21,6 +21,7 @@ class Blend(io.ComfyNode):
|
||||
node_id="ImageBlend",
|
||||
display_name="Image Blend",
|
||||
category="image/postprocessing",
|
||||
essentials_category="Image Tools",
|
||||
inputs=[
|
||||
io.Image.Input("image1"),
|
||||
io.Image.Input("image2"),
|
||||
|
||||
21
docker-compose.yaml
Normal file
21
docker-compose.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
services:
|
||||
comfyui:
|
||||
user: "1000:1000"
|
||||
build: .
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
ports:
|
||||
- "8188:8188"
|
||||
volumes:
|
||||
- "./models:/app/models"
|
||||
- "./input:/app/input"
|
||||
- "./temp:/app/output/temp"
|
||||
- "./output:/app/output"
|
||||
- "./user:/app/user"
|
||||
- "./custom_venv:/app/custom_venv"
|
||||
- "./custom_nodes:/app/custom_nodes"
|
||||
@ -1 +1 @@
|
||||
comfyui_manager==4.1b4
|
||||
comfyui_manager==4.1b5
|
||||
@ -32,7 +32,7 @@ async def cache_control(
|
||||
)
|
||||
|
||||
if request.path.endswith(".js") or request.path.endswith(".css") or is_entry_point:
|
||||
response.headers.setdefault("Cache-Control", "no-cache")
|
||||
response.headers.setdefault("Cache-Control", "no-store")
|
||||
return response
|
||||
|
||||
# Early return for non-image files - no cache headers needed
|
||||
|
||||
14
nodes.py
14
nodes.py
@ -81,6 +81,7 @@ class CLIPTextEncode(ComfyNodeABC):
|
||||
|
||||
|
||||
class ConditioningCombine:
|
||||
ESSENTIALS_CATEGORY = "Image Generation"
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
return {"required": {"conditioning_1": ("CONDITIONING", ), "conditioning_2": ("CONDITIONING", )}}
|
||||
@ -1211,9 +1212,6 @@ class GLIGENTextBoxApply:
|
||||
return (c, )
|
||||
|
||||
class EmptyLatentImage:
|
||||
def __init__(self):
|
||||
self.device = comfy.model_management.intermediate_device()
|
||||
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
return {
|
||||
@ -1232,7 +1230,7 @@ class EmptyLatentImage:
|
||||
SEARCH_ALIASES = ["empty", "empty latent", "new latent", "create latent", "blank latent", "blank"]
|
||||
|
||||
def generate(self, width, height, batch_size=1):
|
||||
latent = torch.zeros([batch_size, 4, height // 8, width // 8], device=self.device)
|
||||
latent = torch.zeros([batch_size, 4, height // 8, width // 8], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
|
||||
return ({"samples": latent, "downscale_ratio_spacial": 8}, )
|
||||
|
||||
|
||||
@ -1724,6 +1722,8 @@ class LoadImage:
|
||||
output_masks = []
|
||||
w, h = None, None
|
||||
|
||||
dtype = comfy.model_management.intermediate_dtype()
|
||||
|
||||
for i in ImageSequence.Iterator(img):
|
||||
i = node_helpers.pillow(ImageOps.exif_transpose, i)
|
||||
|
||||
@ -1748,8 +1748,8 @@ class LoadImage:
|
||||
mask = 1. - torch.from_numpy(mask)
|
||||
else:
|
||||
mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
|
||||
output_images.append(image)
|
||||
output_masks.append(mask.unsqueeze(0))
|
||||
output_images.append(image.to(dtype=dtype))
|
||||
output_masks.append(mask.unsqueeze(0).to(dtype=dtype))
|
||||
|
||||
if img.format == "MPO":
|
||||
break # ignore all frames except the first one for MPO format
|
||||
@ -1779,6 +1779,7 @@ class LoadImage:
|
||||
return True
|
||||
|
||||
class LoadImageMask:
|
||||
ESSENTIALS_CATEGORY = "Image Tools"
|
||||
SEARCH_ALIASES = ["import mask", "alpha mask", "channel mask"]
|
||||
|
||||
_color_channels = ["alpha", "red", "green", "blue"]
|
||||
@ -1887,6 +1888,7 @@ class ImageScale:
|
||||
return (s,)
|
||||
|
||||
class ImageScaleBy:
|
||||
ESSENTIALS_CATEGORY = "Image Tools"
|
||||
upscale_methods = ["nearest-exact", "bilinear", "area", "bicubic", "lanczos"]
|
||||
|
||||
@classmethod
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
comfyui-frontend-package==1.41.19
|
||||
comfyui-frontend-package==1.41.20
|
||||
comfyui-workflow-templates==0.9.21
|
||||
comfyui-embedded-docs==0.4.3
|
||||
torch
|
||||
@ -23,7 +23,7 @@ SQLAlchemy
|
||||
filelock
|
||||
av>=14.2.0
|
||||
comfy-kitchen>=0.2.8
|
||||
comfy-aimdo>=0.2.11
|
||||
comfy-aimdo>=0.2.12
|
||||
requests
|
||||
simpleeval>=1.0.0
|
||||
blake3
|
||||
|
||||
@ -310,7 +310,7 @@ class PromptServer():
|
||||
@routes.get("/")
|
||||
async def get_root(request):
|
||||
response = web.FileResponse(os.path.join(self.web_root, "index.html"))
|
||||
response.headers['Cache-Control'] = 'no-cache'
|
||||
response.headers['Cache-Control'] = 'no-store, must-revalidate'
|
||||
response.headers["Pragma"] = "no-cache"
|
||||
response.headers["Expires"] = "0"
|
||||
return response
|
||||
|
||||
@ -28,31 +28,31 @@ CACHE_SCENARIOS = [
|
||||
},
|
||||
# JavaScript/CSS scenarios
|
||||
{
|
||||
"name": "js_no_cache",
|
||||
"name": "js_no_store",
|
||||
"path": "/script.js",
|
||||
"status": 200,
|
||||
"expected_cache": "no-cache",
|
||||
"expected_cache": "no-store",
|
||||
"should_have_header": True,
|
||||
},
|
||||
{
|
||||
"name": "css_no_cache",
|
||||
"name": "css_no_store",
|
||||
"path": "/styles.css",
|
||||
"status": 200,
|
||||
"expected_cache": "no-cache",
|
||||
"expected_cache": "no-store",
|
||||
"should_have_header": True,
|
||||
},
|
||||
{
|
||||
"name": "index_json_no_cache",
|
||||
"name": "index_json_no_store",
|
||||
"path": "/api/index.json",
|
||||
"status": 200,
|
||||
"expected_cache": "no-cache",
|
||||
"expected_cache": "no-store",
|
||||
"should_have_header": True,
|
||||
},
|
||||
{
|
||||
"name": "localized_index_json_no_cache",
|
||||
"name": "localized_index_json_no_store",
|
||||
"path": "/templates/index.zh.json",
|
||||
"status": 200,
|
||||
"expected_cache": "no-cache",
|
||||
"expected_cache": "no-store",
|
||||
"should_have_header": True,
|
||||
},
|
||||
# Non-matching files
|
||||
|
||||
Loading…
Reference in New Issue
Block a user