mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-27 09:27:24 +08:00
Compare commits
8 Commits
6c7960ea10
...
2f13ca038f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2f13ca038f | ||
|
|
b138133ffa | ||
|
|
025e6792ee | ||
|
|
867b8d2408 | ||
|
|
d0f0b15cf5 | ||
|
|
b5bb83c964 | ||
|
|
45a2363e6a | ||
|
|
edd44a6874 |
@ -31,7 +31,8 @@
|
||||
[github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
|
||||
[github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases
|
||||
|
||||
<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/4aab0bef-b413-4595-9766-a2c134676d27" />
|
||||
<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/36e065e0-bfae-4456-8c7f-8369d5ea48a2" />
|
||||
<br>
|
||||
</div>
|
||||
|
||||
ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node graph interface empowers creatives to generate images, videos, 3D models, audio, and more...
|
||||
|
||||
@ -91,6 +91,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"
|
||||
|
||||
parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
|
||||
parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
|
||||
parser.add_argument("--enable-triton-backend", action="store_true", help="ComfyUI will enable the use of Triton backend in comfy-kitchen. Is disabled at launch by default.")
|
||||
|
||||
class LatentPreviewMethod(enum.Enum):
|
||||
NoPreviews = "none"
|
||||
|
||||
@ -55,6 +55,11 @@ def stochastic_rounding(value, dtype, seed=0):
|
||||
if dtype == torch.bfloat16:
|
||||
return value.to(dtype=torch.bfloat16)
|
||||
if dtype == torch.float8_e4m3fn or dtype == torch.float8_e5m2:
|
||||
# MPS does not support FP8 dtypes — perform rounding on CPU and return the result there.
|
||||
on_mps = value.device.type == "mps"
|
||||
if on_mps:
|
||||
value = value.cpu()
|
||||
|
||||
generator = torch.Generator(device=value.device)
|
||||
generator.manual_seed(seed)
|
||||
output = torch.empty_like(value, dtype=dtype)
|
||||
@ -159,6 +164,12 @@ def stochastic_round_quantize_nvfp4(x, per_tensor_scale, pad_16x, seed=0):
|
||||
"""Round up x to the nearest multiple."""
|
||||
return ((x + multiple - 1) // multiple) * multiple
|
||||
|
||||
# MPS does not support FP8 dtypes used for block scales — perform on CPU.
|
||||
on_mps = x.device.type == "mps"
|
||||
if on_mps:
|
||||
x = x.cpu()
|
||||
per_tensor_scale = per_tensor_scale.cpu() if isinstance(per_tensor_scale, torch.Tensor) else per_tensor_scale
|
||||
|
||||
generator = torch.Generator(device=x.device)
|
||||
generator.manual_seed(seed)
|
||||
|
||||
@ -179,6 +190,12 @@ def stochastic_round_quantize_nvfp4_by_block(x, per_tensor_scale, pad_16x, seed=
|
||||
"""Round up x to the nearest multiple."""
|
||||
return ((x + multiple - 1) // multiple) * multiple
|
||||
|
||||
# MPS does not support FP8 dtypes used for block scales — perform on CPU.
|
||||
on_mps = x.device.type == "mps"
|
||||
if on_mps:
|
||||
x = x.cpu()
|
||||
per_tensor_scale = per_tensor_scale.cpu() if isinstance(per_tensor_scale, torch.Tensor) else per_tensor_scale
|
||||
|
||||
orig_shape = x.shape
|
||||
|
||||
# Handle padding
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
import torch
|
||||
import logging
|
||||
|
||||
from comfy.cli_args import args
|
||||
|
||||
try:
|
||||
import comfy_kitchen as ck
|
||||
from comfy_kitchen.tensor import (
|
||||
@ -21,7 +23,15 @@ try:
|
||||
ck.registry.disable("cuda")
|
||||
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
|
||||
|
||||
ck.registry.disable("triton")
|
||||
if args.enable_triton_backend:
|
||||
try:
|
||||
import triton
|
||||
logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__)
|
||||
except ImportError as e:
|
||||
logging.error(f"Failed to import triton, Error: {e}, the comfy-kitchen triton backend will not be available.")
|
||||
ck.registry.disable("triton")
|
||||
else:
|
||||
ck.registry.disable("triton")
|
||||
for k, v in ck.list_backends().items():
|
||||
logging.info(f"Found comfy_kitchen backend {k}: {v}")
|
||||
except ImportError as e:
|
||||
@ -83,6 +93,12 @@ class _TensorCoreFP8LayoutBase(_CKFp8Layout):
|
||||
if not isinstance(scale, torch.Tensor):
|
||||
scale = torch.tensor(scale, device=tensor.device, dtype=torch.float32)
|
||||
|
||||
# MPS does not support FP8 dtypes — move to CPU for quantization.
|
||||
on_mps = tensor.device.type == "mps"
|
||||
if on_mps:
|
||||
tensor = tensor.cpu()
|
||||
scale = scale.cpu()
|
||||
|
||||
if stochastic_rounding > 0:
|
||||
if inplace_ops:
|
||||
tensor *= (1.0 / scale).to(tensor.dtype)
|
||||
|
||||
@ -202,14 +202,11 @@ class JoinImageWithAlpha(io.ComfyNode):
|
||||
|
||||
@classmethod
|
||||
def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
|
||||
batch_size = min(len(image), len(alpha))
|
||||
out_images = []
|
||||
|
||||
batch_size = max(len(image), len(alpha))
|
||||
alpha = 1.0 - resize_mask(alpha, image.shape[1:])
|
||||
for i in range(batch_size):
|
||||
out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2))
|
||||
|
||||
return io.NodeOutput(torch.stack(out_images))
|
||||
alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
|
||||
image = comfy.utils.repeat_to_batch_size(image, batch_size)
|
||||
return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))
|
||||
|
||||
|
||||
class CompositingExtension(ComfyExtension):
|
||||
|
||||
@ -86,6 +86,6 @@ def image_alpha_fix(destination, source):
|
||||
if destination.shape[-1] < source.shape[-1]:
|
||||
source = source[...,:destination.shape[-1]]
|
||||
elif destination.shape[-1] > source.shape[-1]:
|
||||
destination = torch.nn.functional.pad(destination, (0, 1))
|
||||
destination[..., -1] = 1.0
|
||||
source = torch.nn.functional.pad(source, (0, 1))
|
||||
source[..., -1] = 1.0
|
||||
return destination, source
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import errno
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
@ -1245,7 +1246,13 @@ class PromptServer():
|
||||
address = addr[0]
|
||||
port = addr[1]
|
||||
site = web.TCPSite(runner, address, port, ssl_context=ssl_ctx)
|
||||
await site.start()
|
||||
try:
|
||||
await site.start()
|
||||
except OSError as e:
|
||||
if e.errno == errno.EADDRINUSE:
|
||||
logging.error(f"Port {port} is already in use on address {address}. Please close the other application or use a different port with --port.")
|
||||
raise SystemExit(1)
|
||||
raise
|
||||
|
||||
if not hasattr(self, 'address'):
|
||||
self.address = address #TODO: remove this
|
||||
|
||||
147
tests/test_fp8_mps.py
Normal file
147
tests/test_fp8_mps.py
Normal file
@ -0,0 +1,147 @@
|
||||
"""
|
||||
Tests for FP8 quantization on MPS (Apple Silicon) devices.
|
||||
|
||||
MPS does not natively support float8_e4m3fn or float8_e5m2 dtypes.
|
||||
These tests verify that:
|
||||
1. FP8 operations correctly fall back to CPU when on MPS.
|
||||
2. The round-trip (float32 -> FP8 quantized on CPU, result left on CPU -> float32) is numerically sound.
|
||||
3. No "Placeholder storage has not been allocated on MPS device!" errors occur.
|
||||
"""
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import comfy.float
|
||||
from comfy.quant_ops import TensorCoreFP8E4M3Layout, TensorCoreFP8E5M2Layout
|
||||
|
||||
# Every test in this module allocates tensors on the "mps" device, so the
# whole module is skipped when torch reports the MPS backend as unavailable.
_MPS_UNAVAILABLE = not torch.backends.mps.is_available()
pytestmark = pytest.mark.skipif(_MPS_UNAVAILABLE, reason="MPS backend not available")
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_mps_tensor(shape=(256, 256), dtype=torch.float32):
    """Return a tensor of standard-normal samples allocated on the MPS device.

    Args:
        shape: Dimensions of the tensor to create.
        dtype: Element type of the tensor.
    """
    sample = torch.randn(shape, dtype=dtype, device="mps")
    return sample
|
||||
|
||||
|
||||
# ── Tests for comfy.float ────────────────────────────────────────────────────
|
||||
|
||||
class TestStochasticRoundingMPS:
    """Exercise comfy.float.stochastic_rounding with inputs that live on MPS."""

    def _round_and_check(self, target_dtype):
        """Round a 64x64 float32 MPS tensor to *target_dtype*; check dtype and shape.

        Not collected by pytest (leading underscore); shared by the two FP8 tests.
        """
        source = _make_mps_tensor((64, 64), dtype=torch.float32)
        rounded = comfy.float.stochastic_rounding(source, dtype=target_dtype, seed=42)

        assert rounded.dtype == target_dtype
        assert rounded.shape == source.shape

    def test_stochastic_rounding_fp8_e4m3fn_on_mps(self):
        """Rounding an MPS tensor to float8_e4m3fn must complete and keep the shape."""
        self._round_and_check(torch.float8_e4m3fn)

    def test_stochastic_rounding_fp8_e5m2_on_mps(self):
        """Rounding an MPS tensor to float8_e5m2 must complete and keep the shape."""
        self._round_and_check(torch.float8_e5m2)

    def test_stochastic_rounding_fp8_result_on_cpu(self):
        """An FP8 result produced from an MPS input is expected to live on the CPU."""
        source = _make_mps_tensor((32, 32), dtype=torch.float32)
        rounded = comfy.float.stochastic_rounding(source, dtype=torch.float8_e4m3fn, seed=42)

        # The FP8 result is not moved back to MPS, so it must report the CPU device.
        assert rounded.device.type == "cpu"

    def test_stochastic_rounding_non_fp8_still_works(self):
        """float16 and bfloat16 targets keep their result on the MPS device."""
        source = _make_mps_tensor((32, 32), dtype=torch.float32)

        for target_dtype in (torch.float16, torch.bfloat16):
            rounded = comfy.float.stochastic_rounding(source, dtype=target_dtype, seed=0)
            assert rounded.dtype == target_dtype
            assert rounded.device.type == "mps"

    def test_stochastic_rounding_fp8_numerical_sanity(self):
        """float32 -> float8_e4m3fn -> float32 must keep the absolute error bounded."""
        # Inputs are limited to +/-448, the float8_e4m3fn range used by this test.
        bounded = _make_mps_tensor((128, 128), dtype=torch.float32).clamp(min=-448, max=448)

        quantized = comfy.float.stochastic_rounding(bounded, dtype=torch.float8_e4m3fn, seed=123)

        # Widen back to float32 and compare against a CPU copy of the input.
        restored = quantized.to(torch.float32)
        reference = bounded.cpu()
        max_abs_err = (restored - reference).abs().max().item()

        # This is an absolute (not relative) error check with a deliberately
        # loose bound for standard-normal data.
        assert max_abs_err < 2.0, f"FP8 round-trip error too large: {max_abs_err}"
|
||||
|
||||
|
||||
class TestManualStochasticRoundMPS:
    """Smoke test of FP8 rounding for a small MPS tensor.

    The test goes through the public comfy.float.stochastic_rounding entry
    point; it does not call a lower-level rounding helper directly.
    NOTE(review): presumably this reaches manual_stochastic_round_to_float8
    internally - confirm against comfy/float.py.
    """

    def test_manual_round_fp8_on_mps_tensor(self):
        """Rounding a 16x16 MPS tensor to float8_e4m3fn must not raise.

        Guards against failures such as the 'Placeholder storage' error.
        """
        source = _make_mps_tensor((16, 16), dtype=torch.float32)
        rounded = comfy.float.stochastic_rounding(
            source,
            dtype=torch.float8_e4m3fn,
            seed=42,
        )
        assert rounded.dtype == torch.float8_e4m3fn
|
||||
|
||||
|
||||
class TestNVFP4StochasticRoundMPS:
    """Check that NVFP4 stochastic rounding accepts inputs allocated on MPS."""

    def test_nvfp4_stochastic_round_on_mps(self):
        """stochastic_round_quantize_nvfp4 must run for MPS inputs and return uint8 data."""
        # Both the data and the per-tensor scale start out on the MPS device.
        tensor_opts = {"device": "mps", "dtype": torch.float32}
        # 32 is a multiple of 16 in both dimensions, so no padding is requested.
        values = torch.randn(32, 32, **tensor_opts)
        per_tensor_scale = torch.tensor(1.0, **tensor_opts)

        result = comfy.float.stochastic_round_quantize_nvfp4(
            values, per_tensor_scale, pad_16x=False, seed=42
        )
        # The call is expected to yield exactly two values; only the first is inspected.
        packed, _block_scale = result
        assert packed.dtype == torch.uint8
|
||||
|
||||
|
||||
# ── Tests for comfy.quant_ops (integration) ──────────────────────────────────
|
||||
|
||||
class TestQuantOpsMPS:
    """Integration checks for the FP8 layout classes in comfy.quant_ops on MPS."""

    @staticmethod
    def _quantize(layout, source_dtype, rounding):
        """Quantize a fresh 64x64 MPS tensor with *layout*; return (qdata, params)."""
        source = _make_mps_tensor((64, 64), dtype=source_dtype)
        return layout.quantize(source, scale="recalculate", stochastic_rounding=rounding)

    def test_fp8_layout_quantize_on_mps(self):
        """TensorCoreFP8E4M3Layout.quantize must accept a bfloat16 MPS tensor."""
        qdata, params = self._quantize(TensorCoreFP8E4M3Layout, torch.bfloat16, 42)

        assert qdata.dtype == torch.float8_e4m3fn
        assert params.orig_dtype == torch.bfloat16

    def test_fp8_layout_quantize_without_stochastic_on_mps(self):
        """With stochastic_rounding=0 the non-stochastic quantize path must also work."""
        qdata, _params = self._quantize(TensorCoreFP8E4M3Layout, torch.bfloat16, 0)

        assert qdata.dtype == torch.float8_e4m3fn

    def test_fp8_e5m2_layout_quantize_on_mps(self):
        """TensorCoreFP8E5M2Layout.quantize must accept a float32 MPS tensor."""
        qdata, _params = self._quantize(TensorCoreFP8E5M2Layout, torch.float32, 42)

        assert qdata.dtype == torch.float8_e5m2
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; re-raise it as the process
    # status so a failing run is visible to the shell / CI when this file is
    # executed directly.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|
||||
Loading…
Reference in New Issue
Block a user