Merge branch 'comfyanonymous:master' into master

2026-07-13 09:57:20 +08:00 · 2025-11-04 13:09:30 +03:00 · 2025-11-04 13:09:30 +03:00 · 11083ab58c
commit 11083ab58c
parent 634b284407 af4b7b5edb
3 changed files with 63 additions and 7 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -35,7 +35,7 @@ def scaled_dot_product_attention(q, k, v, *args, **kwargs):


 try:
-    if torch.cuda.is_available():
+    if torch.cuda.is_available() and comfy.model_management.WINDOWS:
        from torch.nn.attention import SDPBackend, sdpa_kernel
        import inspect
        if "set_priority" in inspect.signature(sdpa_kernel).parameters:
@ -71,7 +71,6 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
    return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)


-@torch.compiler.disable()
 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False):
    # NOTE: offloadable=False is a a legacy and if you are a custom node author reading this please pass
    # offloadable=True and call uncast_bias_weight() after your last usage of the weight/bias. This
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -126,12 +126,12 @@ class QuantizedTensor(torch.Tensor):
        return torch.Tensor._make_wrapper_subclass(cls, qdata.shape, device=qdata.device, dtype=qdata.dtype, requires_grad=False)

    def __init__(self, qdata, layout_type, layout_params):
-        self._qdata = qdata.contiguous()
+        self._qdata = qdata
        self._layout_type = layout_type
        self._layout_params = layout_params

    def __repr__(self):
-        layout_name = self._layout_type.__name__
+        layout_name = self._layout_type
        param_str = ", ".join(f"{k}={v}" for k, v in list(self._layout_params.items())[:2])
        return f"QuantizedTensor(shape={self.shape}, layout={layout_name}, {param_str})"

@ -179,7 +179,7 @@ class QuantizedTensor(torch.Tensor):
            attr_name = f"_layout_param_{key}"
            layout_params[key] = inner_tensors[attr_name]

-        return QuantizedTensor(inner_tensors["_q_data"], layout_type, layout_params)
+        return QuantizedTensor(inner_tensors["_qdata"], layout_type, layout_params)

    @classmethod
    def from_float(cls, tensor, layout_type, **quantize_kwargs) -> 'QuantizedTensor':
@ -411,13 +411,17 @@ def fp8_linear(func, args, kwargs):

        try:
            output = torch._scaled_mm(
-                plain_input.reshape(-1, input_shape[2]),
+                plain_input.reshape(-1, input_shape[2]).contiguous(),
                weight_t,
                bias=bias,
                scale_a=scale_a,
                scale_b=scale_b,
                out_dtype=out_dtype,
            )
+
+            if isinstance(output, tuple):  # TODO: remove when we drop support for torch 2.4
+                output = output[0]
+
            if not tensor_2d:
                output = output.reshape((-1, input_shape[1], weight.shape[0]))

@ -442,6 +446,59 @@ def fp8_linear(func, args, kwargs):

    return torch.nn.functional.linear(input_tensor, weight, bias)

+def fp8_mm_(input_tensor, weight, bias=None, out_dtype=None):
+    if out_dtype is None:
+        out_dtype = input_tensor._layout_params['orig_dtype']
+
+    plain_input, scale_a = TensorCoreFP8Layout.get_plain_tensors(input_tensor)
+    plain_weight, scale_b = TensorCoreFP8Layout.get_plain_tensors(weight)
+
+    output = torch._scaled_mm(
+        plain_input.contiguous(),
+        plain_weight,
+        bias=bias,
+        scale_a=scale_a,
+        scale_b=scale_b,
+        out_dtype=out_dtype,
+    )
+
+    if isinstance(output, tuple):  # TODO: remove when we drop support for torch 2.4
+        output = output[0]
+    return output
+
+@register_layout_op(torch.ops.aten.addmm.default, "TensorCoreFP8Layout")
+def fp8_addmm(func, args, kwargs):
+    input_tensor = args[1]
+    weight = args[2]
+    bias = args[0]
+
+    if isinstance(input_tensor, QuantizedTensor) and isinstance(weight, QuantizedTensor):
+        return fp8_mm_(input_tensor, weight, bias=bias, out_dtype=kwargs.get("out_dtype", None))
+
+    a = list(args)
+    if isinstance(args[0], QuantizedTensor):
+        a[0] = args[0].dequantize()
+    if isinstance(args[1], QuantizedTensor):
+        a[1] = args[1].dequantize()
+    if isinstance(args[2], QuantizedTensor):
+        a[2] = args[2].dequantize()
+
+    return func(*a, **kwargs)
+
+@register_layout_op(torch.ops.aten.mm.default, "TensorCoreFP8Layout")
+def fp8_mm(func, args, kwargs):
+    input_tensor = args[0]
+    weight = args[1]
+
+    if isinstance(input_tensor, QuantizedTensor) and isinstance(weight, QuantizedTensor):
+        return fp8_mm_(input_tensor, weight, bias=None, out_dtype=kwargs.get("out_dtype", None))
+
+    a = list(args)
+    if isinstance(args[0], QuantizedTensor):
+        a[0] = args[0].dequantize()
+    if isinstance(args[1], QuantizedTensor):
+        a[1] = args[1].dequantize()
+    return func(*a, **kwargs)

@register_layout_op(torch.ops.aten.view.default, "TensorCoreFP8Layout")
@register_layout_op(torch.ops.aten.t.default, "TensorCoreFP8Layout")
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,6 @@
 comfyui-frontend-package==1.28.8
 comfyui-workflow-templates==0.2.4
-comfyui-embedded-docs==0.3.0
+comfyui-embedded-docs==0.3.1
 torch
 torchsde
 torchvision