Reduce memory usage for fp8 scaled op. (#10531)

This commit is contained in:
comfyanonymous 2025-10-29 12:43:51 -07:00 committed by GitHub
parent 6c14f3afac
commit 1a58087ac2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -358,7 +358,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
scale = scale.to(device=tensor.device, dtype=torch.float32)
lp_amax = torch.finfo(dtype).max
-tensor_scaled = tensor.float() / scale
+tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)