From ce76d8bd275bbbe6bfb6ebc4371393907c1faa38 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 15:32:24 -0400
Subject: [PATCH] Reduce memory usage for fp8 scaled op.

---
 comfy/quant_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index b14e03084..fb35a0d40 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -358,7 +358,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
         lp_amax = torch.finfo(dtype).max
-        tensor_scaled = tensor.float() / scale
+        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
 
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
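
Note: the following is a minimal standalone sketch of why the new expression
peaks lower in memory; it is not part of the patch, and the tensor shape,
dtype, and scale value are assumed for illustration.

# Sketch (hypothetical sizes) of the memory behavior of the old vs. new
# scaling expression in TensorCoreFP8Layout.
import torch

tensor = torch.randn(4096, 4096, dtype=torch.bfloat16)  # ~32 MiB in bf16
scale = torch.tensor(0.01, dtype=torch.float32)

# Old: tensor.float() materializes a full fp32 copy of the tensor (~64 MiB),
# and the division then allocates a second fp32 tensor of the same size.
old = tensor.float() / scale

# New: 1.0 / scale is a tiny fp32 tensor cast down to the tensor's own dtype;
# the multiply allocates only one bf16-sized result, so no fp32 copy of the
# full tensor is ever created.
new = tensor * (1.0 / scale).to(tensor.dtype)

The tradeoff is that the scaling arithmetic now happens in the tensor's
lower-precision dtype rather than fp32 before the clamp and fp8 cast, in
exchange for eliminating both full-size fp32 temporaries on this step.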