From ce76d8bd275bbbe6bfb6ebc4371393907c1faa38 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 15:32:24 -0400
Subject: [PATCH] Reduce memory usage for fp8 scaled op.

---
 comfy/quant_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index b14e03084..fb35a0d40 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -358,7 +358,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
         lp_amax = torch.finfo(dtype).max
-        tensor_scaled = tensor.float() / scale
+        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
 
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
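
Note: the following is a minimal standalone sketch of why the new expression
peaks lower in memory; it is not part of the patch, and the tensor shape,
dtype, and scale value are assumed for illustration.

# Sketch (hypothetical sizes) of the memory behavior of the old vs. new
# scaling expression in TensorCoreFP8Layout.
import torch

tensor = torch.randn(4096, 4096, dtype=torch.bfloat16)  # ~32 MiB in bf16
scale = torch.tensor(0.01, dtype=torch.float32)

# Old: tensor.float() materializes a full fp32 copy of the tensor (~64 MiB),
# and the division then allocates a second fp32 tensor of the same size.
old = tensor.float() / scale

# New: 1.0 / scale is a tiny fp32 tensor cast down to the tensor's own dtype;
# the multiply allocates only one bf16-sized result, so no fp32 copy of the
# full tensor is ever created.
new = tensor * (1.0 / scale).to(tensor.dtype)

The tradeoff is that the scaling arithmetic now happens in the tensor's
lower-precision dtype rather than fp32 before the clamp and fp8 cast, in
exchange for eliminating both full-size fp32 temporaries on this step.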