quant ops: Dequantize weight in-place (#10935)

In flux2 these weights are huge (200 MB). Since plain_tensor is already a throw-away deep copy, do the scale multiplication in-place on it to save VRAM, rather than allocating a second full-size tensor for the product.
2025-12-17 01:52:59 +08:00 · 2025-11-28 02:06:30 +10:00 · 2025-11-28 02:06:30 +10:00 · 3f382a4f98
commit 3f382a4f98
parent f17251bec6
1 changed file with 2 additions and 1 deletion
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -425,7 +425,8 @@ class TensorCoreFP8Layout(QuantizedLayout):
    @staticmethod
    def dequantize(qdata, scale, orig_dtype, **kwargs):
        plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
-        return plain_tensor * scale
+        plain_tensor.mul_(scale)
+        return plain_tensor

    @classmethod
    def get_plain_tensors(cls, qtensor):