Mirror of https://github.com/comfyanonymous/ComfyUI.git
Reduce memory usage for fp8 scaled op. (#10531)
commit 1a58087ac2
parent 6c14f3afac
@@ -358,7 +358,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
         lp_amax = torch.finfo(dtype).max
-        tensor_scaled = tensor.float() / scale
+        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
 
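Why this reduces memory, sketched below: the old expression `tensor.float() / scale` materializes a full float32 copy of the weight tensor before dividing, while the new expression folds the reciprocal scale into a single multiply carried out in the tensor's own (typically half-precision) dtype, so the large intermediate is never upcast. The shape, dtype, and scale value in this sketch are illustrative assumptions, not taken from the commit, and the fp8 cast requires a reasonably recent PyTorch build.

```python
import torch

# Illustrative setup (assumed values; the real op handles arbitrary weight shapes).
tensor = torch.randn(4096, 4096, dtype=torch.bfloat16)
scale = torch.tensor(0.02, dtype=torch.float32)
fp8_dtype = torch.float8_e4m3fn
lp_amax = torch.finfo(fp8_dtype).max

# Old path: tensor.float() allocates a float32 copy (4 bytes/element)
# before the division, roughly doubling the transient memory of this
# step for a bf16 input (2 bytes/element).
old_scaled = tensor.float() / scale

# New path: the reciprocal scale is cast down to the tensor's dtype and
# applied with a multiply, so the intermediate stays in bf16.
new_scaled = tensor * (1.0 / scale).to(tensor.dtype)

# Both paths then clamp to the fp8-representable range and cast to fp8.
torch.clamp(new_scaled, min=-lp_amax, max=lp_amax, out=new_scaled)
qdata = new_scaled.to(fp8_dtype, memory_format=torch.contiguous_format)

print(old_scaled.dtype, new_scaled.dtype, qdata.dtype)
# torch.float32 torch.bfloat16 torch.float8_e4m3fn
```

The trade-off is that the scaling now rounds in the lower-precision dtype rather than float32, in exchange for roughly halving the peak memory of this intermediate.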