From 971932346ac6e6e02c1e1e8cfe34df2f0e1cea3e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:27:38 -0700 Subject: [PATCH] Update quant doc so it's not completely wrong. (#13381) There is still more that needs to be fixed. --- QUANTIZATION.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/QUANTIZATION.md b/QUANTIZATION.md index 1693e13f3..300822029 100644 --- a/QUANTIZATION.md +++ b/QUANTIZATION.md @@ -139,9 +139,9 @@ Example: "_quantization_metadata": { "format_version": "1.0", "layers": { - "model.layers.0.mlp.up_proj": "float8_e4m3fn", - "model.layers.0.mlp.down_proj": "float8_e4m3fn", - "model.layers.1.mlp.up_proj": "float8_e4m3fn" + "model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"}, + "model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"}, + "model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"} } } } @@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s 3. **Compute scales**: Derive `input_scale` from collected statistics 4. **Store in checkpoint**: Save `input_scale` parameters alongside weights -The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. \ No newline at end of file +The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.