Mirror of https://github.com/comfyanonymous/ComfyUI.git, synced 2026-05-13 10:42:59 +08:00
Compare commits
9 commits: f0d697409a ... 57d2219e10
| Author | SHA1 | Date |
|---|---|---|
| | 57d2219e10 | |
| | 25757a53c9 | |
| | 1b25f1289e | |
| | e35348aa53 | |
| | cd8c7a2306 | |
| | 6bcd8b96ab | |
| | 96e5287a72 | |
| | 3ddcc095ed | |
| | 353978a9b7 | |
.github/workflows/stable-release.yml (vendored): 2 changes
@@ -145,6 +145,8 @@ jobs:
           cp -r ComfyUI/.ci/windows_${{ inputs.rel_name }}_base_files/* ./
           cp ../update_comfyui_and_python_dependencies.bat ./update/

+          echo 'local-portable' > ComfyUI/.comfy_environment
+
           cd ..

           "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=768m -ms=on -mf=BCJ2 ComfyUI_windows_portable.7z ComfyUI_windows_portable
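The added step stamps the portable build with an environment marker before the archive is packed. A hedged sketch of how such a marker could be read back at startup; the file name and value come from the diff, the reader function is purely illustrative and not part of this changeset:

from pathlib import Path

# Illustrative only: a hypothetical startup check for the marker written by
# the workflow step above ('local-portable' in ComfyUI/.comfy_environment).
def comfy_environment(base: Path = Path("ComfyUI")) -> str | None:
    marker = base / ".comfy_environment"
    return marker.read_text().strip() if marker.exists() else None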
comfy/model_patcher.py
@@ -26,6 +26,7 @@ import uuid
 from typing import Callable, Optional

 import torch
+import tqdm

 import comfy.float
 import comfy.hooks
@@ -1651,7 +1652,11 @@ class ModelPatcherDynamic(ModelPatcher):
         self.model.model_loaded_weight_memory += casted_buf.numel() * casted_buf.element_size()

         force_load_stat = f" Force pre-loaded {len(self.backup)} weights: {self.model.model_loaded_weight_memory // 1024} KB." if len(self.backup) > 0 else ""
-        logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.{force_load_stat}")
+        log_key = (self.patches_uuid, allocated_size, num_patches, len(self.backup), self.model.model_loaded_weight_memory)
+        in_loop = bool(getattr(tqdm.tqdm, "_instances", None))
+        level = logging.DEBUG if in_loop and getattr(self, "_last_prepare_log_key", None) == log_key else logging.INFO
+        self._last_prepare_log_key = log_key
+        logging.log(level, f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.{force_load_stat}")

         self.model.device = device_to
         self.model.current_weight_patches_uuid = self.patches_uuid
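The new "import tqdm" exists to support the second hunk: a repeat "prepared for dynamic VRAM loading" message is demoted to DEBUG when the identical prepare state recurs while a tqdm progress bar is live, so per-step sampling loops stop spamming the console. A minimal standalone sketch of the same dedup idea, with illustrative names:

import logging
import tqdm

_last_key = None  # module-level stand-in for self._last_prepare_log_key

def log_prepare(model_name, staged_mb, num_patches):
    global _last_key
    key = (model_name, staged_mb, num_patches)
    # tqdm tracks live progress bars in tqdm.tqdm._instances; a non-empty
    # set means we are mid-loop, where repeated INFO lines add no information.
    in_loop = bool(getattr(tqdm.tqdm, "_instances", None))
    level = logging.DEBUG if in_loop and _last_key == key else logging.INFO
    _last_key = key
    logging.log(level, f"Model {model_name} prepared for dynamic VRAM loading. "
                       f"{staged_mb}MB Staged. {num_patches} patches attached.")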
comfy/ops.py: 73 changes
@@ -1087,6 +1087,51 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                     orig_dtype=MixedPrecisionOps._compute_dtype,
                     orig_shape=(self.out_features, self.in_features),
                 )
+            elif self.quant_format == "svdquant_w4a4":
+                # SVDQuant W4A4: per-group weight scales + low-rank correction
+                # (proj_down, proj_up) + activation smoothing (smooth_factor)
+                wscales = self._load_scale_param(state_dict, prefix, "weight_scale", device, manually_loaded_keys)
+                proj_down = self._load_scale_param(state_dict, prefix, "proj_down", device, manually_loaded_keys)
+                proj_up = self._load_scale_param(state_dict, prefix, "proj_up", device, manually_loaded_keys)
+                smooth_factor = self._load_scale_param(state_dict, prefix, "smooth_factor", device, manually_loaded_keys)
+                act_unsigned = bool(layer_conf.get("act_unsigned", False))
+
+                # Early Qwen-Image conversion artifacts did not persist the
+                # fused GELU -> fc2 unsigned-activation flag. Those layers
+                # are the second linear in the feed-forward block.
+                if not act_unsigned and (
+                    layer_name.endswith(".img_mlp.net.2") or layer_name.endswith(".txt_mlp.net.2")
+                ):
+                    act_unsigned = True
+
+                if any(t is None for t in (wscales, proj_down, proj_up, smooth_factor)):
+                    raise ValueError(f"Missing SVDQuant W4A4 parameters for layer {layer_name}")
+
+                params = layout_cls.Params(
+                    scale=wscales,
+                    orig_dtype=MixedPrecisionOps._compute_dtype,
+                    orig_shape=(self.out_features, self.in_features),
+                    proj_down=proj_down,
+                    proj_up=proj_up,
+                    smooth_factor=smooth_factor,
+                    act_unsigned=act_unsigned,
+                )
+            elif self.quant_format == "awq_w4a16":
+                # AWQ W4A16: int4 weight, fp16/bf16 activation. Used for
+                # the modulation linears (img_mod.1 / txt_mod.1) so they
+                # stay int4 in checkpoint + VRAM rather than getting
+                # dequantized to bf16 at conversion time (~10 GB saving).
+                wscales = self._load_scale_param(state_dict, prefix, "weight_scale", device, manually_loaded_keys)
+                wzeros = self._load_scale_param(state_dict, prefix, "weight_zero", device, manually_loaded_keys)
+                if wscales is None or wzeros is None:
+                    raise ValueError(f"Missing AWQ W4A16 parameters for layer {layer_name}")
+                params = layout_cls.Params(
+                    scale=wscales,
+                    zeros=wzeros,
+                    group_size=int(layer_conf.get("group_size", qconfig.get("group_size", 64))),
+                    orig_dtype=MixedPrecisionOps._compute_dtype,
+                    orig_shape=(self.out_features, self.in_features),
+                )
             else:
                 raise ValueError(f"Unsupported quantization format: {self.quant_format}")
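For orientation, a sketch of the decomposition those parameters describe, assuming the usual SVDQuant formulation: activations are divided by smooth_factor, the smoothed weight splits into a high-precision low-rank branch (proj_down, proj_up) plus a 4-bit residual, and the two branches are summed. Reference math only, with assumed shapes; the real fused kernel lives in comfy_kitchen:

import torch

def svdquant_w4a4_reference(x, w_residual_dq, proj_down, proj_up, smooth_factor):
    # Assumed shapes (illustrative): x (tokens, in_features) float activations;
    # w_residual_dq (in_features, out_features) dequantized int4 residual;
    # proj_down (in_features, rank); proj_up (rank, out_features);
    # smooth_factor (in_features,) per-channel smoothing scales.
    x_hat = x / smooth_factor                  # smoothing shifts outliers into the weight
    low_rank = (x_hat @ proj_down) @ proj_up   # full-precision low-rank correction
    main = x_hat @ w_residual_dq               # stands in for the fused W4A4 GEMM
    return low_rank + main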
@@ -1136,6 +1181,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
         quant_conf = {"format": self.quant_format}
         if self._full_precision_mm_config:
             quant_conf["full_precision_matrix_mult"] = True
+        if bool(getattr(getattr(self.weight, "_params", None), "act_unsigned", False)):
+            quant_conf["act_unsigned"] = True
         sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)

         input_scale = getattr(self, 'input_scale', None)
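The comfy_quant sidecar is JSON serialized into a uint8 tensor so it travels inside the state dict next to the weights; the encode line below is taken from the diff, and the decode is the assumed mirror image:

import json
import torch

quant_conf = {"format": "svdquant_w4a4", "act_unsigned": True}
blob = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)

# Assumed decode path: bytes back out of the tensor, then JSON.
decoded = json.loads(bytes(blob.tolist()).decode('utf-8'))
assert decoded == quant_conf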
@@ -1193,18 +1240,24 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

         # Inference path (unchanged)
         if _use_quantized:
-            # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
-            input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input
-            # Fall back to non-quantized for non-2D tensors
-            if input_reshaped.ndim == 2:
-                reshaped_3d = input.ndim == 3
-                # dtype is now implicit in the layout class
-                scale = getattr(self, 'input_scale', None)
-                if scale is not None:
-                    scale = comfy.model_management.cast_to_device(scale, input.device, None)
-                input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)
+            # Some layouts (e.g. SVDQuant W4A4) do activation quantization
+            # inside their fused kernel and cannot pre-quantize a float
+            # tensor up-front. Skip the input wrapping for those.
+            layout_cls = get_layout_class(self.layout_type)
+            layout_quantizes_input = getattr(layout_cls, "QUANTIZES_INPUT", True)
+            if layout_quantizes_input:
+                # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
+                input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input
+                # Fall back to non-quantized for non-2D tensors
+                if input_reshaped.ndim == 2:
+                    reshaped_3d = input.ndim == 3
+                    # dtype is now implicit in the layout class
+                    scale = getattr(self, 'input_scale', None)
+                    if scale is not None:
+                        scale = comfy.model_management.cast_to_device(scale, input.device, None)
+                    input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

             output = self.forward_comfy_cast_weights(input, compute_dtype, want_requant=isinstance(input, QuantizedTensor))

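The restructuring gates eager activation quantization on a per-layout capability flag. A minimal sketch of the pattern; QUANTIZES_INPUT is the attribute name used in the diff, while the class and helper names here are hypothetical:

class EagerQuantLayout:
    QUANTIZES_INPUT = True    # default: wrap the activation in a quantized tensor first

class FusedW4A4Layout:
    QUANTIZES_INPUT = False   # fused kernel quantizes activations internally

def maybe_quantize_input(layout_cls, x, quantize):
    # quantize: caller-supplied wrapper, e.g. QuantizedTensor.from_float
    if getattr(layout_cls, "QUANTIZES_INPUT", True):
        return quantize(x)
    return x  # hand the float tensor straight to the fused kernel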
@@ -47,6 +47,12 @@ except ImportError as e:
     class _CKNvfp4Layout:
         pass

+    class _CKSVDQuantW4A4Layout:
+        pass
+
+    class _CKAWQW4A16Layout:
+        pass
+
     def register_layout_class(name, cls):
         pass
@@ -65,6 +71,26 @@ if not _CK_MXFP8_AVAILABLE:
     class _CKMxfp8Layout:
         pass

+_CK_SVDQUANT_W4A4_AVAILABLE = False
+if _CK_AVAILABLE:
+    try:
+        from comfy_kitchen.tensor import TensorCoreSVDQuantW4A4Layout as _CKSVDQuantW4A4Layout
+        _CK_SVDQUANT_W4A4_AVAILABLE = True
+    except ImportError:
+        logging.info("comfy_kitchen does not expose SVDQuant W4A4 layout; int4 SVDQuant checkpoints will not be supported.")
+        class _CKSVDQuantW4A4Layout:
+            pass
+
+_CK_AWQ_W4A16_AVAILABLE = False
+if _CK_AVAILABLE:
+    try:
+        from comfy_kitchen.tensor import TensorCoreAWQW4A16Layout as _CKAWQW4A16Layout
+        _CK_AWQ_W4A16_AVAILABLE = True
+    except ImportError:
+        logging.info("comfy_kitchen does not expose AWQ W4A16 layout; int4 AWQ modulation checkpoints will fall back to bf16-dequantized layers.")
+        class _CKAWQW4A16Layout:
+            pass
+
 import comfy.float

 # ==============================================================================
@@ -172,6 +198,21 @@ class TensorCoreFP8E5M2Layout(_TensorCoreFP8LayoutBase):
     FP8_DTYPE = torch.float8_e5m2


+# SVDQuant W4A4 — pre-quantized offline (no runtime quantize), pass through the
+# kitchen-registered layout class unchanged. Comfy-side extension reserved in
+# case per-layer input scales or other Comfy-specific metadata are added later.
+class TensorCoreSVDQuantW4A4Layout(_CKSVDQuantW4A4Layout):
+    pass
+
+
+# AWQ W4A16 — pre-quantized offline (no runtime quantize) via the kitchen
+# eager `gemv_awq_w4a16` op. Used for modulation linears (img_mod.1 /
+# txt_mod.1) on Qwen-Image-Edit and similar topologies where keeping the
+# weight at int4 saves ~10 GB of VRAM vs the bf16-dequantized fallback.
+class TensorCoreAWQW4A16Layout(_CKAWQW4A16Layout):
+    pass
+
+
 # Backward compatibility alias - default to E4M3
 TensorCoreFP8Layout = TensorCoreFP8E4M3Layout
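As a numeric reference for what weight_scale and weight_zero encode: each group of group_size consecutive input channels shares one scale and one zero point, and w = (q - zero) * scale recovers the high-precision weight while activations stay fp16/bf16 (the "A16" half). A hedged dequantization sketch with assumed shapes, reference math only rather than the gemv_awq_w4a16 kernel:

import torch

def awq_w4a16_dequant(q, scales, zeros, group_size=64):
    # Assumed shapes (illustrative): q (out_features, in_features) int4 codes
    # stored in a wider int dtype; scales, zeros (out_features, in_features // group_size).
    out_f, in_f = q.shape
    g = q.float().reshape(out_f, in_f // group_size, group_size)
    w = (g - zeros.float().unsqueeze(-1)) * scales.float().unsqueeze(-1)
    return w.reshape(out_f, in_f)  # one (scale, zero) pair per group of 64 channels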
@@ -186,6 +227,10 @@ register_layout_class("TensorCoreFP8E5M2Layout", TensorCoreFP8E5M2Layout)
 register_layout_class("TensorCoreNVFP4Layout", TensorCoreNVFP4Layout)
 if _CK_MXFP8_AVAILABLE:
     register_layout_class("TensorCoreMXFP8Layout", TensorCoreMXFP8Layout)
+if _CK_SVDQUANT_W4A4_AVAILABLE:
+    register_layout_class("TensorCoreSVDQuantW4A4Layout", TensorCoreSVDQuantW4A4Layout)
+if _CK_AWQ_W4A16_AVAILABLE:
+    register_layout_class("TensorCoreAWQW4A16Layout", TensorCoreAWQW4A16Layout)

 QUANT_ALGOS = {
     "float8_e4m3fn": {
@@ -214,6 +259,22 @@ if _CK_MXFP8_AVAILABLE:
         "group_size": 32,
     }

+if _CK_SVDQUANT_W4A4_AVAILABLE:
+    QUANT_ALGOS["svdquant_w4a4"] = {
+        "storage_t": torch.int8,
+        "parameters": {"weight_scale", "proj_down", "proj_up", "smooth_factor"},
+        "comfy_tensor_layout": "TensorCoreSVDQuantW4A4Layout",
+        "group_size": 64,
+    }
+
+if _CK_AWQ_W4A16_AVAILABLE:
+    QUANT_ALGOS["awq_w4a16"] = {
+        "storage_t": torch.int8,
+        "parameters": {"weight_scale", "weight_zero"},
+        "comfy_tensor_layout": "TensorCoreAWQW4A16Layout",
+        "group_size": 64,
+    }
+

 # ==============================================================================
 # Re-exports for backward compatibility
@@ -222,10 +283,12 @@ if _CK_MXFP8_AVAILABLE:
 __all__ = [
     "QuantizedTensor",
     "QuantizedLayout",
+    "TensorCoreAWQW4A16Layout",
     "TensorCoreFP8Layout",
     "TensorCoreFP8E4M3Layout",
     "TensorCoreFP8E5M2Layout",
     "TensorCoreNVFP4Layout",
+    "TensorCoreSVDQuantW4A4Layout",
     "QUANT_ALGOS",
     "register_layout_op",
 ]
@@ -54,7 +54,12 @@ class GrokImageNode(IO.ComfyNode):
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
+                    options=[
+                        "grok-imagine-image-quality",
+                        "grok-imagine-image-pro",
+                        "grok-imagine-image",
+                        "grok-imagine-image-beta",
+                    ],
                 ),
                 IO.String.Input(
                     "prompt",
@@ -111,10 +116,12 @@ class GrokImageNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]),
                 expr="""
                 (
-                    $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
+                    $rate := widgets.model = "grok-imagine-image-quality"
+                        ? (widgets.resolution = "1k" ? 0.05 : 0.07)
+                        : ($contains(widgets.model, "pro") ? 0.07 : 0.02);
                     {"type":"usd","usd": $rate * widgets.number_of_images}
                 )
                 """,
@@ -167,7 +174,12 @@ class GrokImageEditNode(IO.ComfyNode):
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
+                    options=[
+                        "grok-imagine-image-quality",
+                        "grok-imagine-image-pro",
+                        "grok-imagine-image",
+                        "grok-imagine-image-beta",
+                    ],
                 ),
                 IO.Image.Input("image", display_name="images"),
                 IO.String.Input(
@@ -228,11 +240,19 @@ class GrokImageEditNode(IO.ComfyNode):
             ],
             is_api_node=True,
             price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images", "resolution"]),
                 expr="""
                 (
-                    $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
-                    {"type":"usd","usd": 0.002 + $rate * widgets.number_of_images}
+                    $isQualityModel := widgets.model = "grok-imagine-image-quality";
+                    $isPro := $contains(widgets.model, "pro");
+                    $rate := $isQualityModel
+                        ? (widgets.resolution = "1k" ? 0.05 : 0.07)
+                        : ($isPro ? 0.07 : 0.02);
+                    $base := $isQualityModel ? 0.01 : 0.002;
+                    $output := $rate * widgets.number_of_images;
+                    $isPro
+                        ? {"type":"usd","usd": $base + $output}
+                        : {"type":"range_usd","min_usd": $base + $output, "max_usd": 3 * $base + $output}
                )
                 """,
             ),
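Transcribed into Python as a worked example, the edit node's new price expression reads as follows (widget and field names come from the diff; the transcription itself is unofficial):

def grok_edit_price(model: str, resolution: str, number_of_images: int) -> dict:
    is_quality = model == "grok-imagine-image-quality"
    is_pro = "pro" in model
    rate = (0.05 if resolution == "1k" else 0.07) if is_quality else (0.07 if is_pro else 0.02)
    base = 0.01 if is_quality else 0.002
    output = rate * number_of_images
    if is_pro:
        return {"type": "usd", "usd": base + output}
    # the expression reports a range for non-pro models: base charged 1x to 3x
    return {"type": "range_usd", "min_usd": base + output, "max_usd": 3 * base + output}

# e.g. quality model at 1k with 2 images: min = 0.01 + 0.05*2 = 0.11, max = 0.03 + 0.10 = 0.13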
requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.43.17
-comfyui-workflow-templates==0.9.69
+comfyui-workflow-templates==0.9.72
 comfyui-embedded-docs==0.4.4
 torch
 torchsde
server.py
@@ -560,7 +560,7 @@ class PromptServer():
                         buffer.seek(0)

                         return web.Response(body=buffer.read(), content_type=f'image/{image_format}',
-                                            headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})
+                                            headers={"Content-Disposition": f"filename=\"{filename}\""})

                 if 'channel' not in request.rel_url.query:
                     channel = 'rgba'
@@ -580,7 +580,7 @@ class PromptServer():
                         buffer.seek(0)

                         return web.Response(body=buffer.read(), content_type='image/png',
-                                            headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})
+                                            headers={"Content-Disposition": f"filename=\"{filename}\""})

                 elif channel == 'a':
                     with Image.open(file) as img:
@@ -597,7 +597,7 @@ class PromptServer():
                         alpha_buffer.seek(0)

                         return web.Response(body=alpha_buffer.read(), content_type='image/png',
-                                            headers={"Content-Disposition": f"attachment; filename=\"{filename}\""})
+                                            headers={"Content-Disposition": f"filename=\"{filename}\""})
                 else:
                     # Use the content type from asset resolution if available,
                     # otherwise guess from the filename.
@@ -614,7 +614,7 @@ class PromptServer():
                 return web.FileResponse(
                     file,
                     headers={
-                        "Content-Disposition": f"attachment; filename=\"{filename}\"",
+                        "Content-Disposition": f"filename=\"{filename}\"",
                         "Content-Type": content_type
                     }
                 )
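All four hunks drop the attachment disposition type, so browsers render these image responses inline instead of forcing a download. A hedged aiohttp sketch of the two behaviors; note that RFC 6266 strictly expects a disposition type such as inline ahead of the filename parameter, while the diff leaves a bare filename=:

from aiohttp import web

def image_response(data: bytes, filename: str, force_download: bool = False) -> web.Response:
    # "attachment" makes the browser download; "inline" renders in the tab and
    # only uses the name if the user saves the image.
    disposition = (f'attachment; filename="{filename}"' if force_download
                   else f'inline; filename="{filename}"')
    return web.Response(body=data, content_type='image/png',
                        headers={"Content-Disposition": disposition})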