Compare commits

...

5 Commits

Author SHA1 Message Date
Haoming 068b75bb44 Merge 43e9509856 into 3cd7b32f1b 2026-01-07 07:40:11 -06:00
comfyanonymous 3cd7b32f1b Support gemma 12B with quant weights. (#11696) 2026-01-07 05:15:14 -05:00
comfyanonymous c0c9720d77 Fix stable release workflow not pulling latest comfy kitchen. (#11695) 2026-01-07 04:48:28 -05:00
Haoming 43e9509856 desc 2026-01-06 13:54:27 +08:00
Haoming 265b4f0fa1 init 2025-12-31 15:30:10 +08:00
4 changed files with 86 additions and 7 deletions

View File

@@ -117,7 +117,7 @@ jobs:
 ./python.exe get-pip.py
 ./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*
-grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
+grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
 ./python.exe -s -m pip install -r requirements_comfyui.txt
 rm requirements_comfyui.txt
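The one-character change above widens the grep pattern from comfyui to comfy, so requirement lines whose package names contain "comfy" but not "comfyui" also land in requirements_comfyui.txt and get reinstalled, which is how the workflow picks up the latest comfy kitchen package named in the commit message. A minimal sketch of the filtering difference; the requirement names below are assumptions, since the real contents of ../ComfyUI/requirements.txt are not shown in this diff:

# Hypothetical requirements lines, purely to illustrate the pattern change.
requirements = [
    "comfyui-frontend-package==1.2.3",
    "comfyui-workflow-templates==0.4.5",
    "comfy-kitchen==0.1.0",  # assumed name for the "comfy kitchen" package from the commit message
    "torch>=2.1",
]

old_match = [r for r in requirements if "comfyui" in r]  # misses comfy-kitchen
new_match = [r for r in requirements if "comfy" in r]    # also picks up comfy-kitchen

print(old_match)
print(new_match)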

View File

@@ -36,10 +36,10 @@ class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):
 class Gemma3_12BModel(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        llama_scaled_fp8 = model_options.get("gemma_scaled_fp8", None)
-        if llama_scaled_fp8 is not None:
+        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
+        if llama_quantization_metadata is not None:
             model_options = model_options.copy()
-            model_options["scaled_fp8"] = llama_scaled_fp8
+            model_options["quantization_metadata"] = llama_quantization_metadata
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
@@ -119,12 +119,12 @@ class LTXAVTEModel(torch.nn.Module):
         return self.load_state_dict(sdo, strict=False)


-def ltxav_te(dtype_llama=None, llama_scaled_fp8=None):
+def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
     class LTXAVTEModel_(LTXAVTEModel):
         def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
+            if llama_quantization_metadata is not None:
                 model_options = model_options.copy()
-                model_options["llama_scaled_fp8"] = llama_scaled_fp8
+                model_options["llama_quantization_metadata"] = llama_quantization_metadata
             if dtype_llama is not None:
                 dtype = dtype_llama
             super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
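The hunks above replace the single llama_scaled_fp8 flag with a more general llama_quantization_metadata payload: ltxav_te copies it into model_options, and Gemma3_12BModel re-exposes it as "quantization_metadata" for the underlying model_class. A minimal usage sketch of that plumbing; the import path, the metadata shape, and the assumption that ltxav_te returns the LTXAVTEModel_ class it defines are all guesses, since none of them appear in this diff:

import torch
from comfy.text_encoders.ltxav import ltxav_te  # module path is an assumption

# Hypothetical metadata describing pre-quantized Gemma weights; the real structure
# comes from the checkpoint loader and is not shown here.
quant_metadata = {"format": "scaled_fp8"}

# ltxav_te is assumed to return the LTXAVTEModel_ class it defines above.
te_class = ltxav_te(dtype_llama=torch.float16, llama_quantization_metadata=quant_metadata)

# __init__ copies model_options and stores the metadata under "llama_quantization_metadata";
# Gemma3_12BModel then forwards it as "quantization_metadata" to its model_class.
te = te_class(device="cpu")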

View File

@@ -0,0 +1,78 @@
from typing import Callable

import torch
from typing_extensions import override

from comfy.ldm.modules.attention import get_attention_function
from comfy.model_patcher import ModelPatcher
from comfy_api.latest import ComfyExtension, io
from server import PromptServer


class Sage3PatchModel(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id="Sage3PatchModel",
            display_name="Patch SageAttention 3",
            description="Patch the model to use `attention3_sage` during the middle blocks and steps, keeping the default attention function for the first/last blocks and steps",
            category="_for_testing",
            inputs=[
                io.Model.Input("model"),
            ],
            outputs=[io.Model.Output()],
            is_experimental=True,
        )

    @classmethod
    def execute(cls, model: ModelPatcher) -> io.NodeOutput:
        sage3: Callable | None = get_attention_function("sage3", default=None)
        if sage3 is None:
            PromptServer.instance.send_progress_text(
                "`sageattn3` is not installed / available...",
                cls.hidden.unique_id,
            )
            return io.NodeOutput(model)

        def attention_override(func: Callable, *args, **kwargs):
            transformer_options: dict = kwargs.get("transformer_options", {})

            block_index: int = transformer_options.get("block_index", 0)
            total_blocks: int = transformer_options.get("total_blocks", 1)
            if block_index == 0 or block_index >= (total_blocks - 1):
                return func(*args, **kwargs)

            sample_sigmas: torch.Tensor = transformer_options["sample_sigmas"]
            sigmas: torch.Tensor = transformer_options["sigmas"]

            total_steps: int = sample_sigmas.size(0)
            step: int = 0
            for i in range(total_steps):
                if torch.allclose(sample_sigmas[i], sigmas):
                    step = i
                    break

            if step == 0 or step >= (total_steps - 1):
                return func(*args, **kwargs)

            return sage3(*args, **kwargs)

        model = model.clone()
        model.model_options["transformer_options"][
            "optimized_attention_override"
        ] = attention_override
        return io.NodeOutput(model)


class Sage3Extension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [Sage3PatchModel]


async def comfy_entrypoint():
    return Sage3Extension()
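The override in the new file only swaps in the sage3 attention kernel for "interior" work: the first and last transformer blocks keep the default attention, as do the first and last sampling steps, where the current step is located by matching sigmas against sample_sigmas (if no entry matches, step stays 0 and the default is used). A standalone sketch of that gating decision, with the tensors replaced by plain integers purely for illustration:

def use_sage3(block_index: int, total_blocks: int, step: int, total_steps: int) -> bool:
    # Mirror of the checks in attention_override: fall back to the default
    # attention on boundary blocks and boundary steps, use sage3 otherwise.
    if block_index == 0 or block_index >= (total_blocks - 1):
        return False
    if step == 0 or step >= (total_steps - 1):
        return False
    return True

# Example: with 30 blocks and 20 sigma entries, sage3 is used only for
# blocks 1..28 during steps 1..18.
assert use_sage3(block_index=15, total_blocks=30, step=10, total_steps=20)
assert not use_sage3(block_index=0, total_blocks=30, step=10, total_steps=20)
assert not use_sage3(block_index=15, total_blocks=30, step=19, total_steps=20)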

View File

@@ -2369,6 +2369,7 @@ async def init_builtin_extra_nodes():
         "nodes_nop.py",
         "nodes_kandinsky5.py",
         "nodes_wanmove.py",
+        "nodes_sage3.py",
     ]
     import_failed = []