Disable comfy kitchen cuda if pytorch cuda less than 13 (#11681)

Skip fp4 matrix mult on devices that don't support it. (#11677)
Disable ltxav previews. (#11676)
2026-05-27 09:27:24 +08:00 · 2026-01-06 22:13:43 -05:00 · 2026-01-06 18:07:26 -05:00 · 2026-01-06 17:41:27 -05:00 · 2026-01-06 17:33:03 -05:00 · 2026-01-06 14:28:29 -08:00
8 changed files with 58 additions and 17 deletions
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -408,7 +408,9 @@ class LTXV(LatentFormat):
        self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]

 class LTXAV(LTXV):
-    pass
+    def __init__(self):
+        self.latent_rgb_factors = None
+        self.latent_rgb_factors_bias = None

 class HunyuanVideo(LatentFormat):
    latent_channels = 16
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@@ -4,6 +4,7 @@ from torch import Tensor

 from comfy.ldm.modules.attention import optimized_attention
 import comfy.model_management
+import logging


 def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
@@ -13,7 +14,6 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transforme
    x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask, transformer_options=transformer_options)
    return x

-
 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    assert dim % 2 == 0
    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
@@ -28,13 +28,20 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
    return out.to(dtype=torch.float32, device=pos.device)

-def apply_rope1(x: Tensor, freqs_cis: Tensor):
-    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)

-    x_out = freqs_cis[..., 0] * x_[..., 0]
-    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+try:
+    import comfy.quant_ops
+    apply_rope = comfy.quant_ops.ck.apply_rope
+    apply_rope1 = comfy.quant_ops.ck.apply_rope1
+except:
+    logging.warning("No comfy kitchen, using old apply_rope functions.")
+    def apply_rope1(x: Tensor, freqs_cis: Tensor):
+        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)

-    return x_out.reshape(*x.shape).type_as(x)
+        x_out = freqs_cis[..., 0] * x_[..., 0]
+        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])

-def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+        return x_out.reshape(*x.shape).type_as(x)
+
+    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@@ -276,7 +276,7 @@ class Embeddings1DConnector(nn.Module):
                max(1024, hidden_states.shape[1]) / self.num_learnable_registers
            )
            learnable_registers = torch.tile(
-                self.learnable_registers, (num_registers_duplications, 1)
+                self.learnable_registers.to(hidden_states), (num_registers_duplications, 1)
            )

            hidden_states = torch.cat((hidden_states, learnable_registers[hidden_states.shape[1]:].unsqueeze(0).repeat(hidden_states.shape[0], 1, 1)), dim=1)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1504,6 +1504,16 @@ def supports_fp8_compute(device=None):

    return True

+def supports_nvfp4_compute(device=None):
+    if not is_nvidia():
+        return False
+
+    props = torch.cuda.get_device_properties(device)
+    if props.major < 10:
+        return False
+
+    return True
+
 def extended_fp16_support():
    # TODO: check why some models work with fp16 on newer torch versions but not on older
    if torch_version_numeric < (2, 7):
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -493,11 +493,12 @@ from .quant_ops import (
 )


-def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False):
+def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False, disabled=[]):
    class MixedPrecisionOps(manual_cast):
        _quant_config = quant_config
        _compute_dtype = compute_dtype
        _full_precision_mm = full_precision_mm
+        _disabled = disabled

        class Linear(torch.nn.Module, CastWeightBiasOp):
            def __init__(
@@ -522,6 +523,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

                self.tensor_class = None
                self._full_precision_mm = MixedPrecisionOps._full_precision_mm
+                self._full_precision_mm_config = False

            def reset_parameters(self):
                return None
@@ -556,8 +558,12 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                    self.weight = torch.nn.Parameter(weight.to(device=device, dtype=MixedPrecisionOps._compute_dtype), requires_grad=False)
                else:
                    self.quant_format = layer_conf.get("format", None)
+                    self._full_precision_mm_config = layer_conf.get("full_precision_matrix_mult", False)
                    if not self._full_precision_mm:
-                        self._full_precision_mm = layer_conf.get("full_precision_matrix_mult", False)
+                        self._full_precision_mm = self._full_precision_mm_config
+
+                    if self.quant_format in MixedPrecisionOps._disabled:
+                        self._full_precision_mm = True

                    if self.quant_format is None:
                        raise ValueError(f"Unknown quantization format for layer {layer_name}")
@@ -630,7 +636,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                        sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale

                    quant_conf = {"format": self.quant_format}
-                    if self._full_precision_mm:
+                    if self._full_precision_mm_config:
                        quant_conf["full_precision_matrix_mult"] = True
                    sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
                return sd
@@ -711,10 +717,17 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
    fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
+    nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)

    if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
        logging.info("Using mixed precision operations")
-        return mixed_precision_ops(model_config.quant_config, compute_dtype, full_precision_mm=not fp8_compute)
+        disabled = set()
+        if not nvfp4_compute:
+            disabled.add("nvfp4")
+        if not fp8_compute:
+            disabled.add("float8_e4m3fn")
+            disabled.add("float8_e5m2")
+        return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled)

    if (
        fp8_compute and
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -13,6 +13,13 @@ try:
        get_layout_class,
    )
    _CK_AVAILABLE = True
+    if torch.version.cuda is None:
+        ck.registry.disable("cuda")
+    else:
+        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
+        if cuda_version < (13,):
+            ck.registry.disable("cuda")
+
    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -86,17 +86,19 @@ class LTXAVTEModel(torch.nn.Module):
        )

    def set_clip_options(self, options):
+        self.execution_device = options.get("execution_device", self.execution_device)
        self.gemma3_12b.set_clip_options(options)

    def reset_clip_options(self):
        self.gemma3_12b.reset_clip_options()
+        self.execution_device = None

    def encode_token_weights(self, token_weight_pairs):
        token_weight_pairs = token_weight_pairs["gemma3_12b"]

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
        out_device = out.device
-        out = out.movedim(1, -1).to(self.text_embedding_projection.weight.device)
+        out = out.movedim(1, -1).to(self.execution_device)
        out = 8.0 * (out - out.mean(dim=(1, 2), keepdim=True)) / (out.amax(dim=(1, 2), keepdim=True) - out.amin(dim=(1, 2), keepdim=True) + 1e-6)
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.35.9
-comfyui-workflow-templates==0.7.66
+comfyui-workflow-templates==0.7.67
 comfyui-embedded-docs==0.3.1
 torch
 torchsde
@@ -21,7 +21,7 @@ psutil
 alembic
 SQLAlchemy
 av>=14.2.0
-comfy-kitchen>=0.2.0
+comfy-kitchen>=0.2.2

 #non essential dependencies:
 kornia>=0.7.1
Author	SHA1	Message	Date
comfyanonymous	edee33f55e	Disable comfy kitchen cuda if pytorch cuda less than 13 (#11681) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Build package / Build Test (3.10) (push) Waiting to run Details Build package / Build Test (3.11) (push) Waiting to run Details Build package / Build Test (3.12) (push) Waiting to run Details Build package / Build Test (3.13) (push) Waiting to run Details Build package / Build Test (3.14) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-01-06 22:13:43 -05:00
comfyanonymous	2c03884f5f	Skip fp4 matrix mult on devices that don't support it. (#11677)	2026-01-06 18:07:26 -05:00
comfyanonymous	6e9ee55cdd	Disable ltxav previews. (#11676)	2026-01-06 17:41:27 -05:00
comfyanonymous	023cf13721	Fix lowvram issue with ltxv2 text encoder. (#11675)	2026-01-06 17:33:03 -05:00
ComfyUI Wiki	c3566c0d76	chore: update workflow templates to v0.7.67 (#11667)	2026-01-06 14:28:29 -08:00
comfyanonymous	c3c3e93c5b	Use rope functions from comfy kitchen. (#11674)	2026-01-06 16:57:50 -05:00
comfyanonymous	6ffc159bdd	Update comfy-kitchen version to 0.2.1 (#11672)	2026-01-06 15:53:43 -05:00