Don't convert quants when custom ops are used.

Fix loras.
Remove
2026-02-16 16:32:34 +08:00 · 2025-12-05 02:16:34 -05:00 · 2025-12-05 02:05:14 -05:00 · 2025-12-05 01:11:22 -05:00 · 2025-12-04 23:58:29 -05:00
3 changed files with 49 additions and 27 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -626,6 +626,20 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                assert inplace_update is False  # TODO: eventually remove the inplace_update stuff
                self.weight = torch.nn.Parameter(weight, requires_grad=False)

+            def _apply(self, fn, recurse=True):  # This is to get torch.compile + moving weights to another device working
+                if recurse:
+                    for module in self.children():
+                        module._apply(fn)
+
+                for key, param in self._parameters.items():
+                    if param is None:
+                        continue
+                    self.register_parameter(key, torch.nn.Parameter(fn(param), requires_grad=False))
+                for key, buf in self._buffers.items():
+                    if buf is not None:
+                        self._buffers[key] = fn(buf)
+                return self
+
    return MixedPrecisionOps

 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -338,7 +338,9 @@ def generic_copy_(func, args, kwargs):
            # Copy from another quantized tensor
            qt_dest._qdata.copy_(src._qdata, non_blocking=non_blocking)
            qt_dest._layout_type = src._layout_type
+            orig_dtype = qt_dest._layout_params["orig_dtype"]
            _copy_layout_params_inplace(src._layout_params, qt_dest._layout_params, non_blocking=non_blocking)
+            qt_dest._layout_params["orig_dtype"] = orig_dtype
        else:
            # Copy from regular tensor - just copy raw data
            qt_dest._qdata.copy_(src)
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -964,7 +964,8 @@ def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DI
    clip_data = []
    for p in ckpt_paths:
        sd, metadata = comfy.utils.load_torch_file(p, safe_load=True, return_metadata=True)
-        sd, metadata = comfy.utils.convert_old_quants(sd, model_prefix="", metadata=metadata)
+        if model_options.get("custom_operations", None) is None:
+            sd, metadata = comfy.utils.convert_old_quants(sd, model_prefix="", metadata=metadata)
        clip_data.append(sd)
    return load_text_encoder_state_dicts(clip_data, embedding_directory=embedding_directory, clip_type=clip_type, model_options=model_options)

@@ -1217,8 +1218,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip

    parameters = 0
    for c in clip_data:
-        if "_quantization_metadata" in c:
-            c.pop("_quantization_metadata")
        parameters += comfy.utils.calculate_parameters(c)
        tokenizer_data, model_options = comfy.text_encoders.long_clipl.model_options_long_clip(c, tokenizer_data, model_options)

@@ -1288,7 +1287,9 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    weight_dtype = comfy.utils.weight_dtype(sd, diffusion_model_prefix)
    load_device = model_management.get_torch_device()

-    sd, metadata = comfy.utils.convert_old_quants(sd, diffusion_model_prefix, metadata=metadata)
+    custom_operations = model_options.get("custom_operations", None)
+    if custom_operations is None:
+        sd, metadata = comfy.utils.convert_old_quants(sd, diffusion_model_prefix, metadata=metadata)

    model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix, metadata=metadata)
    if model_config is None:
@@ -1302,7 +1303,9 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    if model_config.quant_config is not None:
        weight_dtype = None

-    model_config.custom_operations = model_options.get("custom_operations", None)
+    if custom_operations is not None:
+        model_config.custom_operations = custom_operations
+
    unet_dtype = model_options.get("dtype", model_options.get("weight_dtype", None))

    if unet_dtype is None:
@@ -1329,25 +1332,26 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
        vae = VAE(sd=vae_sd, metadata=metadata)

    if output_clip:
-        scaled_fp8_list = []
-        for k in list(sd.keys()):  # Convert scaled fp8 to mixed ops
-            if k.endswith(".scaled_fp8"):
-                scaled_fp8_list.append(k[:-len("scaled_fp8")])
+        if te_model_options.get("custom_operations", None) is None:
+            scaled_fp8_list = []
+            for k in list(sd.keys()):  # Convert scaled fp8 to mixed ops
+                if k.endswith(".scaled_fp8"):
+                    scaled_fp8_list.append(k[:-len("scaled_fp8")])
+
+            if len(scaled_fp8_list) > 0:
+                out_sd = {}
+                for k in sd:
+                    skip = False
+                    for pref in scaled_fp8_list:
+                        skip = skip or k.startswith(pref)
+                    if not skip:
+                        out_sd[k] = sd[k]

-        if len(scaled_fp8_list) > 0:
-            out_sd = {}
-            for k in sd:
-                skip = False
                for pref in scaled_fp8_list:
-                    skip = skip or k.startswith(pref)
-                if not skip:
-                    out_sd[k] = sd[k]
-
-            for pref in scaled_fp8_list:
-                quant_sd, qmetadata = comfy.utils.convert_old_quants(sd, pref, metadata={})
-                for k in quant_sd:
-                    out_sd[k] = quant_sd[k]
-                sd = out_sd
+                    quant_sd, qmetadata = comfy.utils.convert_old_quants(sd, pref, metadata={})
+                    for k in quant_sd:
+                        out_sd[k] = quant_sd[k]
+                    sd = out_sd

        clip_target = model_config.clip_target(state_dict=sd)
        if clip_target is not None:
@@ -1410,11 +1414,10 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
    temp_sd = comfy.utils.state_dict_prefix_replace(sd, {diffusion_model_prefix: ""}, filter_keys=True)
    if len(temp_sd) > 0:
        sd = temp_sd
-        quant_key = "{}_quantization_metadata".format(diffusion_model_prefix)
-        if metadata is not None and quant_key in metadata:
-            metadata["_quantization_metadata"] = metadata.pop(quant_key)

-    sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)
+    custom_operations = model_options.get("custom_operations", None)
+    if custom_operations is None:
+        sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)
    parameters = comfy.utils.calculate_parameters(sd)
    weight_dtype = comfy.utils.weight_dtype(sd)

@@ -1458,7 +1461,10 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
    else:
        manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
    model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)
-    model_config.custom_operations = model_options.get("custom_operations", model_config.custom_operations)
+
+    if custom_operations is not None:
+        model_config.custom_operations = custom_operations
+
    if model_options.get("fp8_optimizations", False):
        model_config.optimizations["fp8"] = True
Author	SHA1	Message	Date
comfyanonymous	3193d3aa53	Don't convert quants when custom ops are used. (Some checks failed: Python Linting / Run Ruff (push) has been cancelled; Python Linting / Run Pylint (push) has been cancelled.)	2025-12-05 02:16:34 -05:00
comfyanonymous	129f3e7db1	Fix loras.	2025-12-05 02:05:14 -05:00
comfyanonymous	e12842fe31	Remove	2025-12-05 01:11:22 -05:00
comfyanonymous	88172a4339	Fix torch compile issue.	2025-12-04 23:58:29 -05:00