def _collect_quant_synthetic_keys():
    """Return the state_dict key suffixes that quantized weights add synthetically.

    A QuantizedTensor surfaces extra per-parameter entries in ``state_dict()``
    (for example ``*.weight_scale`` / ``*.input_scale``) plus the
    ``comfy_quant`` metadata marker key attached alongside quantized weights.
    The well-known suffixes are always present in the result; every name
    listed under ``"parameters"`` in ``comfy.quant_ops.QUANT_ALGOS`` is added
    on top, so new quant types are covered automatically.

    Returns:
        set[str]: suffix names (the part after the last ``.`` of a key).
    """
    import logging

    # Always seed the known suffixes. Adding them only on failure would leave
    # just {"comfy_quant"} when the module imports fine but QUANT_ALGOS is
    # missing or empty.
    keys = {"comfy_quant", "weight_scale", "weight_scale_2", "input_scale"}
    try:
        # Imported at call time, inside the function, rather than at module top.
        import comfy.quant_ops

        algos = getattr(comfy.quant_ops, "QUANT_ALGOS", {})
        for algo in algos.values():
            params = algo.get("parameters", ())
            if isinstance(params, str):
                # A bare string would otherwise be split into characters.
                keys.add(params)
            else:
                keys.update(params)
    except Exception:
        # Best effort: keep whatever was gathered plus the seeded defaults,
        # but leave a trace instead of swallowing the failure silently.
        logging.debug(
            "Could not read quant parameter names from comfy.quant_ops; "
            "using the built-in synthetic key list.",
            exc_info=True,
        )
    return keys


_QUANT_SYNTHETIC_KEYS = _collect_quant_synthetic_keys()


def _is_quant_synthetic_key(key):
    """Return True when ``key`` is a synthetic sub-key of a quantized weight.

    Quantized weights (Mixed Precision Quantization) flatten into synthetic
    state_dict keys that are components of the QuantizedTensor stored in
    ``*.weight``, not real module attributes. The ``*.weight`` key carries the
    convert_weight dequant path, so the key loop in ``ModelPatcher`` skips the
    synthetic sub-keys and merges through ``*.weight`` instead.

    Only the segment after the last ``.`` is examined; a key with no ``.`` is
    never treated as synthetic.
    """
    _, dot, suffix = key.rpartition(".")
    return bool(dot) and suffix in _QUANT_SYNTHETIC_KEYS