torch compile fixes.

Remove.
2026-06-14 20:09:24 +08:00 · 2025-12-04 22:17:13 -05:00 · 2025-12-04 19:38:01 -05:00 · 2025-12-04 19:17:06 -05:00 · 2025-12-04 19:09:35 -05:00
3 changed files with 5 additions and 15 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -449,7 +449,7 @@ class fp8_ops(manual_cast):
            return None

        def forward_comfy_cast_weights(self, input):
-            if not self.training:
+            if len(self.weight_function) == 0 and len(self.bias_function) == 0:
                try:
                    out = fp8_linear(self, input)
                    if out is not None:
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -241,9 +241,6 @@ class QuantizedTensor(torch.Tensor):
    def storage(self):
        return self._qdata.storage()

-    def untyped_storage(self):
-        return self._qdata.untyped_storage()
-
 # ==============================================================================
 # Generic Utilities (Layout-Agnostic Operations)
 # ==============================================================================
@@ -255,12 +252,6 @@ def _create_transformed_qtensor(qt, transform_fn):


 def _handle_device_transfer(qt, target_device, target_dtype=None, target_layout=None, op_name="to"):
-    if target_dtype is not None and target_dtype != qt.dtype:
-        logging.warning(
-            f"QuantizedTensor: dtype conversion requested to {target_dtype}, "
-            f"but not supported for quantized tensors. Ignoring dtype."
-        )
-
    if target_layout is not None and target_layout != torch.strided:
        logging.warning(
            f"QuantizedTensor: layout change requested to {target_layout}, "
@@ -280,6 +271,8 @@ def _handle_device_transfer(qt, target_device, target_dtype=None, target_layout=
            logging.debug(f"QuantizedTensor.{op_name}: Moving from {current_device} to {target_device}")
            new_q_data = qt._qdata.to(device=target_device)
            new_params = _move_layout_params_to_device(qt._layout_params, target_device)
+            if target_dtype is not None:
+                new_params["orig_dtype"] = target_dtype
            new_qt = QuantizedTensor(new_q_data, qt._layout_type, new_params)
            logging.debug(f"QuantizedTensor.{op_name}: Created new tensor on {target_device}")
            return new_qt
@@ -403,7 +396,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
    def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn, stochastic_rounding=0, inplace_ops=False):
        orig_dtype = tensor.dtype

-        if scale == "recalculate":
+        if isinstance(scale, str) and scale == "recalculate":
            scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max

        if scale is not None:
--- a/comfy/text_encoders/ovis.py
+++ b/comfy/text_encoders/ovis.py
@@ -55,12 +55,9 @@ class OvisTEModel(sd1_clip.SD1ClipModel):
        return out, pooled, {}


-def te(dtype_llama=None, llama_scaled_fp8=None, llama_quantization_metadata=None):
+def te(dtype_llama=None, llama_quantization_metadata=None):
    class OvisTEModel_(OvisTEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
-                model_options = model_options.copy()
-                model_options["scaled_fp8"] = llama_scaled_fp8
            if dtype_llama is not None:
                dtype = dtype_llama
            if llama_quantization_metadata is not None:
Author	SHA1	Message	Date
comfyanonymous	b8afb60ee8	torch compile fixes. [Checks pending: Python Linting / Run Ruff (push), waiting to run; Python Linting / Run Pylint (push), waiting to run]	2025-12-04 22:17:13 -05:00
comfyanonymous	295a0170d6	Remove.	2025-12-04 19:38:01 -05:00
comfyanonymous	55366d9c0f	Remove.	2025-12-04 19:17:06 -05:00
comfyanonymous	c217243b56	Fix fp8 fast lora offload.	2025-12-04 19:09:35 -05:00