From 22e40d2ace0f53da025b3a41cbe4b664ef807097 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Oct 2025 12:08:08 -0700
Subject: [PATCH 01/20] Tell users to update their nvidia drivers if portable
 doesn't start. (#10518)

---
 .../advanced/run_nvidia_gpu_disable_api_nodes.bat                | 1 +
 .ci/windows_nvidia_base_files/run_nvidia_gpu.bat                 | 1 +
 .../run_nvidia_gpu_fast_fp16_accumulation.bat                    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat b/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
index cfe4b9f0e..ed00583b6 100644
--- a/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
+++ b/.ci/windows_nvidia_base_files/advanced/run_nvidia_gpu_disable_api_nodes.bat
@@ -1,2 +1,3 @@
 ..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
diff --git a/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat b/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
index 274d7c948..4898a424f 100755
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu.bat
@@ -1,2 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
diff --git a/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat b/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
index 38f06ecb2..32611e4af 100644
--- a/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
+++ b/.ci/windows_nvidia_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat
@@ -1,2 +1,3 @@
 .\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
+echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause

From 8817f8fc148c5a63ffd3f854975df8e72c740540 Mon Sep 17 00:00:00 2001
From: contentis <lspindler@nvidia.com>
Date: Tue, 28 Oct 2025 21:20:53 +0100
Subject: [PATCH 02/20] Mixed Precision Quantization System (#10498)

* Implement mixed precision operations with a registry design and metadate for quant spec in checkpoint.

* Updated design using Tensor Subclasses

* Fix FP8 MM

* An actually functional POC

* Remove CK reference and ensure correct compute dtype

* Update unit tests

* ruff lint

* Implement mixed precision operations with a registry design and metadate for quant spec in checkpoint.

* Updated design using Tensor Subclasses

* Fix FP8 MM

* An actually functional POC

* Remove CK reference and ensure correct compute dtype

* Update unit tests

* ruff lint

* Fix missing keys

* Rename quant dtype parameter

* Rename quant dtype parameter

* Fix unittests for CPU build
---
 comfy/model_base.py                           |  10 +-
 comfy/model_detection.py                      |  20 +
 comfy/ops.py                                  | 146 +++++-
 comfy/quant_ops.py                            | 437 ++++++++++++++++++
 comfy/sd.py                                   |  13 +-
 comfy/supported_models_base.py                |   1 +
 .../comfy_quant/test_mixed_precision.py       | 232 ++++++++++
 tests-unit/comfy_quant/test_quant_registry.py | 190 ++++++++
 8 files changed, 1030 insertions(+), 19 deletions(-)
 create mode 100644 comfy/quant_ops.py
 create mode 100644 tests-unit/comfy_quant/test_mixed_precision.py
 create mode 100644 tests-unit/comfy_quant/test_quant_registry.py

diff --git a/comfy/model_base.py b/comfy/model_base.py
index e877f19ac..7c788d085 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -134,7 +134,7 @@ class BaseModel(torch.nn.Module):
         if not unet_config.get("disable_unet_model_creation", False):
             if model_config.custom_operations is None:
                 fp8 = model_config.optimizations.get("fp8", False)
-                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8)
+                operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8, model_config=model_config)
             else:
                 operations = model_config.custom_operations
             self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
@@ -333,6 +333,14 @@ class BaseModel(torch.nn.Module):
         if self.model_config.scaled_fp8 is not None:
             unet_state_dict["scaled_fp8"] = torch.tensor([], dtype=self.model_config.scaled_fp8)
 
+        # Save mixed precision metadata
+        if hasattr(self.model_config, 'layer_quant_config') and self.model_config.layer_quant_config:
+            metadata = {
+                "format_version": "1.0",
+                "layers": self.model_config.layer_quant_config
+            }
+            unet_state_dict["_quantization_metadata"] = metadata
+
         unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
 
         if self.model_type == ModelType.V_PREDICTION:
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index 141f1e164..3142a7fc3 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -6,6 +6,20 @@ import math
 import logging
 import torch
 
+
+def detect_layer_quantization(metadata):
+    quant_key = "_quantization_metadata"
+    if metadata is not None and quant_key in metadata:
+        quant_metadata = metadata.pop(quant_key)
+        quant_metadata = json.loads(quant_metadata)
+        if isinstance(quant_metadata, dict) and "layers" in quant_metadata:
+            logging.info(f"Found quantization metadata (version {quant_metadata.get('format_version', 'unknown')})")
+            return quant_metadata["layers"]
+        else:
+            raise ValueError("Invalid quantization metadata format")
+    return None
+
+
 def count_blocks(state_dict_keys, prefix_string):
     count = 0
     while True:
@@ -701,6 +715,12 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal
         else:
             model_config.optimizations["fp8"] = True
 
+    # Detect per-layer quantization (mixed precision)
+    layer_quant_config = detect_layer_quantization(metadata)
+    if layer_quant_config:
+        model_config.layer_quant_config = layer_quant_config
+        logging.info(f"Detected mixed precision quantization: {len(layer_quant_config)} layers quantized")
+
     return model_config
 
 def unet_prefix_from_state_dict(state_dict):
diff --git a/comfy/ops.py b/comfy/ops.py
index 934e21261..93731eedf 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -344,6 +344,10 @@ class manual_cast(disable_weight_init):
 
 
 def fp8_linear(self, input):
+    """
+    Legacy FP8 linear function for backward compatibility.
+    Uses QuantizedTensor subclass for dispatch.
+    """
     dtype = self.weight.dtype
     if dtype not in [torch.float8_e4m3fn]:
         return None
@@ -355,9 +359,9 @@ def fp8_linear(self, input):
 
     input_shape = input.shape
     input_dtype = input.dtype
+
     if len(input.shape) == 3:
         w, bias = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype)
-        w = w.t()
 
         scale_weight = self.scale_weight
         scale_input = self.scale_input
@@ -368,23 +372,18 @@ def fp8_linear(self, input):
 
         if scale_input is None:
             scale_input = torch.ones((), device=input.device, dtype=torch.float32)
-            input = torch.clamp(input, min=-448, max=448, out=input)
-            input = input.reshape(-1, input_shape[2]).to(dtype).contiguous()
         else:
             scale_input = scale_input.to(input.device)
-            input = (input * (1.0 / scale_input).to(input_dtype)).reshape(-1, input_shape[2]).to(dtype).contiguous()
 
-        if bias is not None:
-            o = torch._scaled_mm(input, w, out_dtype=input_dtype, bias=bias, scale_a=scale_input, scale_b=scale_weight)
-        else:
-            o = torch._scaled_mm(input, w, out_dtype=input_dtype, scale_a=scale_input, scale_b=scale_weight)
-
-        if isinstance(o, tuple):
-            o = o[0]
+        # Wrap weight in QuantizedTensor - this enables unified dispatch
+        # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
+        layout_params_weight = {'scale': scale_weight, 'orig_dtype': input_dtype}
+        quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
+        quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
+        o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
 
         if tensor_2d:
             return o.reshape(input_shape[0], -1)
-
         return o.reshape((-1, input_shape[1], self.weight.shape[0]))
 
     return None
@@ -478,7 +477,128 @@ if CUBLAS_IS_AVAILABLE:
             def forward(self, *args, **kwargs):
                 return super().forward(*args, **kwargs)
 
-def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None):
+
+# ==============================================================================
+# Mixed Precision Operations
+# ==============================================================================
+from .quant_ops import QuantizedTensor, TensorCoreFP8Layout
+
+QUANT_FORMAT_MIXINS = {
+    "float8_e4m3fn": {
+        "dtype": torch.float8_e4m3fn,
+        "layout_type": TensorCoreFP8Layout,
+        "parameters": {
+            "weight_scale": torch.nn.Parameter(torch.zeros((), dtype=torch.float32), requires_grad=False),
+            "input_scale": torch.nn.Parameter(torch.zeros((), dtype=torch.float32), requires_grad=False),
+        }
+    }
+}
+
+class MixedPrecisionOps(disable_weight_init):
+    _layer_quant_config = {}
+    _compute_dtype = torch.bfloat16
+
+    class Linear(torch.nn.Module, CastWeightBiasOp):
+        def __init__(
+            self,
+            in_features: int,
+            out_features: int,
+            bias: bool = True,
+            device=None,
+            dtype=None,
+        ) -> None:
+            super().__init__()
+
+            self.factory_kwargs = {"device": device, "dtype": MixedPrecisionOps._compute_dtype}
+            # self.factory_kwargs = {"device": device, "dtype": dtype}
+
+            self.in_features = in_features
+            self.out_features = out_features
+            if bias:
+                self.bias = torch.nn.Parameter(torch.empty(out_features, **self.factory_kwargs))
+            else:
+                self.register_parameter("bias", None)
+
+            self.tensor_class = None
+
+        def reset_parameters(self):
+            return None
+
+        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
+                                  strict, missing_keys, unexpected_keys, error_msgs):
+
+            device = self.factory_kwargs["device"]
+            layer_name = prefix.rstrip('.')
+            weight_key = f"{prefix}weight"
+            weight = state_dict.pop(weight_key, None)
+            if weight is None:
+                raise ValueError(f"Missing weight for layer {layer_name}")
+
+            manually_loaded_keys = [weight_key]
+
+            if layer_name not in MixedPrecisionOps._layer_quant_config:
+                self.weight = torch.nn.Parameter(weight.to(device=device, dtype=MixedPrecisionOps._compute_dtype), requires_grad=False)
+            else:
+                quant_format = MixedPrecisionOps._layer_quant_config[layer_name].get("format", None)
+                if quant_format is None:
+                    raise ValueError(f"Unknown quantization format for layer {layer_name}")
+
+                mixin = QUANT_FORMAT_MIXINS[quant_format]
+                self.layout_type = mixin["layout_type"]
+
+                scale_key = f"{prefix}weight_scale"
+                layout_params = {
+                    'scale': state_dict.pop(scale_key, None),
+                    'orig_dtype': MixedPrecisionOps._compute_dtype
+                }
+                if layout_params['scale'] is not None:
+                    manually_loaded_keys.append(scale_key)
+
+                self.weight = torch.nn.Parameter(
+                    QuantizedTensor(weight.to(device=device, dtype=mixin["dtype"]), self.layout_type, layout_params),
+                    requires_grad=False
+                )
+
+                for param_name, param_value in mixin["parameters"].items():
+                    param_key = f"{prefix}{param_name}"
+                    _v = state_dict.pop(param_key, None)
+                    if _v is None:
+                        continue
+                    setattr(self, param_name, torch.nn.Parameter(_v.to(device=device), requires_grad=False))
+                    manually_loaded_keys.append(param_key)
+
+            super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
+
+            for key in manually_loaded_keys:
+                if key in missing_keys:
+                    missing_keys.remove(key)
+
+        def _forward(self, input, weight, bias):
+            return torch.nn.functional.linear(input, weight, bias)
+
+        def forward_comfy_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return self._forward(input, weight, bias)
+
+        def forward(self, input, *args, **kwargs):
+            run_every_op()
+
+            if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
+                return self.forward_comfy_cast_weights(input, *args, **kwargs)
+            if (getattr(self, 'layout_type', None) is not None and
+                getattr(self, 'input_scale', None) is not None and
+                not isinstance(input, QuantizedTensor)):
+                input = QuantizedTensor.from_float(input, self.layout_type, scale=self.input_scale, fp8_dtype=self.weight.dtype)
+            return self._forward(input, self.weight, self.bias)
+
+
+def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None, model_config=None):
+    if model_config and hasattr(model_config, 'layer_quant_config') and model_config.layer_quant_config:
+        MixedPrecisionOps._layer_quant_config = model_config.layer_quant_config
+        MixedPrecisionOps._compute_dtype = compute_dtype
+        logging.info(f"Using mixed precision operations: {len(model_config.layer_quant_config)} quantized layers")
+        return MixedPrecisionOps
+
     fp8_compute = comfy.model_management.supports_fp8_compute(load_device)
     if scaled_fp8 is not None:
         return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=fp8_optimizations, override_dtype=scaled_fp8)
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
new file mode 100644
index 000000000..b14e03084
--- /dev/null
+++ b/comfy/quant_ops.py
@@ -0,0 +1,437 @@
+import torch
+import logging
+from typing import Tuple, Dict
+
+_LAYOUT_REGISTRY = {}
+_GENERIC_UTILS = {}
+
+
+def register_layout_op(torch_op, layout_type):
+    """
+    Decorator to register a layout-specific operation handler.
+    Args:
+        torch_op: PyTorch operation (e.g., torch.ops.aten.linear.default)
+        layout_type: Layout class (e.g., TensorCoreFP8Layout)
+    Example:
+        @register_layout_op(torch.ops.aten.linear.default, TensorCoreFP8Layout)
+        def fp8_linear(func, args, kwargs):
+            # FP8-specific linear implementation
+            ...
+    """
+    def decorator(handler_func):
+        if torch_op not in _LAYOUT_REGISTRY:
+            _LAYOUT_REGISTRY[torch_op] = {}
+        _LAYOUT_REGISTRY[torch_op][layout_type] = handler_func
+        return handler_func
+    return decorator
+
+
+def register_generic_util(torch_op):
+    """
+    Decorator to register a generic utility that works for all layouts.
+    Args:
+        torch_op: PyTorch operation (e.g., torch.ops.aten.detach.default)
+
+    Example:
+        @register_generic_util(torch.ops.aten.detach.default)
+        def generic_detach(func, args, kwargs):
+            # Works for any layout
+            ...
+    """
+    def decorator(handler_func):
+        _GENERIC_UTILS[torch_op] = handler_func
+        return handler_func
+    return decorator
+
+
+def _get_layout_from_args(args):
+    for arg in args:
+        if isinstance(arg, QuantizedTensor):
+            return arg._layout_type
+        elif isinstance(arg, (list, tuple)):
+            for item in arg:
+                if isinstance(item, QuantizedTensor):
+                    return item._layout_type
+    return None
+
+
+def _move_layout_params_to_device(params, device):
+    new_params = {}
+    for k, v in params.items():
+        if isinstance(v, torch.Tensor):
+            new_params[k] = v.to(device=device)
+        else:
+            new_params[k] = v
+    return new_params
+
+
+def _copy_layout_params(params):
+    new_params = {}
+    for k, v in params.items():
+        if isinstance(v, torch.Tensor):
+            new_params[k] = v.clone()
+        else:
+            new_params[k] = v
+    return new_params
+
+
+class QuantizedLayout:
+    """
+    Base class for quantization layouts.
+
+    A layout encapsulates the format-specific logic for quantization/dequantization
+    and provides a uniform interface for extracting raw tensors needed for computation.
+
+    New quantization formats should subclass this and implement the required methods.
+    """
+    @classmethod
+    def quantize(cls, tensor, **kwargs) -> Tuple[torch.Tensor, Dict]:
+        raise NotImplementedError(f"{cls.__name__} must implement quantize()")
+
+    @staticmethod
+    def dequantize(qdata, **layout_params) -> torch.Tensor:
+        raise NotImplementedError("TensorLayout must implement dequantize()")
+
+    @classmethod
+    def get_plain_tensors(cls, qtensor) -> torch.Tensor:
+        raise NotImplementedError(f"{cls.__name__} must implement get_plain_tensors()")
+
+
+class QuantizedTensor(torch.Tensor):
+    """
+    Universal quantized tensor that works with any layout.
+
+    This tensor subclass uses a pluggable layout system to support multiple
+    quantization formats (FP8, INT4, INT8, etc.) without code duplication.
+
+    The layout_type determines format-specific behavior, while common operations
+    (detach, clone, to) are handled generically.
+
+    Attributes:
+        _qdata: The quantized tensor data
+        _layout_type: Layout class (e.g., TensorCoreFP8Layout)
+        _layout_params: Dict with layout-specific params (scale, zero_point, etc.)
+    """
+
+    @staticmethod
+    def __new__(cls, qdata, layout_type, layout_params):
+        """
+        Create a quantized tensor.
+
+        Args:
+            qdata: The quantized data tensor
+            layout_type: Layout class (subclass of QuantizedLayout)
+            layout_params: Dict with layout-specific parameters
+        """
+        return torch.Tensor._make_subclass(cls, qdata, require_grad=False)
+
+    def __init__(self, qdata, layout_type, layout_params):
+        self._qdata = qdata.contiguous()
+        self._layout_type = layout_type
+        self._layout_params = layout_params
+
+    def __repr__(self):
+        layout_name = self._layout_type.__name__
+        param_str = ", ".join(f"{k}={v}" for k, v in list(self._layout_params.items())[:2])
+        return f"QuantizedTensor(shape={self.shape}, layout={layout_name}, {param_str})"
+
+    @property
+    def layout_type(self):
+        return self._layout_type
+
+    def __tensor_flatten__(self):
+        """
+        Tensor flattening protocol for proper device movement.
+        """
+        inner_tensors = ["_qdata"]
+        ctx = {
+            "layout_type": self._layout_type,
+        }
+
+        tensor_params = {}
+        non_tensor_params = {}
+        for k, v in self._layout_params.items():
+            if isinstance(v, torch.Tensor):
+                tensor_params[k] = v
+            else:
+                non_tensor_params[k] = v
+
+        ctx["tensor_param_keys"] = list(tensor_params.keys())
+        ctx["non_tensor_params"] = non_tensor_params
+
+        for k, v in tensor_params.items():
+            attr_name = f"_layout_param_{k}"
+            object.__setattr__(self, attr_name, v)
+            inner_tensors.append(attr_name)
+
+        return inner_tensors, ctx
+
+    @staticmethod
+    def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride):
+        """
+        Tensor unflattening protocol for proper device movement.
+        Reconstructs the QuantizedTensor after device movement.
+        """
+        layout_type = ctx["layout_type"]
+        layout_params = dict(ctx["non_tensor_params"])
+
+        for key in ctx["tensor_param_keys"]:
+            attr_name = f"_layout_param_{key}"
+            layout_params[key] = inner_tensors[attr_name]
+
+        return QuantizedTensor(inner_tensors["_q_data"], layout_type, layout_params)
+
+    @classmethod
+    def from_float(cls, tensor, layout_type, **quantize_kwargs) -> 'QuantizedTensor':
+        qdata, layout_params = layout_type.quantize(tensor, **quantize_kwargs)
+        return cls(qdata, layout_type, layout_params)
+
+    def dequantize(self) -> torch.Tensor:
+        return self._layout_type.dequantize(self._qdata, **self._layout_params)
+
+    @classmethod
+    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
+        kwargs = kwargs or {}
+
+        # Step 1: Check generic utilities first (detach, clone, to, etc.)
+        if func in _GENERIC_UTILS:
+            return _GENERIC_UTILS[func](func, args, kwargs)
+
+        # Step 2: Check layout-specific handlers (linear, matmul, etc.)
+        layout_type = _get_layout_from_args(args)
+        if layout_type and func in _LAYOUT_REGISTRY:
+            handler = _LAYOUT_REGISTRY[func].get(layout_type)
+            if handler:
+                return handler(func, args, kwargs)
+
+        # Step 3: Fallback to dequantization
+        if isinstance(args[0] if args else None, QuantizedTensor):
+            logging.info(f"QuantizedTensor: Unhandled operation {func}, falling back to dequantization. kwargs={kwargs}")
+        return cls._dequant_and_fallback(func, args, kwargs)
+
+    @classmethod
+    def _dequant_and_fallback(cls, func, args, kwargs):
+        def dequant_arg(arg):
+            if isinstance(arg, QuantizedTensor):
+                return arg.dequantize()
+            elif isinstance(arg, (list, tuple)):
+                return type(arg)(dequant_arg(a) for a in arg)
+            return arg
+
+        new_args = dequant_arg(args)
+        new_kwargs = dequant_arg(kwargs)
+        return func(*new_args, **new_kwargs)
+
+
+# ==============================================================================
+# Generic Utilities (Layout-Agnostic Operations)
+# ==============================================================================
+
+def _create_transformed_qtensor(qt, transform_fn):
+    new_data = transform_fn(qt._qdata)
+    new_params = _copy_layout_params(qt._layout_params)
+    return QuantizedTensor(new_data, qt._layout_type, new_params)
+
+
+def _handle_device_transfer(qt, target_device, target_dtype=None, target_layout=None, op_name="to"):
+    if target_dtype is not None and target_dtype != qt.dtype:
+        logging.warning(
+            f"QuantizedTensor: dtype conversion requested to {target_dtype}, "
+            f"but not supported for quantized tensors. Ignoring dtype."
+        )
+
+    if target_layout is not None and target_layout != torch.strided:
+        logging.warning(
+            f"QuantizedTensor: layout change requested to {target_layout}, "
+            f"but not supported. Ignoring layout."
+        )
+
+    # Handle device transfer
+    current_device = qt._qdata.device
+    if target_device is not None:
+        # Normalize device for comparison
+        if isinstance(target_device, str):
+            target_device = torch.device(target_device)
+        if isinstance(current_device, str):
+            current_device = torch.device(current_device)
+
+        if target_device != current_device:
+            logging.debug(f"QuantizedTensor.{op_name}: Moving from {current_device} to {target_device}")
+            new_q_data = qt._qdata.to(device=target_device)
+            new_params = _move_layout_params_to_device(qt._layout_params, target_device)
+            new_qt = QuantizedTensor(new_q_data, qt._layout_type, new_params)
+            logging.debug(f"QuantizedTensor.{op_name}: Created new tensor on {target_device}")
+            return new_qt
+
+    logging.debug(f"QuantizedTensor.{op_name}: No device change needed, returning original")
+    return qt
+
+
+@register_generic_util(torch.ops.aten.detach.default)
+def generic_detach(func, args, kwargs):
+    """Detach operation - creates a detached copy of the quantized tensor."""
+    qt = args[0]
+    if isinstance(qt, QuantizedTensor):
+        return _create_transformed_qtensor(qt, lambda x: x.detach())
+    return func(*args, **kwargs)
+
+
+@register_generic_util(torch.ops.aten.clone.default)
+def generic_clone(func, args, kwargs):
+    """Clone operation - creates a deep copy of the quantized tensor."""
+    qt = args[0]
+    if isinstance(qt, QuantizedTensor):
+        return _create_transformed_qtensor(qt, lambda x: x.clone())
+    return func(*args, **kwargs)
+
+
+@register_generic_util(torch.ops.aten._to_copy.default)
+def generic_to_copy(func, args, kwargs):
+    """Device/dtype transfer operation - handles .to(device) calls."""
+    qt = args[0]
+    if isinstance(qt, QuantizedTensor):
+        return _handle_device_transfer(
+            qt,
+            target_device=kwargs.get('device', None),
+            target_dtype=kwargs.get('dtype', None),
+            op_name="_to_copy"
+        )
+    return func(*args, **kwargs)
+
+
+@register_generic_util(torch.ops.aten.to.dtype_layout)
+def generic_to_dtype_layout(func, args, kwargs):
+    """Handle .to(device) calls using the dtype_layout variant."""
+    qt = args[0]
+    if isinstance(qt, QuantizedTensor):
+        return _handle_device_transfer(
+            qt,
+            target_device=kwargs.get('device', None),
+            target_dtype=kwargs.get('dtype', None),
+            target_layout=kwargs.get('layout', None),
+            op_name="to"
+        )
+    return func(*args, **kwargs)
+
+
+@register_generic_util(torch.ops.aten.copy_.default)
+def generic_copy_(func, args, kwargs):
+    qt_dest = args[0]
+    src = args[1]
+
+    if isinstance(qt_dest, QuantizedTensor):
+        if isinstance(src, QuantizedTensor):
+            # Copy from another quantized tensor
+            qt_dest._qdata.copy_(src._qdata)
+            qt_dest._layout_type = src._layout_type
+            qt_dest._layout_params = _copy_layout_params(src._layout_params)
+        else:
+            # Copy from regular tensor - just copy raw data
+            qt_dest._qdata.copy_(src)
+        return qt_dest
+    return func(*args, **kwargs)
+
+
+@register_generic_util(torch.ops.aten._has_compatible_shallow_copy_type.default)
+def generic_has_compatible_shallow_copy_type(func, args, kwargs):
+    return True
+
+# ==============================================================================
+# FP8 Layout + Operation Handlers
+# ==============================================================================
+class TensorCoreFP8Layout(QuantizedLayout):
+    """
+    Storage format:
+    - qdata: FP8 tensor (torch.float8_e4m3fn or torch.float8_e5m2)
+    - scale: Scalar tensor (float32) for dequantization
+    - orig_dtype: Original dtype before quantization (for casting back)
+    """
+    @classmethod
+    def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn):
+        orig_dtype = tensor.dtype
+
+        if scale is None:
+            scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max
+
+        if not isinstance(scale, torch.Tensor):
+            scale = torch.tensor(scale)
+        scale = scale.to(device=tensor.device, dtype=torch.float32)
+
+        lp_amax = torch.finfo(dtype).max
+        tensor_scaled = tensor.float() / scale
+        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
+        qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
+
+        layout_params = {
+            'scale': scale,
+            'orig_dtype': orig_dtype
+        }
+        return qdata, layout_params
+
+    @staticmethod
+    def dequantize(qdata, scale, orig_dtype, **kwargs):
+        plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
+        return plain_tensor * scale
+
+    @classmethod
+    def get_plain_tensors(cls, qtensor):
+        return qtensor._qdata, qtensor._layout_params['scale']
+
+
+@register_layout_op(torch.ops.aten.linear.default, TensorCoreFP8Layout)
+def fp8_linear(func, args, kwargs):
+    input_tensor = args[0]
+    weight = args[1]
+    bias = args[2] if len(args) > 2 else None
+
+    if isinstance(input_tensor, QuantizedTensor) and isinstance(weight, QuantizedTensor):
+        plain_input, scale_a = TensorCoreFP8Layout.get_plain_tensors(input_tensor)
+        plain_weight, scale_b = TensorCoreFP8Layout.get_plain_tensors(weight)
+
+        out_dtype = kwargs.get("out_dtype")
+        if out_dtype is None:
+            out_dtype = input_tensor._layout_params['orig_dtype']
+
+        weight_t = plain_weight.t()
+
+        tensor_2d = False
+        if len(plain_input.shape) == 2:
+            tensor_2d = True
+            plain_input = plain_input.unsqueeze(1)
+
+        input_shape = plain_input.shape
+        if len(input_shape) != 3:
+            return None
+
+        try:
+            output = torch._scaled_mm(
+                plain_input.reshape(-1, input_shape[2]),
+                weight_t,
+                bias=bias,
+                scale_a=scale_a,
+                scale_b=scale_b,
+                out_dtype=out_dtype,
+            )
+            if not tensor_2d:
+                output = output.reshape((-1, input_shape[1], weight.shape[0]))
+
+            if output.dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
+                output_scale = scale_a * scale_b
+                output_params = {
+                    'scale': output_scale,
+                    'orig_dtype': input_tensor._layout_params['orig_dtype']
+                }
+                return QuantizedTensor(output, TensorCoreFP8Layout, output_params)
+            else:
+                return output
+
+        except Exception as e:
+            raise RuntimeError(f"FP8 _scaled_mm failed, falling back to dequantization: {e}")
+
+    # Case 2: DQ Fallback
+    if isinstance(weight, QuantizedTensor):
+        weight = weight.dequantize()
+    if isinstance(input_tensor, QuantizedTensor):
+        input_tensor = input_tensor.dequantize()
+
+    return torch.nn.functional.linear(input_tensor, weight, bias)
diff --git a/comfy/sd.py b/comfy/sd.py
index 28bee248d..6411bb27d 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1262,7 +1262,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
     return (model_patcher, clip, vae, clipvision)
 
 
-def load_diffusion_model_state_dict(sd, model_options={}):
+def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
     """
     Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.
 
@@ -1296,7 +1296,7 @@ def load_diffusion_model_state_dict(sd, model_options={}):
     weight_dtype = comfy.utils.weight_dtype(sd)
 
     load_device = model_management.get_torch_device()
-    model_config = model_detection.model_config_from_unet(sd, "")
+    model_config = model_detection.model_config_from_unet(sd, "", metadata=metadata)
 
     if model_config is not None:
         new_sd = sd
@@ -1330,7 +1330,10 @@ def load_diffusion_model_state_dict(sd, model_options={}):
     else:
         unet_dtype = dtype
 
-    manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
+    if hasattr(model_config, "layer_quant_config"):
+        manual_cast_dtype = model_management.unet_manual_cast(None, load_device, model_config.supported_inference_dtypes)
+    else:
+        manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
     model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)
     model_config.custom_operations = model_options.get("custom_operations", model_config.custom_operations)
     if model_options.get("fp8_optimizations", False):
@@ -1346,8 +1349,8 @@ def load_diffusion_model_state_dict(sd, model_options={}):
 
 
 def load_diffusion_model(unet_path, model_options={}):
-    sd = comfy.utils.load_torch_file(unet_path)
-    model = load_diffusion_model_state_dict(sd, model_options=model_options)
+    sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True)
+    model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata)
     if model is None:
         logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
         raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
diff --git a/comfy/supported_models_base.py b/comfy/supported_models_base.py
index 54573abb1..e4bd74514 100644
--- a/comfy/supported_models_base.py
+++ b/comfy/supported_models_base.py
@@ -50,6 +50,7 @@ class BASE:
     manual_cast_dtype = None
     custom_operations = None
     scaled_fp8 = None
+    layer_quant_config = None  # Per-layer quantization configuration for mixed precision
     optimizations = {"fp8": False}
 
     @classmethod
diff --git a/tests-unit/comfy_quant/test_mixed_precision.py b/tests-unit/comfy_quant/test_mixed_precision.py
new file mode 100644
index 000000000..267bc177b
--- /dev/null
+++ b/tests-unit/comfy_quant/test_mixed_precision.py
@@ -0,0 +1,232 @@
+import unittest
+import torch
+import sys
+import os
+
+# Add comfy to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+def has_gpu():
+    return torch.cuda.is_available()
+
+from comfy.cli_args import args
+if not has_gpu():
+    args.cpu = True
+
+from comfy import ops
+from comfy.quant_ops import QuantizedTensor, TensorCoreFP8Layout
+
+
+class SimpleModel(torch.nn.Module):
+    def __init__(self, operations=ops.disable_weight_init):
+        super().__init__()
+        self.layer1 = operations.Linear(10, 20, device="cpu", dtype=torch.bfloat16)
+        self.layer2 = operations.Linear(20, 30, device="cpu", dtype=torch.bfloat16)
+        self.layer3 = operations.Linear(30, 40, device="cpu", dtype=torch.bfloat16)
+
+    def forward(self, x):
+        x = self.layer1(x)
+        x = torch.nn.functional.relu(x)
+        x = self.layer2(x)
+        x = torch.nn.functional.relu(x)
+        x = self.layer3(x)
+        return x
+
+
+class TestMixedPrecisionOps(unittest.TestCase):
+
+    def test_all_layers_standard(self):
+        """Test that model with no quantization works normally"""
+        # Configure no quantization
+        ops.MixedPrecisionOps._layer_quant_config = {}
+
+        # Create model
+        model = SimpleModel(operations=ops.MixedPrecisionOps)
+
+        # Initialize weights manually
+        model.layer1.weight = torch.nn.Parameter(torch.randn(20, 10, dtype=torch.bfloat16))
+        model.layer1.bias = torch.nn.Parameter(torch.randn(20, dtype=torch.bfloat16))
+        model.layer2.weight = torch.nn.Parameter(torch.randn(30, 20, dtype=torch.bfloat16))
+        model.layer2.bias = torch.nn.Parameter(torch.randn(30, dtype=torch.bfloat16))
+        model.layer3.weight = torch.nn.Parameter(torch.randn(40, 30, dtype=torch.bfloat16))
+        model.layer3.bias = torch.nn.Parameter(torch.randn(40, dtype=torch.bfloat16))
+
+        # Initialize weight_function and bias_function
+        for layer in [model.layer1, model.layer2, model.layer3]:
+            layer.weight_function = []
+            layer.bias_function = []
+
+        # Forward pass
+        input_tensor = torch.randn(5, 10, dtype=torch.bfloat16)
+        output = model(input_tensor)
+
+        self.assertEqual(output.shape, (5, 40))
+        self.assertEqual(output.dtype, torch.bfloat16)
+
+    def test_mixed_precision_load(self):
+        """Test loading a mixed precision model from state dict"""
+        # Configure mixed precision: layer1 is FP8, layer2 and layer3 are standard
+        layer_quant_config = {
+            "layer1": {
+                "format": "float8_e4m3fn",
+                "params": {}
+            },
+            "layer3": {
+                "format": "float8_e4m3fn",
+                "params": {}
+            }
+        }
+        ops.MixedPrecisionOps._layer_quant_config = layer_quant_config
+
+        # Create state dict with mixed precision
+        fp8_weight1 = torch.randn(20, 10, dtype=torch.float32).to(torch.float8_e4m3fn)
+        fp8_weight3 = torch.randn(40, 30, dtype=torch.float32).to(torch.float8_e4m3fn)
+
+        state_dict = {
+            # Layer 1: FP8 E4M3FN
+            "layer1.weight": fp8_weight1,
+            "layer1.bias": torch.randn(20, dtype=torch.bfloat16),
+            "layer1.weight_scale": torch.tensor(2.0, dtype=torch.float32),
+
+            # Layer 2: Standard BF16
+            "layer2.weight": torch.randn(30, 20, dtype=torch.bfloat16),
+            "layer2.bias": torch.randn(30, dtype=torch.bfloat16),
+
+            # Layer 3: FP8 E4M3FN
+            "layer3.weight": fp8_weight3,
+            "layer3.bias": torch.randn(40, dtype=torch.bfloat16),
+            "layer3.weight_scale": torch.tensor(1.5, dtype=torch.float32),
+        }
+
+        # Create model and load state dict (strict=False because custom loading pops keys)
+        model = SimpleModel(operations=ops.MixedPrecisionOps)
+        model.load_state_dict(state_dict, strict=False)
+
+        # Verify weights are wrapped in QuantizedTensor
+        self.assertIsInstance(model.layer1.weight, QuantizedTensor)
+        self.assertEqual(model.layer1.weight._layout_type, TensorCoreFP8Layout)
+
+        # Layer 2 should NOT be quantized
+        self.assertNotIsInstance(model.layer2.weight, QuantizedTensor)
+
+        # Layer 3 should be quantized
+        self.assertIsInstance(model.layer3.weight, QuantizedTensor)
+        self.assertEqual(model.layer3.weight._layout_type, TensorCoreFP8Layout)
+
+        # Verify scales were loaded
+        self.assertEqual(model.layer1.weight._layout_params['scale'].item(), 2.0)
+        self.assertEqual(model.layer3.weight._layout_params['scale'].item(), 1.5)
+
+        # Forward pass
+        input_tensor = torch.randn(5, 10, dtype=torch.bfloat16)
+        output = model(input_tensor)
+
+        self.assertEqual(output.shape, (5, 40))
+
+    def test_state_dict_quantized_preserved(self):
+        """Test that quantized weights are preserved in state_dict()"""
+        # Configure mixed precision
+        layer_quant_config = {
+            "layer1": {
+                "format": "float8_e4m3fn",
+                "params": {}
+            }
+        }
+        ops.MixedPrecisionOps._layer_quant_config = layer_quant_config
+
+        # Create and load model
+        fp8_weight = torch.randn(20, 10, dtype=torch.float32).to(torch.float8_e4m3fn)
+        state_dict1 = {
+            "layer1.weight": fp8_weight,
+            "layer1.bias": torch.randn(20, dtype=torch.bfloat16),
+            "layer1.weight_scale": torch.tensor(3.0, dtype=torch.float32),
+            "layer2.weight": torch.randn(30, 20, dtype=torch.bfloat16),
+            "layer2.bias": torch.randn(30, dtype=torch.bfloat16),
+            "layer3.weight": torch.randn(40, 30, dtype=torch.bfloat16),
+            "layer3.bias": torch.randn(40, dtype=torch.bfloat16),
+        }
+
+        model = SimpleModel(operations=ops.MixedPrecisionOps)
+        model.load_state_dict(state_dict1, strict=False)
+
+        # Save state dict
+        state_dict2 = model.state_dict()
+
+        # Verify layer1.weight is a QuantizedTensor with scale preserved
+        self.assertIsInstance(state_dict2["layer1.weight"], QuantizedTensor)
+        self.assertEqual(state_dict2["layer1.weight"]._layout_params['scale'].item(), 3.0)
+        self.assertEqual(state_dict2["layer1.weight"]._layout_type, TensorCoreFP8Layout)
+
+        # Verify non-quantized layers are standard tensors
+        self.assertNotIsInstance(state_dict2["layer2.weight"], QuantizedTensor)
+        self.assertNotIsInstance(state_dict2["layer3.weight"], QuantizedTensor)
+
+    def test_weight_function_compatibility(self):
+        """Test that weight_function (LoRA) works with quantized layers"""
+        # Configure FP8 quantization
+        layer_quant_config = {
+            "layer1": {
+                "format": "float8_e4m3fn",
+                "params": {}
+            }
+        }
+        ops.MixedPrecisionOps._layer_quant_config = layer_quant_config
+
+        # Create and load model
+        fp8_weight = torch.randn(20, 10, dtype=torch.float32).to(torch.float8_e4m3fn)
+        state_dict = {
+            "layer1.weight": fp8_weight,
+            "layer1.bias": torch.randn(20, dtype=torch.bfloat16),
+            "layer1.weight_scale": torch.tensor(2.0, dtype=torch.float32),
+            "layer2.weight": torch.randn(30, 20, dtype=torch.bfloat16),
+            "layer2.bias": torch.randn(30, dtype=torch.bfloat16),
+            "layer3.weight": torch.randn(40, 30, dtype=torch.bfloat16),
+            "layer3.bias": torch.randn(40, dtype=torch.bfloat16),
+        }
+
+        model = SimpleModel(operations=ops.MixedPrecisionOps)
+        model.load_state_dict(state_dict, strict=False)
+
+        # Add a weight function (simulating LoRA)
+        # This should trigger dequantization during forward pass
+        def apply_lora(weight):
+            lora_delta = torch.randn_like(weight) * 0.01
+            return weight + lora_delta
+
+        model.layer1.weight_function.append(apply_lora)
+
+        # Forward pass should work with LoRA (triggers weight_function path)
+        input_tensor = torch.randn(5, 10, dtype=torch.bfloat16)
+        output = model(input_tensor)
+
+        self.assertEqual(output.shape, (5, 40))
+
+    def test_error_handling_unknown_format(self):
+        """Test that unknown formats raise error"""
+        # Configure with unknown format
+        layer_quant_config = {
+            "layer1": {
+                "format": "unknown_format_xyz",
+                "params": {}
+            }
+        }
+        ops.MixedPrecisionOps._layer_quant_config = layer_quant_config
+
+        # Create state dict
+        state_dict = {
+            "layer1.weight": torch.randn(20, 10, dtype=torch.bfloat16),
+            "layer1.bias": torch.randn(20, dtype=torch.bfloat16),
+            "layer2.weight": torch.randn(30, 20, dtype=torch.bfloat16),
+            "layer2.bias": torch.randn(30, dtype=torch.bfloat16),
+            "layer3.weight": torch.randn(40, 30, dtype=torch.bfloat16),
+            "layer3.bias": torch.randn(40, dtype=torch.bfloat16),
+        }
+
+        # Load should raise KeyError for unknown format in QUANT_FORMAT_MIXINS
+        model = SimpleModel(operations=ops.MixedPrecisionOps)
+        with self.assertRaises(KeyError):
+            model.load_state_dict(state_dict, strict=False)
+
+if __name__ == "__main__":
+    unittest.main()
+
diff --git a/tests-unit/comfy_quant/test_quant_registry.py b/tests-unit/comfy_quant/test_quant_registry.py
new file mode 100644
index 000000000..477811029
--- /dev/null
+++ b/tests-unit/comfy_quant/test_quant_registry.py
@@ -0,0 +1,190 @@
+import unittest
+import torch
+import sys
+import os
+
+# Add comfy to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+def has_gpu():
+    return torch.cuda.is_available()
+
+from comfy.cli_args import args
+if not has_gpu():
+    args.cpu = True
+
+from comfy.quant_ops import QuantizedTensor, TensorCoreFP8Layout
+
+
+class TestQuantizedTensor(unittest.TestCase):
+    """Test the QuantizedTensor subclass with FP8 layout"""
+
+    def test_creation(self):
+        """Test creating a QuantizedTensor with TensorCoreFP8Layout"""
+        fp8_data = torch.randn(256, 128, dtype=torch.float32).to(torch.float8_e4m3fn)
+        scale = torch.tensor(2.0)
+        layout_params = {'scale': scale, 'orig_dtype': torch.bfloat16}
+
+        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+
+        self.assertIsInstance(qt, QuantizedTensor)
+        self.assertEqual(qt.shape, (256, 128))
+        self.assertEqual(qt.dtype, torch.float8_e4m3fn)
+        self.assertEqual(qt._layout_params['scale'], scale)
+        self.assertEqual(qt._layout_params['orig_dtype'], torch.bfloat16)
+        self.assertEqual(qt._layout_type, TensorCoreFP8Layout)
+
+    def test_dequantize(self):
+        """Test explicit dequantization"""
+
+        fp8_data = torch.ones(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
+        scale = torch.tensor(3.0)
+        layout_params = {'scale': scale, 'orig_dtype': torch.float32}
+
+        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        dequantized = qt.dequantize()
+
+        self.assertEqual(dequantized.dtype, torch.float32)
+        self.assertTrue(torch.allclose(dequantized, torch.ones(10, 20) * 3.0, rtol=0.1))
+
+    def test_from_float(self):
+        """Test creating QuantizedTensor from float tensor"""
+        float_tensor = torch.randn(64, 32, dtype=torch.float32)
+        scale = torch.tensor(1.5)
+
+        qt = QuantizedTensor.from_float(
+            float_tensor,
+            TensorCoreFP8Layout,
+            scale=scale,
+            dtype=torch.float8_e4m3fn
+        )
+
+        self.assertIsInstance(qt, QuantizedTensor)
+        self.assertEqual(qt.dtype, torch.float8_e4m3fn)
+        self.assertEqual(qt.shape, (64, 32))
+
+        # Verify dequantization gives approximately original values
+        dequantized = qt.dequantize()
+        mean_rel_error = ((dequantized - float_tensor).abs() / (float_tensor.abs() + 1e-6)).mean()
+        self.assertLess(mean_rel_error, 0.1)
+
+
+class TestGenericUtilities(unittest.TestCase):
+    """Test generic utility operations"""
+
+    def test_detach(self):
+        """Test detach operation on quantized tensor"""
+        fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
+        scale = torch.tensor(1.5)
+        layout_params = {'scale': scale, 'orig_dtype': torch.float32}
+        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+
+        # Detach should return a new QuantizedTensor
+        qt_detached = qt.detach()
+
+        self.assertIsInstance(qt_detached, QuantizedTensor)
+        self.assertEqual(qt_detached.shape, qt.shape)
+        self.assertEqual(qt_detached._layout_type, TensorCoreFP8Layout)
+
+    def test_clone(self):
+        """Test clone operation on quantized tensor"""
+        fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
+        scale = torch.tensor(1.5)
+        layout_params = {'scale': scale, 'orig_dtype': torch.float32}
+        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+
+        # Clone should return a new QuantizedTensor
+        qt_cloned = qt.clone()
+
+        self.assertIsInstance(qt_cloned, QuantizedTensor)
+        self.assertEqual(qt_cloned.shape, qt.shape)
+        self.assertEqual(qt_cloned._layout_type, TensorCoreFP8Layout)
+
+        # Verify it's a deep copy
+        self.assertIsNot(qt_cloned._qdata, qt._qdata)
+
+    @unittest.skipUnless(has_gpu(), "GPU not available")
+    def test_to_device(self):
+        """Test device transfer"""
+        fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
+        scale = torch.tensor(1.5)
+        layout_params = {'scale': scale, 'orig_dtype': torch.float32}
+        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+
+        # Moving to same device should work (CPU to CPU)
+        qt_cpu = qt.to('cpu')
+
+        self.assertIsInstance(qt_cpu, QuantizedTensor)
+        self.assertEqual(qt_cpu.device.type, 'cpu')
+        self.assertEqual(qt_cpu._layout_params['scale'].device.type, 'cpu')
+
+
+class TestTensorCoreFP8Layout(unittest.TestCase):
+    """Test the TensorCoreFP8Layout implementation"""
+
+    def test_quantize(self):
+        """Test quantization method"""
+        float_tensor = torch.randn(32, 64, dtype=torch.float32)
+        scale = torch.tensor(1.5)
+
+        qdata, layout_params = TensorCoreFP8Layout.quantize(
+            float_tensor,
+            scale=scale,
+            dtype=torch.float8_e4m3fn
+        )
+
+        self.assertEqual(qdata.dtype, torch.float8_e4m3fn)
+        self.assertEqual(qdata.shape, float_tensor.shape)
+        self.assertIn('scale', layout_params)
+        self.assertIn('orig_dtype', layout_params)
+        self.assertEqual(layout_params['orig_dtype'], torch.float32)
+
+    def test_dequantize(self):
+        """Test dequantization method"""
+        float_tensor = torch.ones(10, 20, dtype=torch.float32) * 3.0
+        scale = torch.tensor(1.0)
+
+        qdata, layout_params = TensorCoreFP8Layout.quantize(
+            float_tensor,
+            scale=scale,
+            dtype=torch.float8_e4m3fn
+        )
+
+        dequantized = TensorCoreFP8Layout.dequantize(qdata, **layout_params)
+
+        # Should approximately match original
+        self.assertTrue(torch.allclose(dequantized, float_tensor, rtol=0.1, atol=0.1))
+
+
+class TestFallbackMechanism(unittest.TestCase):
+    """Test fallback for unsupported operations"""
+
+    def test_unsupported_op_dequantizes(self):
+        """Test that unsupported operations fall back to dequantization"""
+        # Set seed for reproducibility
+        torch.manual_seed(42)
+
+        # Create quantized tensor
+        a_fp32 = torch.randn(10, 20, dtype=torch.float32)
+        scale = torch.tensor(1.0)
+        a_q = QuantizedTensor.from_float(
+            a_fp32,
+            TensorCoreFP8Layout,
+            scale=scale,
+            dtype=torch.float8_e4m3fn
+        )
+
+        # Call an operation that doesn't have a registered handler
+        # For example, torch.abs
+        result = torch.abs(a_q)
+
+        # Should work via fallback (dequantize → abs → return)
+        self.assertNotIsInstance(result, QuantizedTensor)
+        expected = torch.abs(a_fp32)
+        # FP8 introduces quantization error, so use loose tolerance
+        mean_error = (result - expected).abs().mean()
+        self.assertLess(mean_error, 0.05, f"Mean error {mean_error:.4f} is too large")
+
+
+if __name__ == "__main__":
+    unittest.main()

From d202c2ba7404affd58a2199aeb514b3cc48e0ef3 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Wed, 29 Oct 2025 06:22:08 +1000
Subject: [PATCH 03/20] execution: Allow a subgraph nodes to execute multiple
 times (#10499)

In the case of --cache-none lazy and subgraph execution can cause
anything to be run multiple times per workflow. If that rerun nodes is
in itself a subgraph generator, this will crash for two reasons.

pending_subgraph_results[] does not cleanup entries after their use.
So when a pending_subgraph_result is consumed, remove it from the list
so that if the corresponding node is fully re-executed this misses
lookup and it fall through to execute the node as it should.

Secondly, theres is an explicit enforcement against dups in the
addition of subgraphs nodes as ephemerals to the dymprompt. Remove this
enforcement as the use case is now valid.
---
 execution.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/execution.py b/execution.py
index b14bb14c7..20e106213 100644
--- a/execution.py
+++ b/execution.py
@@ -445,6 +445,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                     resolved_outputs.append(tuple(resolved_output))
             output_data = merge_result_data(resolved_outputs, class_def)
             output_ui = []
+            del pending_subgraph_results[unique_id]
             has_subgraph = False
         else:
             get_progress_state().start_progress(unique_id)
@@ -527,10 +528,6 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                 if new_graph is None:
                     cached_outputs.append((False, node_outputs))
                 else:
-                    # Check for conflicts
-                    for node_id in new_graph.keys():
-                        if dynprompt.has_node(node_id):
-                            raise DuplicateNodeError(f"Attempt to add duplicate node {node_id}. Ensure node ids are unique and deterministic or use graph_utils.GraphBuilder.")
                     for node_id, node_info in new_graph.items():
                         new_node_ids.append(node_id)
                         display_id = node_info.get("override_display_id", unique_id)

From 210f7a1ba580d57d817ca68346cb72b8d0a26ad2 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Tue, 28 Oct 2025 23:38:05 +0200
Subject: [PATCH 04/20] convert nodes_recraft.py to V3 schema (#10507)

---
 comfy_api_nodes/nodes_recraft.py | 1319 +++++++++++++-----------------
 1 file changed, 585 insertions(+), 734 deletions(-)

diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py
index 8ee7e55c4..dee186cd6 100644
--- a/comfy_api_nodes/nodes_recraft.py
+++ b/comfy_api_nodes/nodes_recraft.py
@@ -1,82 +1,71 @@
-from __future__ import annotations
-from inspect import cleandoc
-from typing import Optional
+from io import BytesIO
+from typing import Optional, Union
+
+import aiohttp
+import torch
+from PIL import UnidentifiedImageError
+from typing_extensions import override
+
 from comfy.utils import ProgressBar
-from comfy_extras.nodes_images import SVG # Added
-from comfy.comfy_types.node_typing import IO
+from comfy_api.latest import IO, ComfyExtension
+from comfy_api_nodes.apinode_utils import (
+    resize_mask_to_image,
+)
 from comfy_api_nodes.apis.recraft_api import (
-    RecraftImageGenerationRequest,
-    RecraftImageGenerationResponse,
-    RecraftImageSize,
-    RecraftModel,
-    RecraftStyle,
-    RecraftStyleV3,
     RecraftColor,
     RecraftColorChain,
     RecraftControls,
+    RecraftImageGenerationRequest,
+    RecraftImageGenerationResponse,
+    RecraftImageSize,
     RecraftIO,
+    RecraftModel,
+    RecraftStyle,
+    RecraftStyleV3,
     get_v3_substyles,
 )
-from comfy_api_nodes.apis.client import (
+from comfy_api_nodes.util import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    EmptyRequest,
+    bytesio_to_image_tensor,
+    download_url_as_bytesio,
+    sync_op,
+    tensor_to_bytesio,
+    validate_string,
 )
-from comfy_api_nodes.apinode_utils import (
-    download_url_to_bytesio,
-    resize_mask_to_image,
-)
-from comfy_api_nodes.util import validate_string, tensor_to_bytesio, bytesio_to_image_tensor
-from server import PromptServer
-
-import torch
-from io import BytesIO
-from PIL import UnidentifiedImageError
-import aiohttp
+from comfy_extras.nodes_images import SVG
 
 
 async def handle_recraft_file_request(
+    cls: type[IO.ComfyNode],
     image: torch.Tensor,
     path: str,
-    mask: torch.Tensor=None,
-    total_pixels=4096*4096,
-    timeout=1024,
+    mask: Optional[torch.Tensor] = None,
+    total_pixels: int = 4096 * 4096,
+    timeout: int = 1024,
     request=None,
-    auth_kwargs: dict[str,str] = None,
 ) -> list[BytesIO]:
-    """
-    Handle sending common Recraft file-only request to get back file bytes.
-    """
-    if request is None:
-        request = EmptyRequest()
+    """Handle sending common Recraft file-only request to get back file bytes."""
 
-    files = {
-        'image': tensor_to_bytesio(image, total_pixels=total_pixels).read()
-    }
+    files = {"image": tensor_to_bytesio(image, total_pixels=total_pixels).read()}
     if mask is not None:
-        files['mask'] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()
+        files["mask"] = tensor_to_bytesio(mask, total_pixels=total_pixels).read()
 
-    operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path=path,
-            method=HttpMethod.POST,
-            request_model=type(request),
-            response_model=RecraftImageGenerationResponse,
-        ),
-        request=request,
+    response = await sync_op(
+        cls,
+        endpoint=ApiEndpoint(path=path, method="POST"),
+        response_model=RecraftImageGenerationResponse,
+        data=request if request else None,
         files=files,
         content_type="multipart/form-data",
-        auth_kwargs=auth_kwargs,
         multipart_parser=recraft_multipart_parser,
+        max_retries=1,
     )
-    response: RecraftImageGenerationResponse = await operation.execute()
     all_bytesio = []
     if response.image is not None:
-        all_bytesio.append(await download_url_to_bytesio(response.image.url, timeout=timeout))
+        all_bytesio.append(await download_url_as_bytesio(response.image.url, timeout=timeout))
     else:
         for data in response.data:
-            all_bytesio.append(await download_url_to_bytesio(data.url, timeout=timeout))
+            all_bytesio.append(await download_url_as_bytesio(data.url, timeout=timeout))
 
     return all_bytesio
 
@@ -84,11 +73,11 @@ async def handle_recraft_file_request(
 def recraft_multipart_parser(
     data,
     parent_key=None,
-    formatter: callable = None,
-    converted_to_check: list[list] = None,
+    formatter: Optional[type[callable]] = None,
+    converted_to_check: Optional[list[list]] = None,
     is_list: bool = False,
-    return_mode: str = "formdata"  # "dict" | "formdata"
-) -> dict | aiohttp.FormData:
+    return_mode: str = "formdata",  # "dict" | "formdata"
+) -> Union[dict, aiohttp.FormData]:
     """
     Formats data such that multipart/form-data will work with aiohttp library when both files and data are present.
 
@@ -108,8 +97,8 @@ def recraft_multipart_parser(
     # Modification of a function that handled a different type of multipart parsing, big ups:
     # https://gist.github.com/kazqvaizer/4cebebe5db654a414132809f9f88067b
 
-    def handle_converted_lists(item, parent_key, lists_to_check=tuple[list]):
-        # if list already exists exists, just extend list with data
+    def handle_converted_lists(item, parent_key, lists_to_check=list[list]):
+        # if list already exists, just extend list with data
         for check_list in lists_to_check:
             for conv_tuple in check_list:
                 if conv_tuple[0] == parent_key and isinstance(conv_tuple[1], list):
@@ -125,7 +114,7 @@ def recraft_multipart_parser(
         formatter = lambda v: v  # Multipart representation of value
 
     if not isinstance(data, dict):
-        # if list already exists exists, just extend list with data
+        # if list already exists, just extend list with data
         added = handle_converted_lists(data, parent_key, converted_to_check)
         if added:
             return {}
@@ -146,7 +135,9 @@ def recraft_multipart_parser(
         elif isinstance(value, list):
             for ind, list_value in enumerate(value):
                 iter_key = f"{current_key}[]"
-                converted.extend(recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items())
+                converted.extend(
+                    recraft_multipart_parser(list_value, iter_key, formatter, next_check, is_list=True).items()
+                )
         else:
             converted.append((current_key, formatter(value)))
 
@@ -166,6 +157,7 @@ class handle_recraft_image_output:
     """
     Catch an exception related to receiving SVG data instead of image, when Infinite Style Library style_id is in use.
     """
+
     def __init__(self):
         pass
 
@@ -174,243 +166,225 @@ class handle_recraft_image_output:
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if exc_type is not None and exc_type is UnidentifiedImageError:
-            raise Exception("Received output data was not an image; likely an SVG. If you used style_id, make sure it is not a Vector art style.")
+            raise Exception(
+                "Received output data was not an image; likely an SVG. "
+                "If you used style_id, make sure it is not a Vector art style."
+            )
 
 
-class RecraftColorRGBNode:
-    """
-    Create Recraft Color by choosing specific RGB values.
-    """
-
-    RETURN_TYPES = (RecraftIO.COLOR,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    RETURN_NAMES = ("recraft_color",)
-    FUNCTION = "create_color"
-    CATEGORY = "api node/image/Recraft"
+class RecraftColorRGBNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftColorRGB",
+            display_name="Recraft Color RGB",
+            category="api node/image/Recraft",
+            description="Create Recraft Color by choosing specific RGB values.",
+            inputs=[
+                IO.Int.Input("r", default=0, min=0, max=255, tooltip="Red value of color."),
+                IO.Int.Input("g", default=0, min=0, max=255, tooltip="Green value of color."),
+                IO.Int.Input("b", default=0, min=0, max=255, tooltip="Blue value of color."),
+                IO.Custom(RecraftIO.COLOR).Input("recraft_color", optional=True),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.COLOR).Output(display_name="recraft_color"),
+            ],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "r": (IO.INT, {
-                    "default": 0,
-                    "min": 0,
-                    "max": 255,
-                    "tooltip": "Red value of color."
-                }),
-                "g": (IO.INT, {
-                    "default": 0,
-                    "min": 0,
-                    "max": 255,
-                    "tooltip": "Green value of color."
-                }),
-                "b": (IO.INT, {
-                    "default": 0,
-                    "min": 0,
-                    "max": 255,
-                    "tooltip": "Blue value of color."
-                }),
-            },
-            "optional": {
-                "recraft_color": (RecraftIO.COLOR,),
-            }
-        }
-
-    def create_color(self, r: int, g: int, b: int, recraft_color: RecraftColorChain=None):
+    def execute(cls, r: int, g: int, b: int, recraft_color: RecraftColorChain = None) -> IO.NodeOutput:
         recraft_color = recraft_color.clone() if recraft_color else RecraftColorChain()
         recraft_color.add(RecraftColor(r, g, b))
-        return (recraft_color, )
+        return IO.NodeOutput(recraft_color)
 
 
-class RecraftControlsNode:
-    """
-    Create Recraft Controls for customizing Recraft generation.
-    """
-
-    RETURN_TYPES = (RecraftIO.CONTROLS,)
-    RETURN_NAMES = ("recraft_controls",)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "create_controls"
-    CATEGORY = "api node/image/Recraft"
+class RecraftControlsNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftControls",
+            display_name="Recraft Controls",
+            category="api node/image/Recraft",
+            description="Create Recraft Controls for customizing Recraft generation.",
+            inputs=[
+                IO.Custom(RecraftIO.COLOR).Input("colors", optional=True),
+                IO.Custom(RecraftIO.COLOR).Input("background_color", optional=True),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.CONTROLS).Output(display_name="recraft_controls"),
+            ],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-            },
-            "optional": {
-                "colors": (RecraftIO.COLOR,),
-                "background_color": (RecraftIO.COLOR,),
-            }
-        }
-
-    def create_controls(self, colors: RecraftColorChain=None, background_color: RecraftColorChain=None):
-        return (RecraftControls(colors=colors, background_color=background_color), )
+    def execute(cls, colors: RecraftColorChain = None, background_color: RecraftColorChain = None) -> IO.NodeOutput:
+        return IO.NodeOutput(RecraftControls(colors=colors, background_color=background_color))
 
 
-class RecraftStyleV3RealisticImageNode:
-    """
-    Select realistic_image style and optional substyle.
-    """
-
-    RETURN_TYPES = (RecraftIO.STYLEV3,)
-    RETURN_NAMES = ("recraft_style",)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "create_style"
-    CATEGORY = "api node/image/Recraft"
-
+class RecraftStyleV3RealisticImageNode(IO.ComfyNode):
     RECRAFT_STYLE = RecraftStyleV3.realistic_image
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "substyle": (get_v3_substyles(s.RECRAFT_STYLE),),
-            }
-        }
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftStyleV3RealisticImage",
+            display_name="Recraft Style - Realistic Image",
+            category="api node/image/Recraft",
+            description="Select realistic_image style and optional substyle.",
+            inputs=[
+                IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.STYLEV3).Output(display_name="recraft_style"),
+            ],
+        )
 
-    def create_style(self, substyle: str):
+    @classmethod
+    def execute(cls, substyle: str) -> IO.NodeOutput:
         if substyle == "None":
             substyle = None
-        return (RecraftStyle(self.RECRAFT_STYLE, substyle),)
+        return IO.NodeOutput(RecraftStyle(cls.RECRAFT_STYLE, substyle))
 
 
 class RecraftStyleV3DigitalIllustrationNode(RecraftStyleV3RealisticImageNode):
-    """
-    Select digital_illustration style and optional substyle.
-    """
-
     RECRAFT_STYLE = RecraftStyleV3.digital_illustration
 
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftStyleV3DigitalIllustration",
+            display_name="Recraft Style - Digital Illustration",
+            category="api node/image/Recraft",
+            description="Select realistic_image style and optional substyle.",
+            inputs=[
+                IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.STYLEV3).Output(display_name="recraft_style"),
+            ],
+        )
+
 
 class RecraftStyleV3VectorIllustrationNode(RecraftStyleV3RealisticImageNode):
-    """
-    Select vector_illustration style and optional substyle.
-    """
-
     RECRAFT_STYLE = RecraftStyleV3.vector_illustration
 
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftStyleV3VectorIllustrationNode",
+            display_name="Recraft Style - Realistic Image",
+            category="api node/image/Recraft",
+            description="Select realistic_image style and optional substyle.",
+            inputs=[
+                IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.STYLEV3).Output(display_name="recraft_style"),
+            ],
+        )
+
 
 class RecraftStyleV3LogoRasterNode(RecraftStyleV3RealisticImageNode):
-    """
-    Select vector_illustration style and optional substyle.
-    """
-
-    @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "substyle": (get_v3_substyles(s.RECRAFT_STYLE, include_none=False),),
-            }
-        }
-
     RECRAFT_STYLE = RecraftStyleV3.logo_raster
 
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftStyleV3LogoRaster",
+            display_name="Recraft Style - Logo Raster",
+            category="api node/image/Recraft",
+            description="Select realistic_image style and optional substyle.",
+            inputs=[
+                IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE, include_none=False)),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.STYLEV3).Output(display_name="recraft_style"),
+            ],
+        )
 
-class RecraftStyleInfiniteStyleLibrary:
-    """
-    Select style based on preexisting UUID from Recraft's Infinite Style Library.
-    """
 
-    RETURN_TYPES = (RecraftIO.STYLEV3,)
-    RETURN_NAMES = ("recraft_style",)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "create_style"
-    CATEGORY = "api node/image/Recraft"
+class RecraftStyleInfiniteStyleLibrary(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftStyleV3InfiniteStyleLibrary",
+            display_name="Recraft Style - Infinite Style Library",
+            category="api node/image/Recraft",
+            description="Select style based on preexisting UUID from Recraft's Infinite Style Library.",
+            inputs=[
+                IO.String.Input("style_id", default="", tooltip="UUID of style from Infinite Style Library."),
+            ],
+            outputs=[
+                IO.Custom(RecraftIO.STYLEV3).Output(display_name="recraft_style"),
+            ],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "style_id": (IO.STRING, {
-                    "default": "",
-                    "tooltip": "UUID of style from Infinite Style Library.",
-                })
-            }
-        }
-
-    def create_style(self, style_id: str):
+    def execute(cls, style_id: str) -> IO.NodeOutput:
         if not style_id:
             raise Exception("The style_id input cannot be empty.")
-        return (RecraftStyle(style_id=style_id),)
+        return IO.NodeOutput(RecraftStyle(style_id=style_id))
 
 
-class RecraftTextToImageNode:
-    """
-    Generates images synchronously based on prompt and resolution.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftTextToImageNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftTextToImageNode",
+            display_name="Recraft Text to Image",
+            category="api node/image/Recraft",
+            description="Generates images synchronously based on prompt and resolution.",
+            inputs=[
+                IO.String.Input("prompt", multiline=True, default="", tooltip="Prompt for the image generation."),
+                IO.Combo.Input(
+                    "size",
+                    options=[res.value for res in RecraftImageSize],
+                    default=RecraftImageSize.res_1024x1024,
+                    tooltip="The size of the generated image.",
+                ),
+                IO.Int.Input(
+                    "n",
+                    default=1,
+                    min=1,
+                    max=6,
+                    tooltip="The number of images to generate.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="Seed to determine if node should re-run; "
+                    "actual results are nondeterministic regardless of seed.",
+                ),
+                IO.Custom(RecraftIO.STYLEV3).Input("recraft_style", optional=True),
+                IO.String.Input(
+                    "negative_prompt",
+                    default="",
+                    force_input=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                IO.Custom(RecraftIO.CONTROLS).Input(
+                    "recraft_controls",
+                    tooltip="Optional additional controls over the generation via the Recraft Controls node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the image generation.",
-                    },
-                ),
-                "size": (
-                    [res.value for res in RecraftImageSize],
-                    {
-                        "default": RecraftImageSize.res_1024x1024,
-                        "tooltip": "The size of the generated image.",
-                    },
-                ),
-                "n": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 6,
-                        "tooltip": "The number of images to generate.",
-                    },
-                ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "control_after_generate": True,
-                        "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
-                    },
-                ),
-            },
-            "optional": {
-                "recraft_style": (RecraftIO.STYLEV3,),
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "recraft_controls": (
-                    RecraftIO.CONTROLS,
-                    {
-                        "tooltip": "Optional additional controls over the generation via the Recraft Controls node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         prompt: str,
         size: str,
         n: int,
@@ -418,9 +392,7 @@ class RecraftTextToImageNode:
         recraft_style: RecraftStyle = None,
         negative_prompt: str = None,
         recraft_controls: RecraftControls = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False, max_length=1000)
         default_style = RecraftStyle(RecraftStyleV3.realistic_image)
         if recraft_style is None:
@@ -433,14 +405,11 @@ class RecraftTextToImageNode:
         if not negative_prompt:
             negative_prompt = None
 
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/recraft/image_generation",
-                method=HttpMethod.POST,
-                request_model=RecraftImageGenerationRequest,
-                response_model=RecraftImageGenerationResponse,
-            ),
-            request=RecraftImageGenerationRequest(
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/recraft/image_generation", method="POST"),
+            response_model=RecraftImageGenerationResponse,
+            data=RecraftImageGenerationRequest(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
                 model=RecraftModel.recraftv3,
@@ -451,109 +420,83 @@ class RecraftTextToImageNode:
                 style_id=recraft_style.style_id,
                 controls=controls_api,
             ),
-            auth_kwargs=kwargs,
+            max_retries=1,
         )
-        response: RecraftImageGenerationResponse = await operation.execute()
         images = []
-        urls = []
         for data in response.data:
             with handle_recraft_image_output():
-                if unique_id and data.url:
-                    urls.append(data.url)
-                    urls_string = '\n'.join(urls)
-                    PromptServer.instance.send_progress_text(
-                        f"Result URL: {urls_string}", unique_id
-                    )
-                image = bytesio_to_image_tensor(
-                    await download_url_to_bytesio(data.url, timeout=1024)
-                )
+                image = bytesio_to_image_tensor(await download_url_as_bytesio(data.url, timeout=1024))
             if len(image.shape) < 4:
                 image = image.unsqueeze(0)
             images.append(image)
-        output_image = torch.cat(images, dim=0)
 
-        return (output_image,)
+        return IO.NodeOutput(torch.cat(images, dim=0))
 
 
-class RecraftImageToImageNode:
-    """
-    Modify image based on prompt and strength.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftImageToImageNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftImageToImageNode",
+            display_name="Recraft Image to Image",
+            category="api node/image/Recraft",
+            description="Modify image based on prompt and strength.",
+            inputs=[
+                IO.Image.Input("image"),
+                IO.String.Input("prompt", multiline=True, default="", tooltip="Prompt for the image generation."),
+                IO.Int.Input(
+                    "n",
+                    default=1,
+                    min=1,
+                    max=6,
+                    tooltip="The number of images to generate.",
+                ),
+                IO.Float.Input(
+                    "strength",
+                    default=0.5,
+                    min=0.0,
+                    max=1.0,
+                    step=0.01,
+                    tooltip="Defines the difference with the original image, should lie in [0, 1], "
+                    "where 0 means almost identical, and 1 means miserable similarity.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="Seed to determine if node should re-run; "
+                    "actual results are nondeterministic regardless of seed.",
+                ),
+                IO.Custom(RecraftIO.STYLEV3).Input("recraft_style", optional=True),
+                IO.String.Input(
+                    "negative_prompt",
+                    default="",
+                    force_input=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                IO.Custom(RecraftIO.CONTROLS).Input(
+                    "recraft_controls",
+                    tooltip="Optional additional controls over the generation via the Recraft Controls node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the image generation.",
-                    },
-                ),
-                "n": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 6,
-                        "tooltip": "The number of images to generate.",
-                    },
-                ),
-                "strength": (
-                    IO.FLOAT,
-                    {
-                        "default": 0.5,
-                        "min": 0.0,
-                        "max": 1.0,
-                        "step": 0.01,
-                        "tooltip": "Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means miserable similarity."
-                    }
-                ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "control_after_generate": True,
-                        "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
-                    },
-                ),
-            },
-            "optional": {
-                "recraft_style": (RecraftIO.STYLEV3,),
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "recraft_controls": (
-                    RecraftIO.CONTROLS,
-                    {
-                        "tooltip": "Optional additional controls over the generation via the Recraft Controls node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         image: torch.Tensor,
         prompt: str,
         n: int,
@@ -562,8 +505,7 @@ class RecraftImageToImageNode:
         recraft_style: RecraftStyle = None,
         negative_prompt: str = None,
         recraft_controls: RecraftControls = None,
-        **kwargs,
-    ):
+    ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False, max_length=1000)
         default_style = RecraftStyle(RecraftStyleV3.realistic_image)
         if recraft_style is None:
@@ -593,83 +535,69 @@ class RecraftImageToImageNode:
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
                 path="/proxy/recraft/images/imageToImage",
                 request=request,
-                auth_kwargs=kwargs,
             )
             with handle_recraft_image_output():
                 images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0))
             pbar.update(1)
 
-        images_tensor = torch.cat(images, dim=0)
-        return (images_tensor, )
+        return IO.NodeOutput(torch.cat(images, dim=0))
 
 
-class RecraftImageInpaintingNode:
-    """
-    Modify image based on prompt and mask.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftImageInpaintingNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftImageInpaintingNode",
+            display_name="Recraft Image Inpainting",
+            category="api node/image/Recraft",
+            description="Modify image based on prompt and mask.",
+            inputs=[
+                IO.Image.Input("image"),
+                IO.Mask.Input("mask"),
+                IO.String.Input("prompt", multiline=True, default="", tooltip="Prompt for the image generation."),
+                IO.Int.Input(
+                    "n",
+                    default=1,
+                    min=1,
+                    max=6,
+                    tooltip="The number of images to generate.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="Seed to determine if node should re-run; "
+                    "actual results are nondeterministic regardless of seed.",
+                ),
+                IO.Custom(RecraftIO.STYLEV3).Input("recraft_style", optional=True),
+                IO.String.Input(
+                    "negative_prompt",
+                    default="",
+                    force_input=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-                "mask": (IO.MASK, ),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the image generation.",
-                    },
-                ),
-                "n": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 6,
-                        "tooltip": "The number of images to generate.",
-                    },
-                ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "control_after_generate": True,
-                        "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
-                    },
-                ),
-            },
-            "optional": {
-                "recraft_style": (RecraftIO.STYLEV3,),
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         image: torch.Tensor,
         mask: torch.Tensor,
         prompt: str,
@@ -677,8 +605,7 @@ class RecraftImageInpaintingNode:
         seed,
         recraft_style: RecraftStyle = None,
         negative_prompt: str = None,
-        **kwargs,
-    ):
+    ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False, max_length=1000)
         default_style = RecraftStyle(RecraftStyleV3.realistic_image)
         if recraft_style is None:
@@ -705,96 +632,73 @@ class RecraftImageInpaintingNode:
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
-                mask=mask[i:i+1],
+                mask=mask[i : i + 1],
                 path="/proxy/recraft/images/inpaint",
                 request=request,
-                auth_kwargs=kwargs,
             )
             with handle_recraft_image_output():
                 images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0))
             pbar.update(1)
 
-        images_tensor = torch.cat(images, dim=0)
-        return (images_tensor, )
+        return IO.NodeOutput(torch.cat(images, dim=0))
 
 
-class RecraftTextToVectorNode:
-    """
-    Generates SVG synchronously based on prompt and resolution.
-    """
-
-    RETURN_TYPES = ("SVG",) # Changed
-    DESCRIPTION = cleandoc(__doc__ or "") if 'cleandoc' in globals() else __doc__ # Keep cleandoc if other nodes use it
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftTextToVectorNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftTextToVectorNode",
+            display_name="Recraft Text to Vector",
+            category="api node/image/Recraft",
+            description="Generates SVG synchronously based on prompt and resolution.",
+            inputs=[
+                IO.String.Input("prompt", default="", tooltip="Prompt for the image generation.", multiline=True),
+                IO.Combo.Input("substyle", options=get_v3_substyles(RecraftStyleV3.vector_illustration)),
+                IO.Combo.Input(
+                    "size",
+                    options=[res.value for res in RecraftImageSize],
+                    default=RecraftImageSize.res_1024x1024,
+                    tooltip="The size of the generated image.",
+                ),
+                IO.Int.Input("n", default=1, min=1, max=6, tooltip="The number of images to generate."),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="Seed to determine if node should re-run; "
+                    "actual results are nondeterministic regardless of seed.",
+                ),
+                IO.String.Input(
+                    "negative_prompt",
+                    default="",
+                    force_input=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+                IO.Custom(RecraftIO.CONTROLS).Input(
+                    "recraft_controls",
+                    tooltip="Optional additional controls over the generation via the Recraft Controls node.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                IO.SVG.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the image generation.",
-                    },
-                ),
-                "substyle": (get_v3_substyles(RecraftStyleV3.vector_illustration),),
-                "size": (
-                    [res.value for res in RecraftImageSize],
-                    {
-                        "default": RecraftImageSize.res_1024x1024,
-                        "tooltip": "The size of the generated image.",
-                    },
-                ),
-                "n": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 6,
-                        "tooltip": "The number of images to generate.",
-                    },
-                ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "control_after_generate": True,
-                        "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
-                    },
-                ),
-            },
-            "optional": {
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-                "recraft_controls": (
-                    RecraftIO.CONTROLS,
-                    {
-                        "tooltip": "Optional additional controls over the generation via the Recraft Controls node."
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         prompt: str,
         substyle: str,
         size: str,
@@ -802,9 +706,7 @@ class RecraftTextToVectorNode:
         seed,
         negative_prompt: str = None,
         recraft_controls: RecraftControls = None,
-        unique_id: Optional[str] = None,
-        **kwargs,
-    ):
+    ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False, max_length=1000)
         # create RecraftStyle so strings will be formatted properly (i.e. "None" will become None)
         recraft_style = RecraftStyle(RecraftStyleV3.vector_illustration, substyle=substyle)
@@ -816,14 +718,11 @@ class RecraftTextToVectorNode:
         if not negative_prompt:
             negative_prompt = None
 
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/recraft/image_generation",
-                method=HttpMethod.POST,
-                request_model=RecraftImageGenerationRequest,
-                response_model=RecraftImageGenerationResponse,
-            ),
-            request=RecraftImageGenerationRequest(
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/recraft/image_generation", method="POST"),
+            response_model=RecraftImageGenerationResponse,
+            data=RecraftImageGenerationRequest(
                 prompt=prompt,
                 negative_prompt=negative_prompt,
                 model=RecraftModel.recraftv3,
@@ -833,139 +732,105 @@ class RecraftTextToVectorNode:
                 substyle=recraft_style.substyle,
                 controls=controls_api,
             ),
-            auth_kwargs=kwargs,
+            max_retries=1,
         )
-        response: RecraftImageGenerationResponse = await operation.execute()
         svg_data = []
-        urls = []
         for data in response.data:
-            if unique_id and data.url:
-                urls.append(data.url)
-                # Print result on each iteration in case of error
-                PromptServer.instance.send_progress_text(
-                    f"Result URL: {' '.join(urls)}", unique_id
-                )
-            svg_data.append(await download_url_to_bytesio(data.url, timeout=1024))
+            svg_data.append(await download_url_as_bytesio(data.url, timeout=1024))
 
-        return (SVG(svg_data),)
+        return IO.NodeOutput(SVG(svg_data))
 
 
-class RecraftVectorizeImageNode:
-    """
-    Generates SVG synchronously from an input image.
-    """
-
-    RETURN_TYPES = ("SVG",) # Changed
-    DESCRIPTION = cleandoc(__doc__ or "") if 'cleandoc' in globals() else __doc__ # Keep cleandoc if other nodes use it
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftVectorizeImageNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftVectorizeImageNode",
+            display_name="Recraft Vectorize Image",
+            category="api node/image/Recraft",
+            description="Generates SVG synchronously from an input image.",
+            inputs=[
+                IO.Image.Input("image"),
+            ],
+            outputs=[
+                IO.SVG.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-            },
-            "optional": {
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
-
-    async def api_call(
-        self,
-        image: torch.Tensor,
-        **kwargs,
-    ):
+    async def execute(cls, image: torch.Tensor) -> IO.NodeOutput:
         svgs = []
         total = image.shape[0]
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
                 path="/proxy/recraft/images/vectorize",
-                auth_kwargs=kwargs,
             )
             svgs.append(SVG(sub_bytes))
             pbar.update(1)
 
-        return (SVG.combine_all(svgs), )
+        return IO.NodeOutput(SVG.combine_all(svgs))
 
 
-class RecraftReplaceBackgroundNode:
-    """
-    Replace background on image, based on provided prompt.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftReplaceBackgroundNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftReplaceBackgroundNode",
+            display_name="Recraft Replace Background",
+            category="api node/image/Recraft",
+            description="Replace background on image, based on provided prompt.",
+            inputs=[
+                IO.Image.Input("image"),
+                IO.String.Input("prompt", tooltip="Prompt for the image generation.", default="", multiline=True),
+                IO.Int.Input("n", default=1, min=1, max=6, tooltip="The number of images to generate."),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="Seed to determine if node should re-run; "
+                    "actual results are nondeterministic regardless of seed.",
+                ),
+                IO.Custom(RecraftIO.STYLEV3).Input("recraft_style", optional=True),
+                IO.String.Input(
+                    "negative_prompt",
+                    default="",
+                    force_input=True,
+                    tooltip="An optional text description of undesired elements on an image.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-                "prompt": (
-                    IO.STRING,
-                    {
-                        "multiline": True,
-                        "default": "",
-                        "tooltip": "Prompt for the image generation.",
-                    },
-                ),
-                "n": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 6,
-                        "tooltip": "The number of images to generate.",
-                    },
-                ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "control_after_generate": True,
-                        "tooltip": "Seed to determine if node should re-run; actual results are nondeterministic regardless of seed.",
-                    },
-                ),
-            },
-            "optional": {
-                "recraft_style": (RecraftIO.STYLEV3,),
-                "negative_prompt": (
-                    IO.STRING,
-                    {
-                        "default": "",
-                        "forceInput": True,
-                        "tooltip": "An optional text description of undesired elements on an image.",
-                    },
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
-
-    async def api_call(
-        self,
+    async def execute(
+        cls,
         image: torch.Tensor,
         prompt: str,
         n: int,
         seed,
         recraft_style: RecraftStyle = None,
         negative_prompt: str = None,
-        **kwargs,
-    ):
+    ) -> IO.NodeOutput:
         default_style = RecraftStyle(RecraftStyleV3.realistic_image)
         if recraft_style is None:
             recraft_style = default_style
@@ -988,165 +853,151 @@ class RecraftReplaceBackgroundNode:
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
                 path="/proxy/recraft/images/replaceBackground",
                 request=request,
-                auth_kwargs=kwargs,
             )
             images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0))
             pbar.update(1)
 
-        images_tensor = torch.cat(images, dim=0)
-        return (images_tensor, )
+        return IO.NodeOutput(torch.cat(images, dim=0))
 
 
-class RecraftRemoveBackgroundNode:
-    """
-    Remove background from image, and return processed image and mask.
-    """
-
-    RETURN_TYPES = (IO.IMAGE, IO.MASK)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
+class RecraftRemoveBackgroundNode(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftRemoveBackgroundNode",
+            display_name="Recraft Remove Background",
+            category="api node/image/Recraft",
+            description="Remove background from image, and return processed image and mask.",
+            inputs=[
+                IO.Image.Input("image"),
+            ],
+            outputs=[
+                IO.Image.Output(),
+                IO.Mask.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-            },
-            "optional": {
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
-
-    async def api_call(
-        self,
-        image: torch.Tensor,
-        **kwargs,
-    ):
+    async def execute(cls, image: torch.Tensor) -> IO.NodeOutput:
         images = []
         total = image.shape[0]
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
                 path="/proxy/recraft/images/removeBackground",
-                auth_kwargs=kwargs,
             )
             images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0))
             pbar.update(1)
 
         images_tensor = torch.cat(images, dim=0)
         # use alpha channel as masks, in B,H,W format
-        masks_tensor = images_tensor[:,:,:,-1:].squeeze(-1)
-        return (images_tensor, masks_tensor)
+        masks_tensor = images_tensor[:, :, :, -1:].squeeze(-1)
+        return IO.NodeOutput(images_tensor, masks_tensor)
 
 
-class RecraftCrispUpscaleNode:
-    """
-    Upscale image synchronously.
-    Enhances a given raster image using ‘crisp upscale’ tool, increasing image resolution, making the image sharper and cleaner.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
-
+class RecraftCrispUpscaleNode(IO.ComfyNode):
     RECRAFT_PATH = "/proxy/recraft/images/crispUpscale"
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "image": (IO.IMAGE, ),
-            },
-            "optional": {
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-            },
-        }
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftCrispUpscaleNode",
+            display_name="Recraft Crisp Upscale Image",
+            category="api node/image/Recraft",
+            description="Upscale image synchronously.\n"
+            "Enhances a given raster image using ‘crisp upscale’ tool, "
+            "increasing image resolution, making the image sharper and cleaner.",
+            inputs=[
+                IO.Image.Input("image"),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-    async def api_call(
-        self,
-        image: torch.Tensor,
-        **kwargs,
-    ):
+    @classmethod
+    async def execute(cls, image: torch.Tensor) -> IO.NodeOutput:
         images = []
         total = image.shape[0]
         pbar = ProgressBar(total)
         for i in range(total):
             sub_bytes = await handle_recraft_file_request(
+                cls,
                 image=image[i],
-                path=self.RECRAFT_PATH,
-                auth_kwargs=kwargs,
+                path=cls.RECRAFT_PATH,
             )
             images.append(torch.cat([bytesio_to_image_tensor(x) for x in sub_bytes], dim=0))
             pbar.update(1)
 
-        images_tensor = torch.cat(images, dim=0)
-        return (images_tensor,)
+        return IO.NodeOutput(torch.cat(images, dim=0))
 
 
 class RecraftCreativeUpscaleNode(RecraftCrispUpscaleNode):
-    """
-    Upscale image synchronously.
-    Enhances a given raster image using ‘creative upscale’ tool, boosting resolution with a focus on refining small details and faces.
-    """
-
-    RETURN_TYPES = (IO.IMAGE,)
-    DESCRIPTION = cleandoc(__doc__ or "")  # Handle potential None value
-    FUNCTION = "api_call"
-    API_NODE = True
-    CATEGORY = "api node/image/Recraft"
-
     RECRAFT_PATH = "/proxy/recraft/images/creativeUpscale"
 
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="RecraftCreativeUpscaleNode",
+            display_name="Recraft Creative Upscale Image",
+            category="api node/image/Recraft",
+            description="Upscale image synchronously.\n"
+            "Enhances a given raster image using ‘creative upscale’ tool, "
+            "boosting resolution with a focus on refining small details and faces.",
+            inputs=[
+                IO.Image.Input("image"),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
 
-# A dictionary that contains all nodes you want to export with their names
-# NOTE: names should be globally unique
-NODE_CLASS_MAPPINGS = {
-    "RecraftTextToImageNode": RecraftTextToImageNode,
-    "RecraftImageToImageNode": RecraftImageToImageNode,
-    "RecraftImageInpaintingNode": RecraftImageInpaintingNode,
-    "RecraftTextToVectorNode": RecraftTextToVectorNode,
-    "RecraftVectorizeImageNode": RecraftVectorizeImageNode,
-    "RecraftRemoveBackgroundNode": RecraftRemoveBackgroundNode,
-    "RecraftReplaceBackgroundNode": RecraftReplaceBackgroundNode,
-    "RecraftCrispUpscaleNode": RecraftCrispUpscaleNode,
-    "RecraftCreativeUpscaleNode": RecraftCreativeUpscaleNode,
-    "RecraftStyleV3RealisticImage": RecraftStyleV3RealisticImageNode,
-    "RecraftStyleV3DigitalIllustration": RecraftStyleV3DigitalIllustrationNode,
-    "RecraftStyleV3LogoRaster": RecraftStyleV3LogoRasterNode,
-    "RecraftStyleV3InfiniteStyleLibrary": RecraftStyleInfiniteStyleLibrary,
-    "RecraftColorRGB": RecraftColorRGBNode,
-    "RecraftControls": RecraftControlsNode,
-}
 
-# A dictionary that contains the friendly/humanly readable titles for the nodes
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "RecraftTextToImageNode": "Recraft Text to Image",
-    "RecraftImageToImageNode": "Recraft Image to Image",
-    "RecraftImageInpaintingNode": "Recraft Image Inpainting",
-    "RecraftTextToVectorNode": "Recraft Text to Vector",
-    "RecraftVectorizeImageNode": "Recraft Vectorize Image",
-    "RecraftRemoveBackgroundNode": "Recraft Remove Background",
-    "RecraftReplaceBackgroundNode": "Recraft Replace Background",
-    "RecraftCrispUpscaleNode": "Recraft Crisp Upscale Image",
-    "RecraftCreativeUpscaleNode": "Recraft Creative Upscale Image",
-    "RecraftStyleV3RealisticImage": "Recraft Style - Realistic Image",
-    "RecraftStyleV3DigitalIllustration": "Recraft Style - Digital Illustration",
-    "RecraftStyleV3LogoRaster": "Recraft Style - Logo Raster",
-    "RecraftStyleV3InfiniteStyleLibrary": "Recraft Style - Infinite Style Library",
-    "RecraftColorRGB": "Recraft Color RGB",
-    "RecraftControls": "Recraft Controls",
-}
+class RecraftExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
+        return [
+            RecraftTextToImageNode,
+            RecraftImageToImageNode,
+            RecraftImageInpaintingNode,
+            RecraftTextToVectorNode,
+            RecraftVectorizeImageNode,
+            RecraftRemoveBackgroundNode,
+            RecraftReplaceBackgroundNode,
+            RecraftCrispUpscaleNode,
+            RecraftCreativeUpscaleNode,
+            RecraftStyleV3RealisticImageNode,
+            RecraftStyleV3DigitalIllustrationNode,
+            RecraftStyleV3LogoRasterNode,
+            RecraftStyleInfiniteStyleLibrary,
+            RecraftColorRGBNode,
+            RecraftControlsNode,
+        ]
+
+
+async def comfy_entrypoint() -> RecraftExtension:
+    return RecraftExtension()

From 3fa7a5c04ae69ad168a875e8d3d453783d60899d Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Oct 2025 21:21:01 -0700
Subject: [PATCH 05/20] Speed up offloading using pinned memory. (#10526)

To enable this feature use: --fast pinned_memory
---
 comfy/cli_args.py         |  1 +
 comfy/model_management.py | 30 ++++++++++++++++++++++++++++++
 comfy/model_patcher.py    | 26 +++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index cc1f12482..001abd843 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -144,6 +144,7 @@ class PerformanceFeature(enum.Enum):
     Fp8MatrixMultiplication = "fp8_matrix_mult"
     CublasOps = "cublas_ops"
     AutoTune = "autotune"
+    PinnedMem = "pinned_memory"
 
 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index afe78f36e..3e5b977d4 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1080,6 +1080,36 @@ def cast_to_device(tensor, device, dtype, copy=False):
     non_blocking = device_supports_non_blocking(device)
     return cast_to(tensor, dtype=dtype, device=device, non_blocking=non_blocking, copy=copy)
 
+def pin_memory(tensor):
+    if PerformanceFeature.PinnedMem not in args.fast:
+        return False
+
+    if not is_nvidia():
+        return False
+
+    if not is_device_cpu(tensor.device):
+        return False
+
+    if torch.cuda.cudart().cudaHostRegister(tensor.data_ptr(), tensor.numel() * tensor.element_size(), 1) == 0:
+        return True
+
+    return False
+
+def unpin_memory(tensor):
+    if PerformanceFeature.PinnedMem not in args.fast:
+        return False
+
+    if not is_nvidia():
+        return False
+
+    if not is_device_cpu(tensor.device):
+        return False
+
+    if torch.cuda.cudart().cudaHostUnregister(tensor.data_ptr()) == 0:
+        return True
+
+    return False
+
 def sage_attention_enabled():
     return args.use_sage_attention
 
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index c0b68fb8c..aec73349c 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -238,6 +238,7 @@ class ModelPatcher:
         self.force_cast_weights = False
         self.patches_uuid = uuid.uuid4()
         self.parent = None
+        self.pinned = set()
 
         self.attachments: dict[str] = {}
         self.additional_models: dict[str, list[ModelPatcher]] = {}
@@ -618,6 +619,21 @@ class ModelPatcher:
         else:
             set_func(out_weight, inplace_update=inplace_update, seed=string_to_seed(key))
 
+    def pin_weight_to_device(self, key):
+        weight, set_func, convert_func = get_key_weight(self.model, key)
+        if comfy.model_management.pin_memory(weight):
+            self.pinned.add(key)
+
+    def unpin_weight(self, key):
+        if key in self.pinned:
+            weight, set_func, convert_func = get_key_weight(self.model, key)
+            comfy.model_management.unpin_memory(weight)
+            self.pinned.remove(key)
+
+    def unpin_all_weights(self):
+        for key in list(self.pinned):
+            self.unpin_weight(key)
+
     def _load_list(self):
         loading = []
         for n, m in self.model.named_modules():
@@ -683,6 +699,8 @@ class ModelPatcher:
                             patch_counter += 1
 
                     cast_weight = True
+                    for param in params:
+                        self.pin_weight_to_device("{}.{}".format(n, param))
                 else:
                     if hasattr(m, "comfy_cast_weights"):
                         wipe_lowvram_weight(m)
@@ -713,7 +731,9 @@ class ModelPatcher:
                         continue
 
                 for param in params:
-                    self.patch_weight_to_device("{}.{}".format(n, param), device_to=device_to)
+                    key = "{}.{}".format(n, param)
+                    self.unpin_weight(key)
+                    self.patch_weight_to_device(key, device_to=device_to)
 
                 logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
                 m.comfy_patched_weights = True
@@ -762,6 +782,7 @@ class ModelPatcher:
         self.eject_model()
         if unpatch_weights:
             self.unpatch_hooks()
+            self.unpin_all_weights()
             if self.model.model_lowvram:
                 for m in self.model.modules():
                     move_weight_functions(m, device_to)
@@ -857,6 +878,9 @@ class ModelPatcher:
                         memory_freed += module_mem
                         logging.debug("freed {}".format(n))
 
+                        for param in params:
+                            self.pin_weight_to_device("{}.{}".format(n, param))
+
             self.model.model_lowvram = True
             self.model.lowvram_patch_counter += patch_counter
             self.model.model_loaded_weight_memory -= memory_freed

From e525673f7201b6c49af0fa0e6baf44e4e98bb10c Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Oct 2025 21:37:00 -0700
Subject: [PATCH 06/20] Fix issue. (#10527)

---
 comfy/sd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index 6411bb27d..de4eee96e 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1330,7 +1330,7 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
     else:
         unet_dtype = dtype
 
-    if hasattr(model_config, "layer_quant_config"):
+    if model_config.layer_quant_config is not None:
         manual_cast_dtype = model_management.unet_manual_cast(None, load_device, model_config.supported_inference_dtypes)
     else:
         manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)

From 6c14f3afac0ea28dba24fe8783e7c1f09c03b31f Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Wed, 29 Oct 2025 20:14:56 +0200
Subject: [PATCH 07/20] use new API client in Luma and Minimax nodes (#10528)

---
 comfy_api_nodes/apinode_utils.py         |  81 ------
 comfy_api_nodes/apis/minimax_api.py      | 120 ++++++++
 comfy_api_nodes/nodes_ideogram.py        |   2 +-
 comfy_api_nodes/nodes_luma.py            | 354 ++++++-----------------
 comfy_api_nodes/nodes_minimax.py         | 237 +++++----------
 comfy_api_nodes/util/client.py           |   2 +-
 comfy_api_nodes/util/download_helpers.py |   3 +-
 7 files changed, 283 insertions(+), 516 deletions(-)
 create mode 100644 comfy_api_nodes/apis/minimax_api.py

diff --git a/comfy_api_nodes/apinode_utils.py b/comfy_api_nodes/apinode_utils.py
index 4182c8f80..6a72b9d1d 100644
--- a/comfy_api_nodes/apinode_utils.py
+++ b/comfy_api_nodes/apinode_utils.py
@@ -3,14 +3,6 @@ import aiohttp
 import mimetypes
 from typing import Optional, Union
 from comfy.utils import common_upscale
-from comfy_api_nodes.apis.client import (
-    ApiClient,
-    ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    UploadRequest,
-    UploadResponse,
-)
 from server import PromptServer
 from comfy.cli_args import args
 
@@ -19,7 +11,6 @@ from PIL import Image
 import torch
 import math
 import base64
-from .util import tensor_to_bytesio, bytesio_to_image_tensor
 from io import BytesIO
 
 
@@ -148,11 +139,6 @@ async def download_url_to_bytesio(
             return BytesIO(await resp.read())
 
 
-def process_image_response(response_content: bytes | str) -> torch.Tensor:
-    """Uses content from a Response object and converts it to a torch.Tensor"""
-    return bytesio_to_image_tensor(BytesIO(response_content))
-
-
 def text_filepath_to_base64_string(filepath: str) -> str:
     """Converts a text file to a base64 string."""
     with open(filepath, "rb") as f:
@@ -169,73 +155,6 @@ def text_filepath_to_data_uri(filepath: str) -> str:
     return f"data:{mime_type};base64,{base64_string}"
 
 
-async def upload_file_to_comfyapi(
-    file_bytes_io: BytesIO,
-    filename: str,
-    upload_mime_type: Optional[str],
-    auth_kwargs: Optional[dict[str, str]] = None,
-) -> str:
-    """
-    Uploads a single file to ComfyUI API and returns its download URL.
-
-    Args:
-        file_bytes_io: BytesIO object containing the file data.
-        filename: The filename of the file.
-        upload_mime_type: MIME type of the file.
-        auth_kwargs: Optional authentication token(s).
-
-    Returns:
-        The download URL for the uploaded file.
-    """
-    if upload_mime_type is None:
-        request_object = UploadRequest(file_name=filename)
-    else:
-        request_object = UploadRequest(file_name=filename, content_type=upload_mime_type)
-    operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path="/customers/storage",
-            method=HttpMethod.POST,
-            request_model=UploadRequest,
-            response_model=UploadResponse,
-        ),
-        request=request_object,
-        auth_kwargs=auth_kwargs,
-    )
-
-    response: UploadResponse = await operation.execute()
-    await ApiClient.upload_file(response.upload_url, file_bytes_io, content_type=upload_mime_type)
-    return response.download_url
-
-
-async def upload_images_to_comfyapi(
-    image: torch.Tensor,
-    max_images=8,
-    auth_kwargs: Optional[dict[str, str]] = None,
-    mime_type: Optional[str] = None,
-) -> list[str]:
-    """
-    Uploads images to ComfyUI API and returns download URLs.
-    To upload multiple images, stack them in the batch dimension first.
-
-    Args:
-        image: Input torch.Tensor image.
-        max_images: Maximum number of images to upload.
-        auth_kwargs: Optional authentication token(s).
-        mime_type: Optional MIME type for the image.
-    """
-    # if batch, try to upload each file if max_images is greater than 0
-    download_urls: list[str] = []
-    is_batch = len(image.shape) > 3
-    batch_len = image.shape[0] if is_batch else 1
-
-    for idx in range(min(batch_len, max_images)):
-        tensor = image[idx] if is_batch else image
-        img_io = tensor_to_bytesio(tensor, mime_type=mime_type)
-        url = await upload_file_to_comfyapi(img_io, img_io.name, mime_type, auth_kwargs)
-        download_urls.append(url)
-    return download_urls
-
-
 def resize_mask_to_image(
     mask: torch.Tensor,
     image: torch.Tensor,
diff --git a/comfy_api_nodes/apis/minimax_api.py b/comfy_api_nodes/apis/minimax_api.py
new file mode 100644
index 000000000..d747e177a
--- /dev/null
+++ b/comfy_api_nodes/apis/minimax_api.py
@@ -0,0 +1,120 @@
+from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class MinimaxBaseResponse(BaseModel):
+    status_code: int = Field(
+        ...,
+        description='Status code. 0 indicates success, other values indicate errors.',
+    )
+    status_msg: str = Field(
+        ..., description='Specific error details or success message.'
+    )
+
+
+class File(BaseModel):
+    bytes: Optional[int] = Field(None, description='File size in bytes')
+    created_at: Optional[int] = Field(
+        None, description='Unix timestamp when the file was created, in seconds'
+    )
+    download_url: Optional[str] = Field(
+        None, description='The URL to download the video'
+    )
+    backup_download_url: Optional[str] = Field(
+        None, description='The backup URL to download the video'
+    )
+
+    file_id: Optional[int] = Field(None, description='Unique identifier for the file')
+    filename: Optional[str] = Field(None, description='The name of the file')
+    purpose: Optional[str] = Field(None, description='The purpose of using the file')
+
+
+class MinimaxFileRetrieveResponse(BaseModel):
+    base_resp: MinimaxBaseResponse
+    file: File
+
+
+class MiniMaxModel(str, Enum):
+    T2V_01_Director = 'T2V-01-Director'
+    I2V_01_Director = 'I2V-01-Director'
+    S2V_01 = 'S2V-01'
+    I2V_01 = 'I2V-01'
+    I2V_01_live = 'I2V-01-live'
+    T2V_01 = 'T2V-01'
+    Hailuo_02 = 'MiniMax-Hailuo-02'
+
+
+class Status6(str, Enum):
+    Queueing = 'Queueing'
+    Preparing = 'Preparing'
+    Processing = 'Processing'
+    Success = 'Success'
+    Fail = 'Fail'
+
+
+class MinimaxTaskResultResponse(BaseModel):
+    base_resp: MinimaxBaseResponse
+    file_id: Optional[str] = Field(
+        None,
+        description='After the task status changes to Success, this field returns the file ID corresponding to the generated video.',
+    )
+    status: Status6 = Field(
+        ...,
+        description="Task status: 'Queueing' (in queue), 'Preparing' (task is preparing), 'Processing' (generating), 'Success' (task completed successfully), or 'Fail' (task failed).",
+    )
+    task_id: str = Field(..., description='The task ID being queried.')
+
+
+class SubjectReferenceItem(BaseModel):
+    image: Optional[str] = Field(
+        None, description='URL or base64 encoding of the subject reference image.'
+    )
+    mask: Optional[str] = Field(
+        None,
+        description='URL or base64 encoding of the mask for the subject reference image.',
+    )
+
+
+class MinimaxVideoGenerationRequest(BaseModel):
+    callback_url: Optional[str] = Field(
+        None,
+        description='Optional. URL to receive real-time status updates about the video generation task.',
+    )
+    first_frame_image: Optional[str] = Field(
+        None,
+        description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.',
+    )
+    model: MiniMaxModel = Field(
+        ...,
+        description='Required. ID of model. Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01',
+    )
+    prompt: Optional[str] = Field(
+        None,
+        description='Description of the video. Should be less than 2000 characters. Supports camera movement instructions in [brackets].',
+        max_length=2000,
+    )
+    prompt_optimizer: Optional[bool] = Field(
+        True,
+        description='If true (default), the model will automatically optimize the prompt. Set to false for more precise control.',
+    )
+    subject_reference: Optional[list[SubjectReferenceItem]] = Field(
+        None,
+        description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.',
+    )
+    duration: Optional[int] = Field(
+        None,
+        description="The length of the output video in seconds."
+    )
+    resolution: Optional[str] = Field(
+        None,
+        description="The dimensions of the video display. 1080p corresponds to 1920 x 1080 pixels, 768p corresponds to 1366 x 768 pixels."
+    )
+
+
+class MinimaxVideoGenerationResponse(BaseModel):
+    base_resp: MinimaxBaseResponse
+    task_id: str = Field(
+        ..., description='The task ID for the asynchronous video generation task.'
+    )
diff --git a/comfy_api_nodes/nodes_ideogram.py b/comfy_api_nodes/nodes_ideogram.py
index 9eae5f11a..d8fd3378b 100644
--- a/comfy_api_nodes/nodes_ideogram.py
+++ b/comfy_api_nodes/nodes_ideogram.py
@@ -20,9 +20,9 @@ from comfy_api_nodes.apis.client import (
 
 from comfy_api_nodes.apinode_utils import (
     download_url_to_bytesio,
-    bytesio_to_image_tensor,
     resize_mask_to_image,
 )
+from comfy_api_nodes.util import bytesio_to_image_tensor
 from server import PromptServer
 
 V1_V1_RES_MAP = {
diff --git a/comfy_api_nodes/nodes_luma.py b/comfy_api_nodes/nodes_luma.py
index e74441e5e..894f2b08c 100644
--- a/comfy_api_nodes/nodes_luma.py
+++ b/comfy_api_nodes/nodes_luma.py
@@ -1,69 +1,51 @@
-from __future__ import annotations
-from inspect import cleandoc
 from typing import Optional
+
+import torch
 from typing_extensions import override
-from comfy_api.latest import ComfyExtension, IO
-from comfy_api.input_impl.video_types import VideoFromFile
+
+from comfy_api.latest import IO, ComfyExtension
 from comfy_api_nodes.apis.luma_api import (
-    LumaImageModel,
-    LumaVideoModel,
-    LumaVideoOutputResolution,
-    LumaVideoModelOutputDuration,
     LumaAspectRatio,
-    LumaState,
-    LumaImageGenerationRequest,
-    LumaGenerationRequest,
-    LumaGeneration,
     LumaCharacterRef,
-    LumaModifyImageRef,
+    LumaConceptChain,
+    LumaGeneration,
+    LumaGenerationRequest,
+    LumaImageGenerationRequest,
     LumaImageIdentity,
+    LumaImageModel,
+    LumaImageReference,
+    LumaIO,
+    LumaKeyframes,
+    LumaModifyImageRef,
     LumaReference,
     LumaReferenceChain,
-    LumaImageReference,
-    LumaKeyframes,
-    LumaConceptChain,
-    LumaIO,
+    LumaVideoModel,
+    LumaVideoModelOutputDuration,
+    LumaVideoOutputResolution,
     get_luma_concepts,
 )
-from comfy_api_nodes.apis.client import (
+from comfy_api_nodes.util import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
-    EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
+    download_url_to_image_tensor,
+    download_url_to_video_output,
+    poll_op,
+    sync_op,
     upload_images_to_comfyapi,
-    process_image_response,
+    validate_string,
 )
-from server import PromptServer
-from comfy_api_nodes.util import validate_string
-
-import aiohttp
-import torch
-from io import BytesIO
 
 LUMA_T2V_AVERAGE_DURATION = 105
 LUMA_I2V_AVERAGE_DURATION = 100
 
-def image_result_url_extractor(response: LumaGeneration):
-    return response.assets.image if hasattr(response, "assets") and hasattr(response.assets, "image") else None
-
-def video_result_url_extractor(response: LumaGeneration):
-    return response.assets.video if hasattr(response, "assets") and hasattr(response.assets, "video") else None
 
 class LumaReferenceNode(IO.ComfyNode):
-    """
-    Holds an image and weight for use with Luma Generate Image node.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaReferenceNode",
             display_name="Luma Reference",
             category="api node/image/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Holds an image and weight for use with Luma Generate Image node.",
             inputs=[
                 IO.Image.Input(
                     "image",
@@ -83,17 +65,10 @@ class LumaReferenceNode(IO.ComfyNode):
                 ),
             ],
             outputs=[IO.Custom(LumaIO.LUMA_REF).Output(display_name="luma_ref")],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
         )
 
     @classmethod
-    def execute(
-        cls, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None
-    ) -> IO.NodeOutput:
+    def execute(cls, image: torch.Tensor, weight: float, luma_ref: LumaReferenceChain = None) -> IO.NodeOutput:
         if luma_ref is not None:
             luma_ref = luma_ref.clone()
         else:
@@ -103,17 +78,13 @@ class LumaReferenceNode(IO.ComfyNode):
 
 
 class LumaConceptsNode(IO.ComfyNode):
-    """
-    Holds one or more Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaConceptsNode",
             display_name="Luma Concepts",
             category="api node/video/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes.",
             inputs=[
                 IO.Combo.Input(
                     "concept1",
@@ -138,11 +109,6 @@ class LumaConceptsNode(IO.ComfyNode):
                 ),
             ],
             outputs=[IO.Custom(LumaIO.LUMA_CONCEPTS).Output(display_name="luma_concepts")],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
         )
 
     @classmethod
@@ -161,17 +127,13 @@ class LumaConceptsNode(IO.ComfyNode):
 
 
 class LumaImageGenerationNode(IO.ComfyNode):
-    """
-    Generates images synchronously based on prompt and aspect ratio.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaImageNode",
             display_name="Luma Text to Image",
             category="api node/image/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously based on prompt and aspect ratio.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -237,45 +199,30 @@ class LumaImageGenerationNode(IO.ComfyNode):
         aspect_ratio: str,
         seed,
         style_image_weight: float,
-        image_luma_ref: LumaReferenceChain = None,
-        style_image: torch.Tensor = None,
-        character_image: torch.Tensor = None,
+        image_luma_ref: Optional[LumaReferenceChain] = None,
+        style_image: Optional[torch.Tensor] = None,
+        character_image: Optional[torch.Tensor] = None,
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=True, min_length=3)
-        auth_kwargs = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
         # handle image_luma_ref
         api_image_ref = None
         if image_luma_ref is not None:
-            api_image_ref = await cls._convert_luma_refs(
-                image_luma_ref, max_refs=4, auth_kwargs=auth_kwargs,
-            )
+            api_image_ref = await cls._convert_luma_refs(image_luma_ref, max_refs=4)
         # handle style_luma_ref
         api_style_ref = None
         if style_image is not None:
-            api_style_ref = await cls._convert_style_image(
-                style_image, weight=style_image_weight, auth_kwargs=auth_kwargs,
-            )
+            api_style_ref = await cls._convert_style_image(style_image, weight=style_image_weight)
         # handle character_ref images
         character_ref = None
         if character_image is not None:
-            download_urls = await upload_images_to_comfyapi(
-                character_image, max_images=4, auth_kwargs=auth_kwargs,
-            )
-            character_ref = LumaCharacterRef(
-                identity0=LumaImageIdentity(images=download_urls)
-            )
+            download_urls = await upload_images_to_comfyapi(cls, character_image, max_images=4)
+            character_ref = LumaCharacterRef(identity0=LumaImageIdentity(images=download_urls))
 
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/luma/generations/image",
-                method=HttpMethod.POST,
-                request_model=LumaImageGenerationRequest,
-                response_model=LumaGeneration,
-            ),
-            request=LumaImageGenerationRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/luma/generations/image", method="POST"),
+            response_model=LumaGeneration,
+            data=LumaImageGenerationRequest(
                 prompt=prompt,
                 model=model,
                 aspect_ratio=aspect_ratio,
@@ -283,41 +230,21 @@ class LumaImageGenerationNode(IO.ComfyNode):
                 style_ref=api_style_ref,
                 character_ref=character_ref,
             ),
-            auth_kwargs=auth_kwargs,
         )
-        response_api: LumaGeneration = await operation.execute()
-
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/luma/generations/{response_api.id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=LumaGeneration,
-            ),
-            completed_statuses=[LumaState.completed],
-            failed_statuses=[LumaState.failed],
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/luma/generations/{response_api.id}"),
+            response_model=LumaGeneration,
             status_extractor=lambda x: x.state,
-            result_url_extractor=image_result_url_extractor,
-            node_id=cls.hidden.unique_id,
-            auth_kwargs=auth_kwargs,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.assets.image) as img_response:
-                img = process_image_response(await img_response.content.read())
-        return IO.NodeOutput(img)
+        return IO.NodeOutput(await download_url_to_image_tensor(response_poll.assets.image))
 
     @classmethod
-    async def _convert_luma_refs(
-        cls, luma_ref: LumaReferenceChain, max_refs: int, auth_kwargs: Optional[dict[str,str]] = None
-    ):
+    async def _convert_luma_refs(cls, luma_ref: LumaReferenceChain, max_refs: int):
         luma_urls = []
         ref_count = 0
         for ref in luma_ref.refs:
-            download_urls = await upload_images_to_comfyapi(
-                ref.image, max_images=1, auth_kwargs=auth_kwargs
-            )
+            download_urls = await upload_images_to_comfyapi(cls, ref.image, max_images=1)
             luma_urls.append(download_urls[0])
             ref_count += 1
             if ref_count >= max_refs:
@@ -325,27 +252,19 @@ class LumaImageGenerationNode(IO.ComfyNode):
         return luma_ref.create_api_model(download_urls=luma_urls, max_refs=max_refs)
 
     @classmethod
-    async def _convert_style_image(
-        cls, style_image: torch.Tensor, weight: float, auth_kwargs: Optional[dict[str,str]] = None
-    ):
-        chain = LumaReferenceChain(
-            first_ref=LumaReference(image=style_image, weight=weight)
-        )
-        return await cls._convert_luma_refs(chain, max_refs=1, auth_kwargs=auth_kwargs)
+    async def _convert_style_image(cls, style_image: torch.Tensor, weight: float):
+        chain = LumaReferenceChain(first_ref=LumaReference(image=style_image, weight=weight))
+        return await cls._convert_luma_refs(chain, max_refs=1)
 
 
 class LumaImageModifyNode(IO.ComfyNode):
-    """
-    Modifies images synchronously based on prompt and aspect ratio.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaImageModifyNode",
             display_name="Luma Image to Image",
             category="api node/image/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Modifies images synchronously based on prompt and aspect ratio.",
             inputs=[
                 IO.Image.Input(
                     "image",
@@ -395,68 +314,37 @@ class LumaImageModifyNode(IO.ComfyNode):
         image_weight: float,
         seed,
     ) -> IO.NodeOutput:
-        auth_kwargs = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        # first, upload image
-        download_urls = await upload_images_to_comfyapi(
-            image, max_images=1, auth_kwargs=auth_kwargs,
-        )
+        download_urls = await upload_images_to_comfyapi(cls, image, max_images=1)
         image_url = download_urls[0]
-        # next, make Luma call with download url provided
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/luma/generations/image",
-                method=HttpMethod.POST,
-                request_model=LumaImageGenerationRequest,
-                response_model=LumaGeneration,
-            ),
-            request=LumaImageGenerationRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/luma/generations/image", method="POST"),
+            response_model=LumaGeneration,
+            data=LumaImageGenerationRequest(
                 prompt=prompt,
                 model=model,
                 modify_image_ref=LumaModifyImageRef(
-                    url=image_url, weight=round(max(min(1.0-image_weight, 0.98), 0.0), 2)
+                    url=image_url, weight=round(max(min(1.0 - image_weight, 0.98), 0.0), 2)
                 ),
             ),
-            auth_kwargs=auth_kwargs,
         )
-        response_api: LumaGeneration = await operation.execute()
-
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/luma/generations/{response_api.id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=LumaGeneration,
-            ),
-            completed_statuses=[LumaState.completed],
-            failed_statuses=[LumaState.failed],
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/luma/generations/{response_api.id}"),
+            response_model=LumaGeneration,
             status_extractor=lambda x: x.state,
-            result_url_extractor=image_result_url_extractor,
-            node_id=cls.hidden.unique_id,
-            auth_kwargs=auth_kwargs,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.assets.image) as img_response:
-                img = process_image_response(await img_response.content.read())
-        return IO.NodeOutput(img)
+        return IO.NodeOutput(await download_url_to_image_tensor(response_poll.assets.image))
 
 
 class LumaTextToVideoGenerationNode(IO.ComfyNode):
-    """
-    Generates videos synchronously based on prompt and output_size.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaVideoNode",
             display_name="Luma Text to Video",
             category="api node/video/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos synchronously based on prompt and output_size.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -498,7 +386,7 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode):
                     "luma_concepts",
                     tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
                     optional=True,
-                )
+                ),
             ],
             outputs=[IO.Video.Output()],
             hidden=[
@@ -519,24 +407,17 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode):
         duration: str,
         loop: bool,
         seed,
-        luma_concepts: LumaConceptChain = None,
+        luma_concepts: Optional[LumaConceptChain] = None,
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False, min_length=3)
         duration = duration if model != LumaVideoModel.ray_1_6 else None
         resolution = resolution if model != LumaVideoModel.ray_1_6 else None
 
-        auth_kwargs = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/luma/generations",
-                method=HttpMethod.POST,
-                request_model=LumaGenerationRequest,
-                response_model=LumaGeneration,
-            ),
-            request=LumaGenerationRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/luma/generations", method="POST"),
+            response_model=LumaGeneration,
+            data=LumaGenerationRequest(
                 prompt=prompt,
                 model=model,
                 resolution=resolution,
@@ -545,47 +426,25 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode):
                 loop=loop,
                 concepts=luma_concepts.create_api_model() if luma_concepts else None,
             ),
-            auth_kwargs=auth_kwargs,
         )
-        response_api: LumaGeneration = await operation.execute()
-
-        if cls.hidden.unique_id:
-            PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", cls.hidden.unique_id)
-
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/luma/generations/{response_api.id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=LumaGeneration,
-            ),
-            completed_statuses=[LumaState.completed],
-            failed_statuses=[LumaState.failed],
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/luma/generations/{response_api.id}"),
+            response_model=LumaGeneration,
             status_extractor=lambda x: x.state,
-            result_url_extractor=video_result_url_extractor,
-            node_id=cls.hidden.unique_id,
             estimated_duration=LUMA_T2V_AVERAGE_DURATION,
-            auth_kwargs=auth_kwargs,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.assets.video) as vid_response:
-                return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
+        return IO.NodeOutput(await download_url_to_video_output(response_poll.assets.video))
 
 
 class LumaImageToVideoGenerationNode(IO.ComfyNode):
-    """
-    Generates videos synchronously based on prompt, input images, and output_size.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="LumaImageToVideoNode",
             display_name="Luma Image to Video",
             category="api node/video/Luma",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos synchronously based on prompt, input images, and output_size.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -637,7 +496,7 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode):
                     "luma_concepts",
                     tooltip="Optional Camera Concepts to dictate camera motion via the Luma Concepts node.",
                     optional=True,
-                )
+                ),
             ],
             outputs=[IO.Video.Output()],
             hidden=[
@@ -662,25 +521,15 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode):
         luma_concepts: LumaConceptChain = None,
     ) -> IO.NodeOutput:
         if first_image is None and last_image is None:
-            raise Exception(
-                "At least one of first_image and last_image requires an input."
-            )
-        auth_kwargs = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        keyframes = await cls._convert_to_keyframes(first_image, last_image, auth_kwargs=auth_kwargs)
+            raise Exception("At least one of first_image and last_image requires an input.")
+        keyframes = await cls._convert_to_keyframes(first_image, last_image)
         duration = duration if model != LumaVideoModel.ray_1_6 else None
         resolution = resolution if model != LumaVideoModel.ray_1_6 else None
-
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/luma/generations",
-                method=HttpMethod.POST,
-                request_model=LumaGenerationRequest,
-                response_model=LumaGeneration,
-            ),
-            request=LumaGenerationRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/luma/generations", method="POST"),
+            response_model=LumaGeneration,
+            data=LumaGenerationRequest(
                 prompt=prompt,
                 model=model,
                 aspect_ratio=LumaAspectRatio.ratio_16_9,  # ignored, but still needed by the API for some reason
@@ -690,54 +539,31 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode):
                 keyframes=keyframes,
                 concepts=luma_concepts.create_api_model() if luma_concepts else None,
             ),
-            auth_kwargs=auth_kwargs,
         )
-        response_api: LumaGeneration = await operation.execute()
-
-        if cls.hidden.unique_id:
-            PromptServer.instance.send_progress_text(f"Luma video generation started: {response_api.id}", cls.hidden.unique_id)
-
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/luma/generations/{response_api.id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=LumaGeneration,
-            ),
-            completed_statuses=[LumaState.completed],
-            failed_statuses=[LumaState.failed],
+        response_poll = await poll_op(
+            cls,
+            poll_endpoint=ApiEndpoint(path=f"/proxy/luma/generations/{response_api.id}"),
+            response_model=LumaGeneration,
             status_extractor=lambda x: x.state,
-            result_url_extractor=video_result_url_extractor,
-            node_id=cls.hidden.unique_id,
             estimated_duration=LUMA_I2V_AVERAGE_DURATION,
-            auth_kwargs=auth_kwargs,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.assets.video) as vid_response:
-                return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
+        return IO.NodeOutput(await download_url_to_video_output(response_poll.assets.video))
 
     @classmethod
     async def _convert_to_keyframes(
         cls,
         first_image: torch.Tensor = None,
         last_image: torch.Tensor = None,
-        auth_kwargs: Optional[dict[str,str]] = None,
     ):
         if first_image is None and last_image is None:
             return None
         frame0 = None
         frame1 = None
         if first_image is not None:
-            download_urls = await upload_images_to_comfyapi(
-                first_image, max_images=1, auth_kwargs=auth_kwargs,
-            )
+            download_urls = await upload_images_to_comfyapi(cls, first_image, max_images=1)
             frame0 = LumaImageReference(type="image", url=download_urls[0])
         if last_image is not None:
-            download_urls = await upload_images_to_comfyapi(
-                last_image, max_images=1, auth_kwargs=auth_kwargs,
-            )
+            download_urls = await upload_images_to_comfyapi(cls, last_image, max_images=1)
             frame1 = LumaImageReference(type="image", url=download_urls[0])
         return LumaKeyframes(frame0=frame0, frame1=frame1)
 
diff --git a/comfy_api_nodes/nodes_minimax.py b/comfy_api_nodes/nodes_minimax.py
index e3722e79b..05cbb700f 100644
--- a/comfy_api_nodes/nodes_minimax.py
+++ b/comfy_api_nodes/nodes_minimax.py
@@ -1,71 +1,57 @@
-from inspect import cleandoc
 from typing import Optional
-import logging
-import torch
 
+import torch
 from typing_extensions import override
-from comfy_api.latest import ComfyExtension, IO
-from comfy_api.input_impl.video_types import VideoFromFile
-from comfy_api_nodes.apis import (
+
+from comfy_api.latest import IO, ComfyExtension
+from comfy_api_nodes.apis.minimax_api import (
+    MinimaxFileRetrieveResponse,
+    MiniMaxModel,
+    MinimaxTaskResultResponse,
     MinimaxVideoGenerationRequest,
     MinimaxVideoGenerationResponse,
-    MinimaxFileRetrieveResponse,
-    MinimaxTaskResultResponse,
     SubjectReferenceItem,
-    MiniMaxModel,
 )
-from comfy_api_nodes.apis.client import (
+from comfy_api_nodes.util import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
-    EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
-    download_url_to_bytesio,
+    download_url_to_video_output,
+    poll_op,
+    sync_op,
     upload_images_to_comfyapi,
+    validate_string,
 )
-from comfy_api_nodes.util import validate_string
-from server import PromptServer
-
 
 I2V_AVERAGE_DURATION = 114
 T2V_AVERAGE_DURATION = 234
 
 
 async def _generate_mm_video(
+    cls: type[IO.ComfyNode],
     *,
-    auth: dict[str, str],
-    node_id: str,
     prompt_text: str,
     seed: int,
     model: str,
-    image: Optional[torch.Tensor] = None,   # used for ImageToVideo
-    subject: Optional[torch.Tensor] = None, # used for SubjectToVideo
+    image: Optional[torch.Tensor] = None,  # used for ImageToVideo
+    subject: Optional[torch.Tensor] = None,  # used for SubjectToVideo
     average_duration: Optional[int] = None,
 ) -> IO.NodeOutput:
     if image is None:
         validate_string(prompt_text, field_name="prompt_text")
-    # upload image, if passed in
     image_url = None
     if image is not None:
-        image_url = (await upload_images_to_comfyapi(image, max_images=1, auth_kwargs=auth))[0]
+        image_url = (await upload_images_to_comfyapi(cls, image, max_images=1))[0]
 
     # TODO: figure out how to deal with subject properly, API returns invalid params when using S2V-01 model
     subject_reference = None
     if subject is not None:
-        subject_url = (await upload_images_to_comfyapi(subject, max_images=1, auth_kwargs=auth))[0]
+        subject_url = (await upload_images_to_comfyapi(cls, subject, max_images=1))[0]
         subject_reference = [SubjectReferenceItem(image=subject_url)]
 
-
-    video_generate_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path="/proxy/minimax/video_generation",
-            method=HttpMethod.POST,
-            request_model=MinimaxVideoGenerationRequest,
-            response_model=MinimaxVideoGenerationResponse,
-        ),
-        request=MinimaxVideoGenerationRequest(
+    response = await sync_op(
+        cls,
+        ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
+        response_model=MinimaxVideoGenerationResponse,
+        data=MinimaxVideoGenerationRequest(
             model=MiniMaxModel(model),
             prompt=prompt_text,
             callback_url=None,
@@ -73,81 +59,50 @@ async def _generate_mm_video(
             subject_reference=subject_reference,
             prompt_optimizer=None,
         ),
-        auth_kwargs=auth,
     )
-    response = await video_generate_operation.execute()
 
     task_id = response.task_id
     if not task_id:
         raise Exception(f"MiniMax generation failed: {response.base_resp}")
 
-    video_generate_operation = PollingOperation(
-        poll_endpoint=ApiEndpoint(
-            path="/proxy/minimax/query/video_generation",
-            method=HttpMethod.GET,
-            request_model=EmptyRequest,
-            response_model=MinimaxTaskResultResponse,
-            query_params={"task_id": task_id},
-        ),
-        completed_statuses=["Success"],
-        failed_statuses=["Fail"],
+    task_result = await poll_op(
+        cls,
+        ApiEndpoint(path="/proxy/minimax/query/video_generation", query_params={"task_id": task_id}),
+        response_model=MinimaxTaskResultResponse,
         status_extractor=lambda x: x.status.value,
         estimated_duration=average_duration,
-        node_id=node_id,
-        auth_kwargs=auth,
     )
-    task_result = await video_generate_operation.execute()
 
     file_id = task_result.file_id
     if file_id is None:
         raise Exception("Request was not successful. Missing file ID.")
-    file_retrieve_operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path="/proxy/minimax/files/retrieve",
-            method=HttpMethod.GET,
-            request_model=EmptyRequest,
-            response_model=MinimaxFileRetrieveResponse,
-            query_params={"file_id": int(file_id)},
-        ),
-        request=EmptyRequest(),
-        auth_kwargs=auth,
+    file_result = await sync_op(
+        cls,
+        ApiEndpoint(path="/proxy/minimax/files/retrieve", query_params={"file_id": int(file_id)}),
+        response_model=MinimaxFileRetrieveResponse,
     )
-    file_result = await file_retrieve_operation.execute()
 
     file_url = file_result.file.download_url
     if file_url is None:
-        raise Exception(
-            f"No video was found in the response. Full response: {file_result.model_dump()}"
-        )
-    logging.info("Generated video URL: %s", file_url)
-    if node_id:
-        if hasattr(file_result.file, "backup_download_url"):
-            message = f"Result URL: {file_url}\nBackup URL: {file_result.file.backup_download_url}"
-        else:
-            message = f"Result URL: {file_url}"
-        PromptServer.instance.send_progress_text(message, node_id)
-
-    # Download and return as VideoFromFile
-    video_io = await download_url_to_bytesio(file_url)
-    if video_io is None:
-        error_msg = f"Failed to download video from {file_url}"
-        logging.error(error_msg)
-        raise Exception(error_msg)
-    return IO.NodeOutput(VideoFromFile(video_io))
+        raise Exception(f"No video was found in the response. Full response: {file_result.model_dump()}")
+    if file_result.file.backup_download_url:
+        try:
+            return IO.NodeOutput(await download_url_to_video_output(file_url, timeout=10, max_retries=2))
+        except Exception:  # if we have a second URL to retrieve the result, try again using that one
+            return IO.NodeOutput(
+                await download_url_to_video_output(file_result.file.backup_download_url, max_retries=3)
+            )
+    return IO.NodeOutput(await download_url_to_video_output(file_url))
 
 
 class MinimaxTextToVideoNode(IO.ComfyNode):
-    """
-    Generates videos synchronously based on a prompt, and optional parameters using MiniMax's API.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="MinimaxTextToVideoNode",
             display_name="MiniMax Text to Video",
             category="api node/video/MiniMax",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos synchronously based on a prompt, and optional parameters.",
             inputs=[
                 IO.String.Input(
                     "prompt_text",
@@ -189,11 +144,7 @@ class MinimaxTextToVideoNode(IO.ComfyNode):
         seed: int = 0,
     ) -> IO.NodeOutput:
         return await _generate_mm_video(
-            auth={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
             prompt_text=prompt_text,
             seed=seed,
             model=model,
@@ -204,17 +155,13 @@ class MinimaxTextToVideoNode(IO.ComfyNode):
 
 
 class MinimaxImageToVideoNode(IO.ComfyNode):
-    """
-    Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="MinimaxImageToVideoNode",
             display_name="MiniMax Image to Video",
             category="api node/video/MiniMax",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos synchronously based on an image and prompt, and optional parameters.",
             inputs=[
                 IO.Image.Input(
                     "image",
@@ -261,11 +208,7 @@ class MinimaxImageToVideoNode(IO.ComfyNode):
         seed: int = 0,
     ) -> IO.NodeOutput:
         return await _generate_mm_video(
-            auth={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
             prompt_text=prompt_text,
             seed=seed,
             model=model,
@@ -276,17 +219,13 @@ class MinimaxImageToVideoNode(IO.ComfyNode):
 
 
 class MinimaxSubjectToVideoNode(IO.ComfyNode):
-    """
-    Generates videos synchronously based on an image and prompt, and optional parameters using MiniMax's API.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="MinimaxSubjectToVideoNode",
             display_name="MiniMax Subject to Video",
             category="api node/video/MiniMax",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos synchronously based on an image and prompt, and optional parameters.",
             inputs=[
                 IO.Image.Input(
                     "subject",
@@ -333,11 +272,7 @@ class MinimaxSubjectToVideoNode(IO.ComfyNode):
         seed: int = 0,
     ) -> IO.NodeOutput:
         return await _generate_mm_video(
-            auth={
-                "auth_token": cls.hidden.auth_token_comfy_org,
-                "comfy_api_key": cls.hidden.api_key_comfy_org,
-            },
-            node_id=cls.hidden.unique_id,
+            cls,
             prompt_text=prompt_text,
             seed=seed,
             model=model,
@@ -348,15 +283,13 @@ class MinimaxSubjectToVideoNode(IO.ComfyNode):
 
 
 class MinimaxHailuoVideoNode(IO.ComfyNode):
-    """Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model."""
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="MinimaxHailuoVideoNode",
             display_name="MiniMax Hailuo Video",
             category="api node/video/MiniMax",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model.",
             inputs=[
                 IO.String.Input(
                     "prompt_text",
@@ -420,10 +353,6 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
         resolution: str = "768P",
         model: str = "MiniMax-Hailuo-02",
     ) -> IO.NodeOutput:
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
         if first_frame_image is None:
             validate_string(prompt_text, field_name="prompt_text")
 
@@ -435,16 +364,13 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
         # upload image, if passed in
         image_url = None
         if first_frame_image is not None:
-            image_url = (await upload_images_to_comfyapi(first_frame_image, max_images=1, auth_kwargs=auth))[0]
+            image_url = (await upload_images_to_comfyapi(cls, first_frame_image, max_images=1))[0]
 
-        video_generate_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/minimax/video_generation",
-                method=HttpMethod.POST,
-                request_model=MinimaxVideoGenerationRequest,
-                response_model=MinimaxVideoGenerationResponse,
-            ),
-            request=MinimaxVideoGenerationRequest(
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/minimax/video_generation", method="POST"),
+            response_model=MinimaxVideoGenerationResponse,
+            data=MinimaxVideoGenerationRequest(
                 model=MiniMaxModel(model),
                 prompt=prompt_text,
                 callback_url=None,
@@ -453,67 +379,42 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
                 duration=duration,
                 resolution=resolution,
             ),
-            auth_kwargs=auth,
         )
-        response = await video_generate_operation.execute()
 
         task_id = response.task_id
         if not task_id:
             raise Exception(f"MiniMax generation failed: {response.base_resp}")
 
         average_duration = 120 if resolution == "768P" else 240
-        video_generate_operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path="/proxy/minimax/query/video_generation",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=MinimaxTaskResultResponse,
-                query_params={"task_id": task_id},
-            ),
-            completed_statuses=["Success"],
-            failed_statuses=["Fail"],
+        task_result = await poll_op(
+            cls,
+            ApiEndpoint(path="/proxy/minimax/query/video_generation", query_params={"task_id": task_id}),
+            response_model=MinimaxTaskResultResponse,
             status_extractor=lambda x: x.status.value,
             estimated_duration=average_duration,
-            node_id=cls.hidden.unique_id,
-            auth_kwargs=auth,
         )
-        task_result = await video_generate_operation.execute()
 
         file_id = task_result.file_id
         if file_id is None:
             raise Exception("Request was not successful. Missing file ID.")
-        file_retrieve_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/minimax/files/retrieve",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=MinimaxFileRetrieveResponse,
-                query_params={"file_id": int(file_id)},
-            ),
-            request=EmptyRequest(),
-            auth_kwargs=auth,
+        file_result = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/minimax/files/retrieve", query_params={"file_id": int(file_id)}),
+            response_model=MinimaxFileRetrieveResponse,
         )
-        file_result = await file_retrieve_operation.execute()
 
         file_url = file_result.file.download_url
         if file_url is None:
-            raise Exception(
-                f"No video was found in the response. Full response: {file_result.model_dump()}"
-            )
-        logging.info("Generated video URL: %s", file_url)
-        if cls.hidden.unique_id:
-            if hasattr(file_result.file, "backup_download_url"):
-                message = f"Result URL: {file_url}\nBackup URL: {file_result.file.backup_download_url}"
-            else:
-                message = f"Result URL: {file_url}"
-            PromptServer.instance.send_progress_text(message, cls.hidden.unique_id)
+            raise Exception(f"No video was found in the response. Full response: {file_result.model_dump()}")
 
-        video_io = await download_url_to_bytesio(file_url)
-        if video_io is None:
-            error_msg = f"Failed to download video from {file_url}"
-            logging.error(error_msg)
-            raise Exception(error_msg)
-        return IO.NodeOutput(VideoFromFile(video_io))
+        if file_result.file.backup_download_url:
+            try:
+                return IO.NodeOutput(await download_url_to_video_output(file_url, timeout=10, max_retries=2))
+            except Exception:  # if we have a second URL to retrieve the result, try again using that one
+                return IO.NodeOutput(
+                    await download_url_to_video_output(file_result.file.backup_download_url, max_retries=3)
+                )
+        return IO.NodeOutput(await download_url_to_video_output(file_url))
 
 
 class MinimaxExtension(ComfyExtension):
diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py
index 9c036d64b..9ae512fe5 100644
--- a/comfy_api_nodes/util/client.py
+++ b/comfy_api_nodes/util/client.py
@@ -78,7 +78,7 @@ class _PollUIState:
 
 _RETRY_STATUS = {408, 429, 500, 502, 503, 504}
 COMPLETED_STATUSES = ["succeeded", "succeed", "success", "completed"]
-FAILED_STATUSES = ["cancelled", "canceled", "failed", "error"]
+FAILED_STATUSES = ["cancelled", "canceled", "fail", "failed", "error"]
 QUEUED_STATUSES = ["created", "queued", "queueing", "submitted"]
 
 
diff --git a/comfy_api_nodes/util/download_helpers.py b/comfy_api_nodes/util/download_helpers.py
index f89045e12..364874bed 100644
--- a/comfy_api_nodes/util/download_helpers.py
+++ b/comfy_api_nodes/util/download_helpers.py
@@ -232,11 +232,12 @@ async def download_url_to_video_output(
     video_url: str,
     *,
     timeout: float = None,
+    max_retries: int = 5,
     cls: type[COMFY_IO.ComfyNode] = None,
 ) -> VideoFromFile:
     """Downloads a video from a URL and returns a `VIDEO` output."""
     result = BytesIO()
-    await download_url_to_bytesio(video_url, result, timeout=timeout, cls=cls)
+    await download_url_to_bytesio(video_url, result, timeout=timeout, max_retries=max_retries, cls=cls)
     return VideoFromFile(result)
 
 

From 1a58087ac2eb64be3645934d0025aafaa5bdce38 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 12:43:51 -0700
Subject: [PATCH 08/20] Reduce memory usage for fp8 scaled op. (#10531)

---
 comfy/quant_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index b14e03084..fb35a0d40 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -358,7 +358,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
         lp_amax = torch.finfo(dtype).max
-        tensor_scaled = tensor.float() / scale
+        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
         torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
 

From ec4fc2a09a390d0d81500c51fb9e4d8a7a5ce1fc Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 12:48:06 -0700
Subject: [PATCH 09/20] Fix case of weights not being unpinned. (#10533)

---
 comfy/model_patcher.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index aec73349c..119119e96 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -1283,5 +1283,6 @@ class ModelPatcher:
         self.clear_cached_hook_weights()
 
     def __del__(self):
+        self.unpin_all_weights()
         self.detach(unpatch_all=False)
 

From ab7ab5be23fb9b71d1790f424e7dcf91dc1fe0cc Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 30 Oct 2025 07:17:46 +1000
Subject: [PATCH 10/20] Fix Race condition in --async-offload that can cause
 corruption (#10501)

* mm: factor out the current stream getter

Make this a reusable function.

* ops: sync the offload stream with the consumption of w&b

This sync is nessacary as pytorch will queue cuda async frees on the
same stream as created to tensor. In the case of async offload, this
will be on the offload stream.

Weights and biases can go out of scope in python which then
triggers the pytorch garbage collector to queue the free operation on
the offload stream possible before the compute stream has used the
weight. This causes a use after free on weight data leading to total
corruption of some workflows.

So sync the offload stream with the compute stream after the weight
has been used so the free has to wait for the weight to be used.

The cast_bias_weight is extended in a backwards compatible way with
the new behaviour opt-in on a defaulted parameter. This handles
custom node packs calling cast_bias_weight and defeatures
async-offload for them (as they do not handle the race).

The pattern is now:

cast_bias_weight(... , offloadable=True) #This might be offloaded
thing(weight, bias, ...)
uncast_bias_weight(...)

* controlnet: adopt new cast_bias_weight synchronization scheme

This is nessacary for safe async weight offloading.

* mm: sync the last stream in the queue, not the next

Currently this peeks ahead to sync the next stream in the queue of
streams with the compute stream. This doesnt allow a lot of
parallelization, as then end result is you can only get one weight load
ahead regardless of how many streams you have.

Rotate the loop logic here to synchronize the end of the queue before
returning the next stream. This allows weights to be loaded ahead of the
compute streams position.
---
 comfy/controlnet.py       |  17 +++---
 comfy/model_management.py |  28 +++++----
 comfy/ops.py              | 121 ++++++++++++++++++++++++++++----------
 3 files changed, 114 insertions(+), 52 deletions(-)

diff --git a/comfy/controlnet.py b/comfy/controlnet.py
index f08ff4b36..0b5e30f52 100644
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -310,11 +310,13 @@ class ControlLoraOps:
             self.bias = None
 
         def forward(self, input):
-            weight, bias = comfy.ops.cast_bias_weight(self, input)
+            weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
             if self.up is not None:
-                return torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
+                x = torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
             else:
-                return torch.nn.functional.linear(input, weight, bias)
+                x = torch.nn.functional.linear(input, weight, bias)
+            comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
     class Conv2d(torch.nn.Module, comfy.ops.CastWeightBiasOp):
         def __init__(
@@ -350,12 +352,13 @@ class ControlLoraOps:
 
 
         def forward(self, input):
-            weight, bias = comfy.ops.cast_bias_weight(self, input)
+            weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
             if self.up is not None:
-                return torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
+                x = torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
             else:
-                return torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
-
+                x = torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
+            comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
 class ControlLora(ControlNet):
     def __init__(self, control_weights, global_average_pooling=False, model_options={}): #TODO? model_options
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 3e5b977d4..79c0dfdb4 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1013,6 +1013,16 @@ if args.async_offload:
     NUM_STREAMS = 2
     logging.info("Using async weight offloading with {} streams".format(NUM_STREAMS))
 
+def current_stream(device):
+    if device is None:
+        return None
+    if is_device_cuda(device):
+        return torch.cuda.current_stream()
+    elif is_device_xpu(device):
+        return torch.xpu.current_stream()
+    else:
+        return None
+
 stream_counters = {}
 def get_offload_stream(device):
     stream_counter = stream_counters.get(device, 0)
@@ -1021,21 +1031,17 @@ def get_offload_stream(device):
 
     if device in STREAMS:
         ss = STREAMS[device]
-        s = ss[stream_counter]
+        #Sync the oldest stream in the queue with the current
+        ss[stream_counter].wait_stream(current_stream(device))
         stream_counter = (stream_counter + 1) % len(ss)
-        if is_device_cuda(device):
-            ss[stream_counter].wait_stream(torch.cuda.current_stream())
-        elif is_device_xpu(device):
-            ss[stream_counter].wait_stream(torch.xpu.current_stream())
         stream_counters[device] = stream_counter
-        return s
+        return ss[stream_counter]
     elif is_device_cuda(device):
         ss = []
         for k in range(NUM_STREAMS):
             ss.append(torch.cuda.Stream(device=device, priority=0))
         STREAMS[device] = ss
         s = ss[stream_counter]
-        stream_counter = (stream_counter + 1) % len(ss)
         stream_counters[device] = stream_counter
         return s
     elif is_device_xpu(device):
@@ -1044,18 +1050,14 @@ def get_offload_stream(device):
             ss.append(torch.xpu.Stream(device=device, priority=0))
         STREAMS[device] = ss
         s = ss[stream_counter]
-        stream_counter = (stream_counter + 1) % len(ss)
         stream_counters[device] = stream_counter
         return s
     return None
 
 def sync_stream(device, stream):
-    if stream is None:
+    if stream is None or current_stream(device) is None:
         return
-    if is_device_cuda(device):
-        torch.cuda.current_stream().wait_stream(stream)
-    elif is_device_xpu(device):
-        torch.xpu.current_stream().wait_stream(stream)
+    current_stream(device).wait_stream(stream)
 
 def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None):
     if device is None or weight.device == device:
diff --git a/comfy/ops.py b/comfy/ops.py
index 93731eedf..71ca7a2bd 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -70,8 +70,12 @@ cast_to = comfy.model_management.cast_to #TODO: remove once no more references
 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
 
+
 @torch.compiler.disable()
-def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
+def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False):
+    # NOTE: offloadable=False is a a legacy and if you are a custom node author reading this please pass
+    # offloadable=True and call uncast_bias_weight() after your last usage of the weight/bias. This
+    # will add async-offload support to your cast and improve performance.
     if input is not None:
         if dtype is None:
             dtype = input.dtype
@@ -80,7 +84,11 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
         if device is None:
             device = input.device
 
-    offload_stream = comfy.model_management.get_offload_stream(device)
+    if offloadable:
+        offload_stream = comfy.model_management.get_offload_stream(device)
+    else:
+        offload_stream = None
+
     if offload_stream is not None:
         wf_context = offload_stream
     else:
@@ -105,7 +113,24 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
                 weight = f(weight)
 
     comfy.model_management.sync_stream(device, offload_stream)
-    return weight, bias
+    if offloadable:
+        return weight, bias, offload_stream
+    else:
+        #Legacy function signature
+        return weight, bias
+
+
+def uncast_bias_weight(s, weight, bias, offload_stream):
+    if offload_stream is None:
+        return
+    if weight is not None:
+        device = weight.device
+    else:
+        if bias is None:
+            return
+        device = bias.device
+    offload_stream.wait_stream(comfy.model_management.current_stream(device))
+
 
 class CastWeightBiasOp:
     comfy_cast_weights = False
@@ -118,8 +143,10 @@ class disable_weight_init:
             return None
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.linear(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = torch.nn.functional.linear(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -133,8 +160,10 @@ class disable_weight_init:
             return None
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = self._conv_forward(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -148,8 +177,10 @@ class disable_weight_init:
             return None
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = self._conv_forward(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -172,8 +203,10 @@ class disable_weight_init:
                 return super()._conv_forward(input, weight, bias, *args, **kwargs)
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = self._conv_forward(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -187,8 +220,10 @@ class disable_weight_init:
             return None
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -203,11 +238,14 @@ class disable_weight_init:
 
         def forward_comfy_cast_weights(self, input):
             if self.weight is not None:
-                weight, bias = cast_bias_weight(self, input)
+                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
             else:
                 weight = None
                 bias = None
-            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+                offload_stream = None
+            x = torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -223,11 +261,15 @@ class disable_weight_init:
 
         def forward_comfy_cast_weights(self, input):
             if self.weight is not None:
-                weight, bias = cast_bias_weight(self, input)
+                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
             else:
                 weight = None
-            return comfy.rmsnorm.rms_norm(input, weight, self.eps)  # TODO: switch to commented out line when old torch is deprecated
-            # return torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
+                bias = None
+                offload_stream = None
+            x = comfy.rmsnorm.rms_norm(input, weight, self.eps)  # TODO: switch to commented out line when old torch is deprecated
+            # x = torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -246,10 +288,12 @@ class disable_weight_init:
                 input, output_size, self.stride, self.padding, self.kernel_size,
                 num_spatial_dims, self.dilation)
 
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.conv_transpose2d(
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = torch.nn.functional.conv_transpose2d(
                 input, weight, bias, self.stride, self.padding,
                 output_padding, self.groups, self.dilation)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -268,10 +312,12 @@ class disable_weight_init:
                 input, output_size, self.stride, self.padding, self.kernel_size,
                 num_spatial_dims, self.dilation)
 
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.conv_transpose1d(
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = torch.nn.functional.conv_transpose1d(
                 input, weight, bias, self.stride, self.padding,
                 output_padding, self.groups, self.dilation)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -289,8 +335,11 @@ class disable_weight_init:
             output_dtype = out_dtype
             if self.weight.dtype == torch.float16 or self.weight.dtype == torch.bfloat16:
                 out_dtype = None
-            weight, bias = cast_bias_weight(self, device=input.device, dtype=out_dtype)
-            return torch.nn.functional.embedding(input, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse).to(dtype=output_dtype)
+            weight, bias, offload_stream = cast_bias_weight(self, device=input.device, dtype=out_dtype, offloadable=True)
+            x = torch.nn.functional.embedding(input, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse).to(dtype=output_dtype)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
+
 
         def forward(self, *args, **kwargs):
             run_every_op()
@@ -361,7 +410,7 @@ def fp8_linear(self, input):
     input_dtype = input.dtype
 
     if len(input.shape) == 3:
-        w, bias = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype)
+        w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True)
 
         scale_weight = self.scale_weight
         scale_input = self.scale_input
@@ -382,6 +431,8 @@ def fp8_linear(self, input):
         quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
         o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
 
+        uncast_bias_weight(self, w, bias, offload_stream)
+
         if tensor_2d:
             return o.reshape(input_shape[0], -1)
         return o.reshape((-1, input_shape[1], self.weight.shape[0]))
@@ -404,8 +455,10 @@ class fp8_ops(manual_cast):
                 except Exception as e:
                     logging.info("Exception during fp8 op: {}".format(e))
 
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.linear(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = torch.nn.functional.linear(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
 def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None):
     logging.info("Using scaled fp8: fp8 matrix mult: {}, scale input: {}".format(fp8_matrix_mult, scale_input))
@@ -433,12 +486,14 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None
                     if out is not None:
                         return out
 
-                weight, bias = cast_bias_weight(self, input)
+                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
 
                 if weight.numel() < input.numel(): #TODO: optimize
-                    return torch.nn.functional.linear(input, weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype), bias)
+                    x = torch.nn.functional.linear(input, weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype), bias)
                 else:
-                    return torch.nn.functional.linear(input * self.scale_weight.to(device=weight.device, dtype=weight.dtype), weight, bias)
+                    x = torch.nn.functional.linear(input * self.scale_weight.to(device=weight.device, dtype=weight.dtype), weight, bias)
+                uncast_bias_weight(self, weight, bias, offload_stream)
+                return x
 
             def convert_weight(self, weight, inplace=False, **kwargs):
                 if inplace:
@@ -577,8 +632,10 @@ class MixedPrecisionOps(disable_weight_init):
             return torch.nn.functional.linear(input, weight, bias)
 
         def forward_comfy_cast_weights(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._forward(input, weight, bias)
+            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+            x = self._forward(input, weight, bias)
+            uncast_bias_weight(self, weight, bias, offload_stream)
+            return x
 
         def forward(self, input, *args, **kwargs):
             run_every_op()

From 25de7b1bfa22dd98922f047a1342cc97f8e46c5b Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 14:20:27 -0700
Subject: [PATCH 11/20] Try to fix slow load issue on low ram hardware with
 pinned mem. (#10536)

---
 comfy/model_patcher.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 119119e96..74b9e48bc 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -658,6 +658,7 @@ class ModelPatcher:
             loading = self._load_list()
 
             load_completely = []
+            offloaded = []
             loading.sort(reverse=True)
             for x in loading:
                 n = x[1]
@@ -699,8 +700,7 @@ class ModelPatcher:
                             patch_counter += 1
 
                     cast_weight = True
-                    for param in params:
-                        self.pin_weight_to_device("{}.{}".format(n, param))
+                    offloaded.append((module_mem, n, m, params))
                 else:
                     if hasattr(m, "comfy_cast_weights"):
                         wipe_lowvram_weight(m)
@@ -741,6 +741,12 @@ class ModelPatcher:
             for x in load_completely:
                 x[2].to(device_to)
 
+            for x in offloaded:
+                n = x[1]
+                params = x[3]
+                for param in params:
+                    self.pin_weight_to_device("{}.{}".format(n, param))
+
             if lowvram_counter > 0:
                 logging.info("loaded partially {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), patch_counter))
                 self.model.model_lowvram = True

From 906c0899575a83ac69bb095e835fdec748891da4 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 29 Oct 2025 16:29:01 -0700
Subject: [PATCH 12/20] Fix small performance regression with fp8 fast and
 scaled fp8. (#10537)

---
 comfy/ops.py       | 6 +++++-
 comfy/quant_ops.py | 5 +++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index 71ca7a2bd..18f6b804b 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -421,14 +421,18 @@ def fp8_linear(self, input):
 
         if scale_input is None:
             scale_input = torch.ones((), device=input.device, dtype=torch.float32)
+            input = torch.clamp(input, min=-448, max=448, out=input)
+            input = input.reshape(-1, input_shape[2]).to(dtype).contiguous()
+            layout_params_weight = {'scale': scale_input, 'orig_dtype': input_dtype}
+            quantized_input = QuantizedTensor(input.reshape(-1, input_shape[2]).to(dtype).contiguous(), TensorCoreFP8Layout, layout_params_weight)
         else:
             scale_input = scale_input.to(input.device)
+            quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
 
         # Wrap weight in QuantizedTensor - this enables unified dispatch
         # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
         layout_params_weight = {'scale': scale_weight, 'orig_dtype': input_dtype}
         quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
-        quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
         o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
 
         uncast_bias_weight(self, w, bias, offload_stream)
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index fb35a0d40..c822fe53c 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -357,9 +357,10 @@ class TensorCoreFP8Layout(QuantizedLayout):
             scale = torch.tensor(scale)
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
-        lp_amax = torch.finfo(dtype).max
         tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
-        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
+        # TODO: uncomment this if it's actually needed because the clamp has a small performance penality'
+        # lp_amax = torch.finfo(dtype).max
+        # torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
         qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
 
         layout_params = {

From 998bf60bebd03e57a55e106434657849342b733f Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski <kosinkadink1@gmail.com>
Date: Wed, 29 Oct 2025 16:37:06 -0700
Subject: [PATCH 13/20] Add units/info for the numbers displayed on 'load
 completely' and 'load partially' log messages (#10538)

---
 comfy/model_patcher.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 74b9e48bc..ed3f3f5cb 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -655,6 +655,7 @@ class ModelPatcher:
             mem_counter = 0
             patch_counter = 0
             lowvram_counter = 0
+            lowvram_mem_counter = 0
             loading = self._load_list()
 
             load_completely = []
@@ -675,6 +676,7 @@ class ModelPatcher:
                     if mem_counter + module_mem >= lowvram_model_memory:
                         lowvram_weight = True
                         lowvram_counter += 1
+                        lowvram_mem_counter += module_mem
                         if hasattr(m, "prev_comfy_cast_weights"): #Already lowvramed
                             continue
 
@@ -748,10 +750,10 @@ class ModelPatcher:
                     self.pin_weight_to_device("{}.{}".format(n, param))
 
             if lowvram_counter > 0:
-                logging.info("loaded partially {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), patch_counter))
+                logging.info("loaded partially; {:.2f} MB usable, {:.2f} MB loaded, {:.2f} MB offloaded, lowvram patches: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), lowvram_mem_counter / (1024 * 1024), patch_counter))
                 self.model.model_lowvram = True
             else:
-                logging.info("loaded completely {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
+                logging.info("loaded completely; {:.2f} MB usable, {:.2f} MB loaded, full load: {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), full_load))
                 self.model.model_lowvram = False
                 if full_load:
                     self.model.to(device_to)

From 163b629c70a349c7d1e91eebc5365713e770af8a Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Thu, 30 Oct 2025 08:49:03 +0200
Subject: [PATCH 14/20] use new API client in Pixverse and Ideogram nodes
 (#10543)

---
 comfy_api_nodes/apinode_utils.py         | 109 +------------
 comfy_api_nodes/nodes_bfl.py             |  43 +----
 comfy_api_nodes/nodes_bytedance.py       |  12 +-
 comfy_api_nodes/nodes_ideogram.py        | 137 ++++------------
 comfy_api_nodes/nodes_kling.py           |   2 +-
 comfy_api_nodes/nodes_pixverse.py        | 194 ++++++-----------------
 comfy_api_nodes/nodes_recraft.py         |   4 +-
 comfy_api_nodes/nodes_runway.py          |  10 +-
 comfy_api_nodes/nodes_vidu.py            |  12 +-
 comfy_api_nodes/util/__init__.py         |  10 +-
 comfy_api_nodes/util/conversions.py      |  21 +++
 comfy_api_nodes/util/validation_utils.py | 125 ++++++++++-----
 12 files changed, 220 insertions(+), 459 deletions(-)

diff --git a/comfy_api_nodes/apinode_utils.py b/comfy_api_nodes/apinode_utils.py
index 6a72b9d1d..ecd604ff8 100644
--- a/comfy_api_nodes/apinode_utils.py
+++ b/comfy_api_nodes/apinode_utils.py
@@ -1,15 +1,12 @@
 from __future__ import annotations
 import aiohttp
 import mimetypes
-from typing import Optional, Union
-from comfy.utils import common_upscale
+from typing import Union
 from server import PromptServer
-from comfy.cli_args import args
 
 import numpy as np
 from PIL import Image
 import torch
-import math
 import base64
 from io import BytesIO
 
@@ -60,85 +57,6 @@ async def validate_and_cast_response(
     return torch.stack(image_tensors, dim=0)
 
 
-def validate_aspect_ratio(
-    aspect_ratio: str,
-    minimum_ratio: float,
-    maximum_ratio: float,
-    minimum_ratio_str: str,
-    maximum_ratio_str: str,
-) -> float:
-    """Validates and casts an aspect ratio string to a float.
-
-    Args:
-        aspect_ratio: The aspect ratio string to validate.
-        minimum_ratio: The minimum aspect ratio.
-        maximum_ratio: The maximum aspect ratio.
-        minimum_ratio_str: The minimum aspect ratio string.
-        maximum_ratio_str: The maximum aspect ratio string.
-
-    Returns:
-        The validated and cast aspect ratio.
-
-    Raises:
-        Exception: If the aspect ratio is not valid.
-    """
-    # get ratio values
-    numbers = aspect_ratio.split(":")
-    if len(numbers) != 2:
-        raise TypeError(
-            f"Aspect ratio must be in the format X:Y, such as 16:9, but was {aspect_ratio}."
-        )
-    try:
-        numerator = int(numbers[0])
-        denominator = int(numbers[1])
-    except ValueError as exc:
-        raise TypeError(
-            f"Aspect ratio must contain numbers separated by ':', such as 16:9, but was {aspect_ratio}."
-        ) from exc
-    calculated_ratio = numerator / denominator
-    # if not close to minimum and maximum, check bounds
-    if not math.isclose(calculated_ratio, minimum_ratio) or not math.isclose(
-        calculated_ratio, maximum_ratio
-    ):
-        if calculated_ratio < minimum_ratio:
-            raise TypeError(
-                f"Aspect ratio cannot reduce to any less than {minimum_ratio_str} ({minimum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
-            )
-        if calculated_ratio > maximum_ratio:
-            raise TypeError(
-                f"Aspect ratio cannot reduce to any greater than {maximum_ratio_str} ({maximum_ratio}), but was {aspect_ratio} ({calculated_ratio})."
-            )
-    return aspect_ratio
-
-
-async def download_url_to_bytesio(
-    url: str, timeout: int = None, auth_kwargs: Optional[dict[str, str]] = None
-) -> BytesIO:
-    """Downloads content from a URL using requests and returns it as BytesIO.
-
-    Args:
-        url: The URL to download.
-        timeout: Request timeout in seconds. Defaults to None (no timeout).
-
-    Returns:
-        BytesIO object containing the downloaded content.
-    """
-    headers = {}
-    if url.startswith("/proxy/"):
-        url = str(args.comfy_api_base).rstrip("/") + url
-        auth_token = auth_kwargs.get("auth_token")
-        comfy_api_key = auth_kwargs.get("comfy_api_key")
-        if auth_token:
-            headers["Authorization"] = f"Bearer {auth_token}"
-        elif comfy_api_key:
-            headers["X-API-KEY"] = comfy_api_key
-    timeout_cfg = aiohttp.ClientTimeout(total=timeout) if timeout else None
-    async with aiohttp.ClientSession(timeout=timeout_cfg) as session:
-        async with session.get(url, headers=headers) as resp:
-            resp.raise_for_status()  # Raises HTTPError for bad responses (4XX or 5XX)
-            return BytesIO(await resp.read())
-
-
 def text_filepath_to_base64_string(filepath: str) -> str:
     """Converts a text file to a base64 string."""
     with open(filepath, "rb") as f:
@@ -153,28 +71,3 @@ def text_filepath_to_data_uri(filepath: str) -> str:
     if mime_type is None:
         mime_type = "application/octet-stream"
     return f"data:{mime_type};base64,{base64_string}"
-
-
-def resize_mask_to_image(
-    mask: torch.Tensor,
-    image: torch.Tensor,
-    upscale_method="nearest-exact",
-    crop="disabled",
-    allow_gradient=True,
-    add_channel_dim=False,
-):
-    """
-    Resize mask to be the same dimensions as an image, while maintaining proper format for API calls.
-    """
-    _, H, W, _ = image.shape
-    mask = mask.unsqueeze(-1)
-    mask = mask.movedim(-1, 1)
-    mask = common_upscale(
-        mask, width=W, height=H, upscale_method=upscale_method, crop=crop
-    )
-    mask = mask.movedim(1, -1)
-    if not add_channel_dim:
-        mask = mask.squeeze(-1)
-    if not allow_gradient:
-        mask = (mask > 0.5).float()
-    return mask
diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py
index baa74fd52..1740fb377 100644
--- a/comfy_api_nodes/nodes_bfl.py
+++ b/comfy_api_nodes/nodes_bfl.py
@@ -5,10 +5,6 @@ import torch
 from typing_extensions import override
 
 from comfy_api.latest import IO, ComfyExtension
-from comfy_api_nodes.apinode_utils import (
-    resize_mask_to_image,
-    validate_aspect_ratio,
-)
 from comfy_api_nodes.apis.bfl_api import (
     BFLFluxExpandImageRequest,
     BFLFluxFillImageRequest,
@@ -23,8 +19,10 @@ from comfy_api_nodes.util import (
     ApiEndpoint,
     download_url_to_image_tensor,
     poll_op,
+    resize_mask_to_image,
     sync_op,
     tensor_to_base64_string,
+    validate_aspect_ratio_string,
     validate_string,
 )
 
@@ -43,11 +41,6 @@ class FluxProUltraImageNode(IO.ComfyNode):
     Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution.
     """
 
-    MINIMUM_RATIO = 1 / 4
-    MAXIMUM_RATIO = 4 / 1
-    MINIMUM_RATIO_STR = "1:4"
-    MAXIMUM_RATIO_STR = "4:1"
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
@@ -112,16 +105,7 @@ class FluxProUltraImageNode(IO.ComfyNode):
 
     @classmethod
     def validate_inputs(cls, aspect_ratio: str):
-        try:
-            validate_aspect_ratio(
-                aspect_ratio,
-                minimum_ratio=cls.MINIMUM_RATIO,
-                maximum_ratio=cls.MAXIMUM_RATIO,
-                minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-                maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-            )
-        except Exception as e:
-            return str(e)
+        validate_aspect_ratio_string(aspect_ratio, (1, 4), (4, 1))
         return True
 
     @classmethod
@@ -145,13 +129,7 @@ class FluxProUltraImageNode(IO.ComfyNode):
                 prompt=prompt,
                 prompt_upsampling=prompt_upsampling,
                 seed=seed,
-                aspect_ratio=validate_aspect_ratio(
-                    aspect_ratio,
-                    minimum_ratio=cls.MINIMUM_RATIO,
-                    maximum_ratio=cls.MAXIMUM_RATIO,
-                    minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-                    maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-                ),
+                aspect_ratio=aspect_ratio,
                 raw=raw,
                 image_prompt=(image_prompt if image_prompt is None else tensor_to_base64_string(image_prompt)),
                 image_prompt_strength=(None if image_prompt is None else round(image_prompt_strength, 2)),
@@ -180,11 +158,6 @@ class FluxKontextProImageNode(IO.ComfyNode):
     Edits images using Flux.1 Kontext [pro] via api based on prompt and aspect ratio.
     """
 
-    MINIMUM_RATIO = 1 / 4
-    MAXIMUM_RATIO = 4 / 1
-    MINIMUM_RATIO_STR = "1:4"
-    MAXIMUM_RATIO_STR = "4:1"
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
@@ -261,13 +234,7 @@ class FluxKontextProImageNode(IO.ComfyNode):
         seed=0,
         prompt_upsampling=False,
     ) -> IO.NodeOutput:
-        aspect_ratio = validate_aspect_ratio(
-            aspect_ratio,
-            minimum_ratio=cls.MINIMUM_RATIO,
-            maximum_ratio=cls.MAXIMUM_RATIO,
-            minimum_ratio_str=cls.MINIMUM_RATIO_STR,
-            maximum_ratio_str=cls.MAXIMUM_RATIO_STR,
-        )
+        validate_aspect_ratio_string(aspect_ratio, (1, 4), (4, 1))
         if input_image is None:
             validate_string(prompt, strip_whitespace=False)
         initial_response = await sync_op(
diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py
index 534af380d..caced471e 100644
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@@ -17,7 +17,7 @@ from comfy_api_nodes.util import (
     poll_op,
     sync_op,
     upload_images_to_comfyapi,
-    validate_image_aspect_ratio_range,
+    validate_image_aspect_ratio,
     validate_image_dimensions,
     validate_string,
 )
@@ -403,7 +403,7 @@ class ByteDanceImageEditNode(IO.ComfyNode):
         validate_string(prompt, strip_whitespace=True, min_length=1)
         if get_number_of_images(image) != 1:
             raise ValueError("Exactly one input image is required.")
-        validate_image_aspect_ratio_range(image, (1, 3), (3, 1))
+        validate_image_aspect_ratio(image, (1, 3), (3, 1))
         source_url = (await upload_images_to_comfyapi(cls, image, max_images=1, mime_type="image/png"))[0]
         payload = Image2ImageTaskCreationRequest(
             model=model,
@@ -565,7 +565,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
         reference_images_urls = []
         if n_input_images:
             for i in image:
-                validate_image_aspect_ratio_range(i, (1, 3), (3, 1))
+                validate_image_aspect_ratio(i, (1, 3), (3, 1))
             reference_images_urls = await upload_images_to_comfyapi(
                 cls,
                 image,
@@ -798,7 +798,7 @@ class ByteDanceImageToVideoNode(IO.ComfyNode):
         validate_string(prompt, strip_whitespace=True, min_length=1)
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
         validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
-        validate_image_aspect_ratio_range(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+        validate_image_aspect_ratio(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         image_url = (await upload_images_to_comfyapi(cls, image, max_images=1))[0]
         prompt = (
@@ -923,7 +923,7 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode):
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "camerafixed", "watermark"])
         for i in (first_frame, last_frame):
             validate_image_dimensions(i, min_width=300, min_height=300, max_width=6000, max_height=6000)
-            validate_image_aspect_ratio_range(i, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+            validate_image_aspect_ratio(i, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         download_urls = await upload_images_to_comfyapi(
             cls,
@@ -1045,7 +1045,7 @@ class ByteDanceImageReferenceNode(IO.ComfyNode):
         raise_if_text_params(prompt, ["resolution", "ratio", "duration", "seed", "watermark"])
         for image in images:
             validate_image_dimensions(image, min_width=300, min_height=300, max_width=6000, max_height=6000)
-            validate_image_aspect_ratio_range(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
+            validate_image_aspect_ratio(image, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
 
         image_urls = await upload_images_to_comfyapi(cls, images, max_images=4, mime_type="image/png")
         prompt = (
diff --git a/comfy_api_nodes/nodes_ideogram.py b/comfy_api_nodes/nodes_ideogram.py
index d8fd3378b..48f94e612 100644
--- a/comfy_api_nodes/nodes_ideogram.py
+++ b/comfy_api_nodes/nodes_ideogram.py
@@ -1,6 +1,6 @@
 from io import BytesIO
 from typing_extensions import override
-from comfy_api.latest import ComfyExtension, IO
+from comfy_api.latest import IO, ComfyExtension
 from PIL import Image
 import numpy as np
 import torch
@@ -11,19 +11,13 @@ from comfy_api_nodes.apis import (
     IdeogramV3Request,
     IdeogramV3EditRequest,
 )
-
-from comfy_api_nodes.apis.client import (
+from comfy_api_nodes.util import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-)
-
-from comfy_api_nodes.apinode_utils import (
-    download_url_to_bytesio,
+    bytesio_to_image_tensor,
+    download_url_as_bytesio,
     resize_mask_to_image,
+    sync_op,
 )
-from comfy_api_nodes.util import bytesio_to_image_tensor
-from server import PromptServer
 
 V1_V1_RES_MAP = {
   "Auto":"AUTO",
@@ -220,7 +214,7 @@ async def download_and_process_images(image_urls):
 
     for image_url in image_urls:
         # Using functions from apinode_utils.py to handle downloading and processing
-        image_bytesio = await download_url_to_bytesio(image_url)  # Download image content to BytesIO
+        image_bytesio = await download_url_as_bytesio(image_url)  # Download image content to BytesIO
         img_tensor = bytesio_to_image_tensor(image_bytesio, mode="RGB")  # Convert to torch.Tensor with RGB mode
         image_tensors.append(img_tensor)
 
@@ -233,19 +227,6 @@ async def download_and_process_images(image_urls):
     return stacked_tensors
 
 
-def display_image_urls_on_node(image_urls, node_id):
-    if node_id and image_urls:
-        if len(image_urls) == 1:
-            PromptServer.instance.send_progress_text(
-                f"Generated Image URL:\n{image_urls[0]}", node_id
-            )
-        else:
-            urls_text = "Generated Image URLs:\n" + "\n".join(
-                f"{i+1}. {url}" for i, url in enumerate(image_urls)
-            )
-            PromptServer.instance.send_progress_text(urls_text, node_id)
-
-
 class IdeogramV1(IO.ComfyNode):
 
     @classmethod
@@ -334,44 +315,30 @@ class IdeogramV1(IO.ComfyNode):
         aspect_ratio = V1_V2_RATIO_MAP.get(aspect_ratio, None)
         model = "V_1_TURBO" if turbo else "V_1"
 
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/ideogram/generate",
-                method=HttpMethod.POST,
-                request_model=IdeogramGenerateRequest,
-                response_model=IdeogramGenerateResponse,
-            ),
-            request=IdeogramGenerateRequest(
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/ideogram/generate", method="POST"),
+            response_model=IdeogramGenerateResponse,
+            data=IdeogramGenerateRequest(
                 image_request=ImageRequest(
                     prompt=prompt,
                     model=model,
                     num_images=num_images,
                     seed=seed,
                     aspect_ratio=aspect_ratio if aspect_ratio != "ASPECT_1_1" else None,
-                    magic_prompt_option=(
-                        magic_prompt_option if magic_prompt_option != "AUTO" else None
-                    ),
+                    magic_prompt_option=(magic_prompt_option if magic_prompt_option != "AUTO" else None),
                     negative_prompt=negative_prompt if negative_prompt else None,
                 )
             ),
-            auth_kwargs=auth,
+            max_retries=1,
         )
 
-        response = await operation.execute()
-
         if not response.data or len(response.data) == 0:
             raise Exception("No images were generated in the response")
 
         image_urls = [image_data.url for image_data in response.data if image_data.url]
-
         if not image_urls:
             raise Exception("No image URLs were generated in the response")
-
-        display_image_urls_on_node(image_urls, cls.hidden.unique_id)
         return IO.NodeOutput(await download_and_process_images(image_urls))
 
 
@@ -500,18 +467,11 @@ class IdeogramV2(IO.ComfyNode):
         else:
             final_aspect_ratio = aspect_ratio if aspect_ratio != "ASPECT_1_1" else None
 
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/ideogram/generate",
-                method=HttpMethod.POST,
-                request_model=IdeogramGenerateRequest,
-                response_model=IdeogramGenerateResponse,
-            ),
-            request=IdeogramGenerateRequest(
+        response = await sync_op(
+            cls,
+            endpoint=ApiEndpoint(path="/proxy/ideogram/generate", method="POST"),
+            response_model=IdeogramGenerateResponse,
+            data=IdeogramGenerateRequest(
                 image_request=ImageRequest(
                     prompt=prompt,
                     model=model,
@@ -519,28 +479,20 @@ class IdeogramV2(IO.ComfyNode):
                     seed=seed,
                     aspect_ratio=final_aspect_ratio,
                     resolution=final_resolution,
-                    magic_prompt_option=(
-                        magic_prompt_option if magic_prompt_option != "AUTO" else None
-                    ),
+                    magic_prompt_option=(magic_prompt_option if magic_prompt_option != "AUTO" else None),
                     style_type=style_type if style_type != "NONE" else None,
                     negative_prompt=negative_prompt if negative_prompt else None,
                     color_palette=color_palette if color_palette else None,
                 )
             ),
-            auth_kwargs=auth,
+            max_retries=1,
         )
-
-        response = await operation.execute()
-
         if not response.data or len(response.data) == 0:
             raise Exception("No images were generated in the response")
 
         image_urls = [image_data.url for image_data in response.data if image_data.url]
-
         if not image_urls:
             raise Exception("No image URLs were generated in the response")
-
-        display_image_urls_on_node(image_urls, cls.hidden.unique_id)
         return IO.NodeOutput(await download_and_process_images(image_urls))
 
 
@@ -656,10 +608,6 @@ class IdeogramV3(IO.ComfyNode):
         character_image=None,
         character_mask=None,
     ):
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
         if rendering_speed == "BALANCED":  # for backward compatibility
             rendering_speed = "DEFAULT"
 
@@ -694,9 +642,6 @@ class IdeogramV3(IO.ComfyNode):
 
         # Check if both image and mask are provided for editing mode
         if image is not None and mask is not None:
-            # Edit mode
-            path = "/proxy/ideogram/ideogram-v3/edit"
-
             # Process image and mask
             input_tensor = image.squeeze().cpu()
             # Resize mask to match image dimension
@@ -749,27 +694,20 @@ class IdeogramV3(IO.ComfyNode):
             if character_mask_binary:
                 files["character_mask_binary"] = character_mask_binary
 
-            # Execute the operation for edit mode
-            operation = SynchronousOperation(
-                endpoint=ApiEndpoint(
-                    path=path,
-                    method=HttpMethod.POST,
-                    request_model=IdeogramV3EditRequest,
-                    response_model=IdeogramGenerateResponse,
-                ),
-                request=edit_request,
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/ideogram/ideogram-v3/edit", method="POST"),
+                response_model=IdeogramGenerateResponse,
+                data=edit_request,
                 files=files,
                 content_type="multipart/form-data",
-                auth_kwargs=auth,
+                max_retries=1,
             )
 
         elif image is not None or mask is not None:
             # If only one of image or mask is provided, raise an error
             raise Exception("Ideogram V3 image editing requires both an image AND a mask")
         else:
-            # Generation mode
-            path = "/proxy/ideogram/ideogram-v3/generate"
-
             # Create generation request
             gen_request = IdeogramV3Request(
                 prompt=prompt,
@@ -800,32 +738,22 @@ class IdeogramV3(IO.ComfyNode):
             if files:
                 gen_request.style_type = "AUTO"
 
-            # Execute the operation for generation mode
-            operation = SynchronousOperation(
-                endpoint=ApiEndpoint(
-                    path=path,
-                    method=HttpMethod.POST,
-                    request_model=IdeogramV3Request,
-                    response_model=IdeogramGenerateResponse,
-                ),
-                request=gen_request,
+            response = await sync_op(
+                cls,
+                endpoint=ApiEndpoint(path="/proxy/ideogram/ideogram-v3/generate", method="POST"),
+                response_model=IdeogramGenerateResponse,
+                data=gen_request,
                 files=files if files else None,
                 content_type="multipart/form-data",
-                auth_kwargs=auth,
+                max_retries=1,
             )
 
-        # Execute the operation and process response
-        response = await operation.execute()
-
         if not response.data or len(response.data) == 0:
             raise Exception("No images were generated in the response")
 
         image_urls = [image_data.url for image_data in response.data if image_data.url]
-
         if not image_urls:
             raise Exception("No image URLs were generated in the response")
-
-        display_image_urls_on_node(image_urls, cls.hidden.unique_id)
         return IO.NodeOutput(await download_and_process_images(image_urls))
 
 
@@ -838,5 +766,6 @@ class IdeogramExtension(ComfyExtension):
             IdeogramV3,
         ]
 
+
 async def comfy_entrypoint() -> IdeogramExtension:
     return IdeogramExtension()
diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py
index eea65c9ac..7b23e9cf9 100644
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -282,7 +282,7 @@ def validate_input_image(image: torch.Tensor) -> None:
     See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
     """
     validate_image_dimensions(image, min_width=300, min_height=300)
-    validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)
+    validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
 
 
 def get_video_from_response(response) -> KlingVideoResult:
diff --git a/comfy_api_nodes/nodes_pixverse.py b/comfy_api_nodes/nodes_pixverse.py
index b2b841be8..6e1686af0 100644
--- a/comfy_api_nodes/nodes_pixverse.py
+++ b/comfy_api_nodes/nodes_pixverse.py
@@ -1,7 +1,6 @@
-from inspect import cleandoc
-from typing import Optional
+import torch
 from typing_extensions import override
-from io import BytesIO
+from comfy_api.latest import IO, ComfyExtension
 from comfy_api_nodes.apis.pixverse_api import (
     PixverseTextVideoRequest,
     PixverseImageVideoRequest,
@@ -17,53 +16,30 @@ from comfy_api_nodes.apis.pixverse_api import (
     PixverseIO,
     pixverse_templates,
 )
-from comfy_api_nodes.apis.client import (
+from comfy_api_nodes.util import (
     ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
-    EmptyRequest,
+    download_url_to_video_output,
+    poll_op,
+    sync_op,
+    tensor_to_bytesio,
+    validate_string,
 )
-from comfy_api_nodes.util import validate_string, tensor_to_bytesio
-from comfy_api.input_impl import VideoFromFile
-from comfy_api.latest import ComfyExtension, IO
-
-import torch
-import aiohttp
-
 
 AVERAGE_DURATION_T2V = 32
 AVERAGE_DURATION_I2V = 30
 AVERAGE_DURATION_T2T = 52
 
 
-def get_video_url_from_response(
-    response: PixverseGenerationStatusResponse,
-) -> Optional[str]:
-    if response.Resp is None or response.Resp.url is None:
-        return None
-    return str(response.Resp.url)
-
-
-async def upload_image_to_pixverse(image: torch.Tensor, auth_kwargs=None):
-    # first, upload image to Pixverse and get image id to use in actual generation call
-    operation = SynchronousOperation(
-        endpoint=ApiEndpoint(
-            path="/proxy/pixverse/image/upload",
-            method=HttpMethod.POST,
-            request_model=EmptyRequest,
-            response_model=PixverseImageUploadResponse,
-        ),
-        request=EmptyRequest(),
+async def upload_image_to_pixverse(cls: type[IO.ComfyNode], image: torch.Tensor):
+    response_upload = await sync_op(
+        cls,
+        ApiEndpoint(path="/proxy/pixverse/image/upload", method="POST"),
+        response_model=PixverseImageUploadResponse,
         files={"image": tensor_to_bytesio(image)},
         content_type="multipart/form-data",
-        auth_kwargs=auth_kwargs,
     )
-    response_upload: PixverseImageUploadResponse = await operation.execute()
-
     if response_upload.Resp is None:
         raise Exception(f"PixVerse image upload request failed: '{response_upload.ErrMsg}'")
-
     return response_upload.Resp.img_id
 
 
@@ -93,17 +69,13 @@ class PixverseTemplateNode(IO.ComfyNode):
 
 
 class PixverseTextToVideoNode(IO.ComfyNode):
-    """
-    Generates videos based on prompt and output_size.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="PixverseTextToVideoNode",
             display_name="PixVerse Text to Video",
             category="api node/video/PixVerse",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos based on prompt and output_size.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -170,7 +142,7 @@ class PixverseTextToVideoNode(IO.ComfyNode):
         negative_prompt: str = None,
         pixverse_template: int = None,
     ) -> IO.NodeOutput:
-        validate_string(prompt, strip_whitespace=False)
+        validate_string(prompt, strip_whitespace=False, min_length=1)
         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
         if quality == PixverseQuality.res_1080p:
@@ -179,18 +151,11 @@ class PixverseTextToVideoNode(IO.ComfyNode):
         elif duration_seconds != PixverseDuration.dur_5:
             motion_mode = PixverseMotionMode.normal
 
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/pixverse/video/text/generate",
-                method=HttpMethod.POST,
-                request_model=PixverseTextVideoRequest,
-                response_model=PixverseVideoResponse,
-            ),
-            request=PixverseTextVideoRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/pixverse/video/text/generate", method="POST"),
+            response_model=PixverseVideoResponse,
+            data=PixverseTextVideoRequest(
                 prompt=prompt,
                 aspect_ratio=aspect_ratio,
                 quality=quality,
@@ -200,20 +165,14 @@ class PixverseTextToVideoNode(IO.ComfyNode):
                 template_id=pixverse_template,
                 seed=seed,
             ),
-            auth_kwargs=auth,
         )
-        response_api = await operation.execute()
-
         if response_api.Resp is None:
             raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'")
 
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=PixverseGenerationStatusResponse,
-            ),
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}"),
+            response_model=PixverseGenerationStatusResponse,
             completed_statuses=[PixverseStatus.successful],
             failed_statuses=[
                 PixverseStatus.contents_moderation,
@@ -221,30 +180,19 @@ class PixverseTextToVideoNode(IO.ComfyNode):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=auth,
-            node_id=cls.hidden.unique_id,
-            result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_T2V,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.Resp.url) as vid_response:
-                return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
+        return IO.NodeOutput(await download_url_to_video_output(response_poll.Resp.url))
 
 
 class PixverseImageToVideoNode(IO.ComfyNode):
-    """
-    Generates videos based on prompt and output_size.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="PixverseImageToVideoNode",
             display_name="PixVerse Image to Video",
             category="api node/video/PixVerse",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos based on prompt and output_size.",
             inputs=[
                 IO.Image.Input("image"),
                 IO.String.Input(
@@ -309,11 +257,7 @@ class PixverseImageToVideoNode(IO.ComfyNode):
         pixverse_template: int = None,
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        img_id = await upload_image_to_pixverse(image, auth_kwargs=auth)
+        img_id = await upload_image_to_pixverse(cls, image)
 
         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -323,14 +267,11 @@ class PixverseImageToVideoNode(IO.ComfyNode):
         elif duration_seconds != PixverseDuration.dur_5:
             motion_mode = PixverseMotionMode.normal
 
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/pixverse/video/img/generate",
-                method=HttpMethod.POST,
-                request_model=PixverseImageVideoRequest,
-                response_model=PixverseVideoResponse,
-            ),
-            request=PixverseImageVideoRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/pixverse/video/img/generate", method="POST"),
+            response_model=PixverseVideoResponse,
+            data=PixverseImageVideoRequest(
                 img_id=img_id,
                 prompt=prompt,
                 quality=quality,
@@ -340,20 +281,15 @@ class PixverseImageToVideoNode(IO.ComfyNode):
                 template_id=pixverse_template,
                 seed=seed,
             ),
-            auth_kwargs=auth,
         )
-        response_api = await operation.execute()
 
         if response_api.Resp is None:
             raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'")
 
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=PixverseGenerationStatusResponse,
-            ),
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}"),
+            response_model=PixverseGenerationStatusResponse,
             completed_statuses=[PixverseStatus.successful],
             failed_statuses=[
                 PixverseStatus.contents_moderation,
@@ -361,30 +297,19 @@ class PixverseImageToVideoNode(IO.ComfyNode):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=auth,
-            node_id=cls.hidden.unique_id,
-            result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_I2V,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.Resp.url) as vid_response:
-                return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
+        return IO.NodeOutput(await download_url_to_video_output(response_poll.Resp.url))
 
 
 class PixverseTransitionVideoNode(IO.ComfyNode):
-    """
-    Generates videos based on prompt and output_size.
-    """
-
     @classmethod
     def define_schema(cls) -> IO.Schema:
         return IO.Schema(
             node_id="PixverseTransitionVideoNode",
             display_name="PixVerse Transition Video",
             category="api node/video/PixVerse",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates videos based on prompt and output_size.",
             inputs=[
                 IO.Image.Input("first_frame"),
                 IO.Image.Input("last_frame"),
@@ -445,12 +370,8 @@ class PixverseTransitionVideoNode(IO.ComfyNode):
         negative_prompt: str = None,
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        auth = {
-            "auth_token": cls.hidden.auth_token_comfy_org,
-            "comfy_api_key": cls.hidden.api_key_comfy_org,
-        }
-        first_frame_id = await upload_image_to_pixverse(first_frame, auth_kwargs=auth)
-        last_frame_id = await upload_image_to_pixverse(last_frame, auth_kwargs=auth)
+        first_frame_id = await upload_image_to_pixverse(cls, first_frame)
+        last_frame_id = await upload_image_to_pixverse(cls, last_frame)
 
         # 1080p is limited to 5 seconds duration
         # only normal motion_mode supported for 1080p or for non-5 second duration
@@ -460,14 +381,11 @@ class PixverseTransitionVideoNode(IO.ComfyNode):
         elif duration_seconds != PixverseDuration.dur_5:
             motion_mode = PixverseMotionMode.normal
 
-        operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path="/proxy/pixverse/video/transition/generate",
-                method=HttpMethod.POST,
-                request_model=PixverseTransitionVideoRequest,
-                response_model=PixverseVideoResponse,
-            ),
-            request=PixverseTransitionVideoRequest(
+        response_api = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/pixverse/video/transition/generate", method="POST"),
+            response_model=PixverseVideoResponse,
+            data=PixverseTransitionVideoRequest(
                 first_frame_img=first_frame_id,
                 last_frame_img=last_frame_id,
                 prompt=prompt,
@@ -477,20 +395,15 @@ class PixverseTransitionVideoNode(IO.ComfyNode):
                 negative_prompt=negative_prompt if negative_prompt else None,
                 seed=seed,
             ),
-            auth_kwargs=auth,
         )
-        response_api = await operation.execute()
 
         if response_api.Resp is None:
             raise Exception(f"PixVerse request failed: '{response_api.ErrMsg}'")
 
-        operation = PollingOperation(
-            poll_endpoint=ApiEndpoint(
-                path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=PixverseGenerationStatusResponse,
-            ),
+        response_poll = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/pixverse/video/result/{response_api.Resp.video_id}"),
+            response_model=PixverseGenerationStatusResponse,
             completed_statuses=[PixverseStatus.successful],
             failed_statuses=[
                 PixverseStatus.contents_moderation,
@@ -498,16 +411,9 @@ class PixverseTransitionVideoNode(IO.ComfyNode):
                 PixverseStatus.deleted,
             ],
             status_extractor=lambda x: x.Resp.status,
-            auth_kwargs=auth,
-            node_id=cls.hidden.unique_id,
-            result_url_extractor=get_video_url_from_response,
             estimated_duration=AVERAGE_DURATION_T2V,
         )
-        response_poll = await operation.execute()
-
-        async with aiohttp.ClientSession() as session:
-            async with session.get(response_poll.Resp.url) as vid_response:
-                return IO.NodeOutput(VideoFromFile(BytesIO(await vid_response.content.read())))
+        return IO.NodeOutput(await download_url_to_video_output(response_poll.Resp.url))
 
 
 class PixVerseExtension(ComfyExtension):
diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py
index dee186cd6..e3440b946 100644
--- a/comfy_api_nodes/nodes_recraft.py
+++ b/comfy_api_nodes/nodes_recraft.py
@@ -8,9 +8,6 @@ from typing_extensions import override
 
 from comfy.utils import ProgressBar
 from comfy_api.latest import IO, ComfyExtension
-from comfy_api_nodes.apinode_utils import (
-    resize_mask_to_image,
-)
 from comfy_api_nodes.apis.recraft_api import (
     RecraftColor,
     RecraftColorChain,
@@ -28,6 +25,7 @@ from comfy_api_nodes.util import (
     ApiEndpoint,
     bytesio_to_image_tensor,
     download_url_as_bytesio,
+    resize_mask_to_image,
     sync_op,
     tensor_to_bytesio,
     validate_string,
diff --git a/comfy_api_nodes/nodes_runway.py b/comfy_api_nodes/nodes_runway.py
index 0543d1d0e..2fdafbbfe 100644
--- a/comfy_api_nodes/nodes_runway.py
+++ b/comfy_api_nodes/nodes_runway.py
@@ -200,7 +200,7 @@ class RunwayImageToVideoNodeGen3a(IO.ComfyNode):
     ) -> IO.NodeOutput:
         validate_string(prompt, min_length=1)
         validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
-        validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
+        validate_image_aspect_ratio(start_frame, (1, 2), (2, 1))
 
         download_urls = await upload_images_to_comfyapi(
             cls,
@@ -290,7 +290,7 @@ class RunwayImageToVideoNodeGen4(IO.ComfyNode):
     ) -> IO.NodeOutput:
         validate_string(prompt, min_length=1)
         validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
-        validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
+        validate_image_aspect_ratio(start_frame, (1, 2), (2, 1))
 
         download_urls = await upload_images_to_comfyapi(
             cls,
@@ -390,8 +390,8 @@ class RunwayFirstLastFrameNode(IO.ComfyNode):
         validate_string(prompt, min_length=1)
         validate_image_dimensions(start_frame, max_width=7999, max_height=7999)
         validate_image_dimensions(end_frame, max_width=7999, max_height=7999)
-        validate_image_aspect_ratio(start_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
-        validate_image_aspect_ratio(end_frame, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
+        validate_image_aspect_ratio(start_frame, (1, 2), (2, 1))
+        validate_image_aspect_ratio(end_frame, (1, 2), (2, 1))
 
         stacked_input_images = image_tensor_pair_to_batch(start_frame, end_frame)
         download_urls = await upload_images_to_comfyapi(
@@ -475,7 +475,7 @@ class RunwayTextToImageNode(IO.ComfyNode):
         reference_images = None
         if reference_image is not None:
             validate_image_dimensions(reference_image, max_width=7999, max_height=7999)
-            validate_image_aspect_ratio(reference_image, min_aspect_ratio=0.5, max_aspect_ratio=2.0)
+            validate_image_aspect_ratio(reference_image, (1, 2), (2, 1))
             download_urls = await upload_images_to_comfyapi(
                 cls,
                 reference_image,
diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py
index 0e0572f8c..7a679f0d9 100644
--- a/comfy_api_nodes/nodes_vidu.py
+++ b/comfy_api_nodes/nodes_vidu.py
@@ -14,9 +14,9 @@ from comfy_api_nodes.util import (
     poll_op,
     sync_op,
     upload_images_to_comfyapi,
-    validate_aspect_ratio_closeness,
-    validate_image_aspect_ratio_range,
+    validate_image_aspect_ratio,
     validate_image_dimensions,
+    validate_images_aspect_ratio_closeness,
 )
 
 VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
@@ -114,7 +114,7 @@ async def execute_task(
         cls,
         ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % response.task_id),
         response_model=TaskStatusResponse,
-        status_extractor=lambda r: r.state.value,
+        status_extractor=lambda r: r.state,
         estimated_duration=estimated_duration,
     )
 
@@ -307,7 +307,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
     ) -> IO.NodeOutput:
         if get_number_of_images(image) > 1:
             raise ValueError("Only one input image is allowed.")
-        validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
+        validate_image_aspect_ratio(image, (1, 4), (4, 1))
         payload = TaskCreationRequest(
             model_name=model,
             prompt=prompt,
@@ -423,7 +423,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
         if a > 7:
             raise ValueError("Too many images, maximum allowed is 7.")
         for image in images:
-            validate_image_aspect_ratio_range(image, (1, 4), (4, 1))
+            validate_image_aspect_ratio(image, (1, 4), (4, 1))
             validate_image_dimensions(image, min_width=128, min_height=128)
         payload = TaskCreationRequest(
             model_name=model,
@@ -533,7 +533,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
         resolution: str,
         movement_amplitude: str,
     ) -> IO.NodeOutput:
-        validate_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
+        validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
         payload = TaskCreationRequest(
             model_name=model,
             prompt=prompt,
diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py
index 0cca2b59b..bbc71363a 100644
--- a/comfy_api_nodes/util/__init__.py
+++ b/comfy_api_nodes/util/__init__.py
@@ -14,6 +14,7 @@ from .conversions import (
     downscale_image_tensor,
     image_tensor_pair_to_batch,
     pil_to_bytesio,
+    resize_mask_to_image,
     tensor_to_base64_string,
     tensor_to_bytesio,
     tensor_to_pil,
@@ -34,12 +35,12 @@ from .upload_helpers import (
 )
 from .validation_utils import (
     get_number_of_images,
-    validate_aspect_ratio_closeness,
+    validate_aspect_ratio_string,
     validate_audio_duration,
     validate_container_format_is_mp4,
     validate_image_aspect_ratio,
-    validate_image_aspect_ratio_range,
     validate_image_dimensions,
+    validate_images_aspect_ratio_closeness,
     validate_string,
     validate_video_dimensions,
     validate_video_duration,
@@ -70,6 +71,7 @@ __all__ = [
     "downscale_image_tensor",
     "image_tensor_pair_to_batch",
     "pil_to_bytesio",
+    "resize_mask_to_image",
     "tensor_to_base64_string",
     "tensor_to_bytesio",
     "tensor_to_pil",
@@ -77,12 +79,12 @@ __all__ = [
     "video_to_base64_string",
     # Validation utilities
     "get_number_of_images",
-    "validate_aspect_ratio_closeness",
+    "validate_aspect_ratio_string",
     "validate_audio_duration",
     "validate_container_format_is_mp4",
     "validate_image_aspect_ratio",
-    "validate_image_aspect_ratio_range",
     "validate_image_dimensions",
+    "validate_images_aspect_ratio_closeness",
     "validate_string",
     "validate_video_dimensions",
     "validate_video_duration",
diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py
index 9f4c90c5c..b59c2bd84 100644
--- a/comfy_api_nodes/util/conversions.py
+++ b/comfy_api_nodes/util/conversions.py
@@ -430,3 +430,24 @@ def audio_bytes_to_audio_input(audio_bytes: bytes) -> dict:
     wav = torch.cat(frames, dim=1)  # [C, T]
     wav = _f32_pcm(wav)
     return {"waveform": wav.unsqueeze(0).contiguous(), "sample_rate": out_sr}
+
+
+def resize_mask_to_image(
+    mask: torch.Tensor,
+    image: torch.Tensor,
+    upscale_method="nearest-exact",
+    crop="disabled",
+    allow_gradient=True,
+    add_channel_dim=False,
+):
+    """Resize mask to be the same dimensions as an image, while maintaining proper format for API calls."""
+    _, height, width, _ = image.shape
+    mask = mask.unsqueeze(-1)
+    mask = mask.movedim(-1, 1)
+    mask = common_upscale(mask, width=width, height=height, upscale_method=upscale_method, crop=crop)
+    mask = mask.movedim(1, -1)
+    if not add_channel_dim:
+        mask = mask.squeeze(-1)
+    if not allow_gradient:
+        mask = (mask > 0.5).float()
+    return mask
diff --git a/comfy_api_nodes/util/validation_utils.py b/comfy_api_nodes/util/validation_utils.py
index 22da05bc1..ec7006aed 100644
--- a/comfy_api_nodes/util/validation_utils.py
+++ b/comfy_api_nodes/util/validation_utils.py
@@ -37,63 +37,62 @@ def validate_image_dimensions(
 
 def validate_image_aspect_ratio(
     image: torch.Tensor,
-    min_aspect_ratio: Optional[float] = None,
-    max_aspect_ratio: Optional[float] = None,
-):
-    width, height = get_image_dimensions(image)
-    aspect_ratio = width / height
-
-    if min_aspect_ratio is not None and aspect_ratio < min_aspect_ratio:
-        raise ValueError(f"Image aspect ratio must be at least {min_aspect_ratio}, got {aspect_ratio}")
-    if max_aspect_ratio is not None and aspect_ratio > max_aspect_ratio:
-        raise ValueError(f"Image aspect ratio must be at most {max_aspect_ratio}, got {aspect_ratio}")
-
-
-def validate_image_aspect_ratio_range(
-    image: torch.Tensor,
-    min_ratio: tuple[float, float],  # e.g. (1, 4)
-    max_ratio: tuple[float, float],  # e.g. (4, 1)
+    min_ratio: Optional[tuple[float, float]] = None,  # e.g. (1, 4)
+    max_ratio: Optional[tuple[float, float]] = None,  # e.g. (4, 1)
     *,
     strict: bool = True,  # True -> (min, max); False -> [min, max]
 ) -> float:
-    a1, b1 = min_ratio
-    a2, b2 = max_ratio
-    if a1 <= 0 or b1 <= 0 or a2 <= 0 or b2 <= 0:
-        raise ValueError("Ratios must be positive, like (1, 4) or (4, 1).")
-    lo, hi = (a1 / b1), (a2 / b2)
-    if lo > hi:
-        lo, hi = hi, lo
-        a1, b1, a2, b2 = a2, b2, a1, b1  # swap only for error text
+    """Validates that image aspect ratio is within min and max. If a bound is None, that side is not checked."""
     w, h = get_image_dimensions(image)
     if w <= 0 or h <= 0:
         raise ValueError(f"Invalid image dimensions: {w}x{h}")
     ar = w / h
-    ok = (lo < ar < hi) if strict else (lo <= ar <= hi)
-    if not ok:
-        op = "<" if strict else "≤"
-        raise ValueError(f"Image aspect ratio {ar:.6g} is outside allowed range: {a1}:{b1} {op} ratio {op} {a2}:{b2}")
+    _assert_ratio_bounds(ar, min_ratio=min_ratio, max_ratio=max_ratio, strict=strict)
     return ar
 
 
-def validate_aspect_ratio_closeness(
-    start_img,
-    end_img,
-    min_rel: float,
-    max_rel: float,
+def validate_images_aspect_ratio_closeness(
+    first_image: torch.Tensor,
+    second_image: torch.Tensor,
+    min_rel: float,   # e.g. 0.8
+    max_rel: float,   # e.g. 1.25
     *,
-    strict: bool = False,  # True => exclusive, False => inclusive
-) -> None:
-    w1, h1 = get_image_dimensions(start_img)
-    w2, h2 = get_image_dimensions(end_img)
+    strict: bool = False,  # True -> (min, max); False -> [min, max]
+) -> float:
+    """
+    Validates that the two images' aspect ratios are 'close'.
+    The closeness factor is C = max(ar1, ar2) / min(ar1, ar2)  (C >= 1).
+    We require C <= limit, where limit = max(max_rel, 1.0 / min_rel).
+
+    Returns the computed closeness factor C.
+    """
+    w1, h1 = get_image_dimensions(first_image)
+    w2, h2 = get_image_dimensions(second_image)
     if min(w1, h1, w2, h2) <= 0:
         raise ValueError("Invalid image dimensions")
     ar1 = w1 / h1
     ar2 = w2 / h2
-    # Normalize so it is symmetric (no need to check both ar1/ar2 and ar2/ar1)
     closeness = max(ar1, ar2) / min(ar1, ar2)
-    limit = max(max_rel, 1.0 / min_rel)  # for 0.8..1.25 this is 1.25
+    limit = max(max_rel, 1.0 / min_rel)
     if (closeness >= limit) if strict else (closeness > limit):
-        raise ValueError(f"Aspect ratios must be close: start/end={ar1/ar2:.4f}, allowed range {min_rel}–{max_rel}.")
+        raise ValueError(
+            f"Aspect ratios must be close: ar1/ar2={ar1/ar2:.2g}, "
+            f"allowed range {min_rel}–{max_rel} (limit {limit:.2g})."
+        )
+    return closeness
+
+
+def validate_aspect_ratio_string(
+    aspect_ratio: str,
+    min_ratio: Optional[tuple[float, float]] = None,  # e.g. (1, 4)
+    max_ratio: Optional[tuple[float, float]] = None,  # e.g. (4, 1)
+    *,
+    strict: bool = False,  # True -> (min, max); False -> [min, max]
+) -> float:
+    """Parses 'X:Y' and validates it against optional bounds. Returns the numeric ratio."""
+    ar = _parse_aspect_ratio_string(aspect_ratio)
+    _assert_ratio_bounds(ar, min_ratio=min_ratio, max_ratio=max_ratio, strict=strict)
+    return ar
 
 
 def validate_video_dimensions(
@@ -183,3 +182,49 @@ def validate_container_format_is_mp4(video: VideoInput) -> None:
     container_format = video.get_container_format()
     if container_format not in ["mp4", "mov,mp4,m4a,3gp,3g2,mj2"]:
         raise ValueError(f"Only MP4 container format supported. Got: {container_format}")
+
+
+def _ratio_from_tuple(r: tuple[float, float]) -> float:
+    a, b = r
+    if a <= 0 or b <= 0:
+        raise ValueError(f"Ratios must be positive, got {a}:{b}.")
+    return a / b
+
+
+def _assert_ratio_bounds(
+    ar: float,
+    *,
+    min_ratio: Optional[tuple[float, float]] = None,
+    max_ratio: Optional[tuple[float, float]] = None,
+    strict: bool = True,
+) -> None:
+    """Validate a numeric aspect ratio against optional min/max ratio bounds."""
+    lo = _ratio_from_tuple(min_ratio) if min_ratio is not None else None
+    hi = _ratio_from_tuple(max_ratio) if max_ratio is not None else None
+
+    if lo is not None and hi is not None and lo > hi:
+        lo, hi = hi, lo  # normalize order if caller swapped them
+
+    if lo is not None:
+        if (ar <= lo) if strict else (ar < lo):
+            op = "<" if strict else "≤"
+            raise ValueError(f"Aspect ratio `{ar:.2g}` must be {op} {lo:.2g}.")
+    if hi is not None:
+        if (ar >= hi) if strict else (ar > hi):
+            op = "<" if strict else "≤"
+            raise ValueError(f"Aspect ratio `{ar:.2g}` must be {op} {hi:.2g}.")
+
+
+def _parse_aspect_ratio_string(ar_str: str) -> float:
+    """Parse 'X:Y' with integer parts into a positive float ratio X/Y."""
+    parts = ar_str.split(":")
+    if len(parts) != 2:
+        raise ValueError(f"Aspect ratio must be 'X:Y' (e.g., 16:9), got '{ar_str}'.")
+    try:
+        a = int(parts[0].strip())
+        b = int(parts[1].strip())
+    except ValueError as exc:
+        raise ValueError(f"Aspect ratio must contain integers separated by ':', got '{ar_str}'.") from exc
+    if a <= 0 or b <= 0:
+        raise ValueError(f"Aspect ratio parts must be positive integers, got {a}:{b}.")
+    return a / b

From dfac94695be95076d8028d04005a744f3ec0de8d Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Thu, 30 Oct 2025 19:22:35 +0200
Subject: [PATCH 15/20] fix img2img operation in Dall2 node (#10552)

---
 comfy_api_nodes/nodes_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py
index c467e840c..b4568fc85 100644
--- a/comfy_api_nodes/nodes_openai.py
+++ b/comfy_api_nodes/nodes_openai.py
@@ -225,7 +225,7 @@ class OpenAIDalle2(ComfyNodeABC):
             ),
             files=(
                 {
-                    "image": img_binary,
+                    "image": ("image.png", img_binary, "image/png"),
                 }
                 if img_binary
                 else None

From 513b0c46fba3bf40191d684ff81207ad935f1717 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Fri, 31 Oct 2025 07:39:02 +1000
Subject: [PATCH 16/20] Add RAM Pressure cache mode (#10454)

* execution: Roll the UI cache into the outputs

Currently the UI cache is parallel to the output cache with
expectations of being a content superset of the output cache.
At the same time the UI and output cache are maintained completely
seperately, making it awkward to free the output cache content without
changing the behaviour of the UI cache.

There are two actual users (getters) of the UI cache. The first is
the case of a direct content hit on the output cache when executing a
node. This case is very naturally handled by merging the UI and outputs
cache.

The second case is the history JSON generation at the end of the prompt.
This currently works by asking the cache for all_node_ids and then
pulling the cache contents for those nodes. all_node_ids is the nodes
of the dynamic prompt.

So fold the UI cache into the output cache. The current UI cache setter
now writes to a prompt-scope dict. When the output cache is set, just
get this value from the dict and tuple up with the outputs.

When generating the history, simply iterate prompt-scope dict.

This prepares support for more complex caching strategies (like RAM
pressure caching) where less than 1 workflow will be cached and it
will be desirable to keep the UI cache and output cache in sync.

* sd: Implement RAM getter for VAE

* model_patcher: Implement RAM getter for ModelPatcher

* sd: Implement RAM getter for CLIP

* Implement RAM Pressure cache

Implement a cache sensitive to RAM pressure. When RAM headroom drops
down below a certain threshold, evict RAM-expensive nodes from the
cache.

Models and tensors are measured directly for RAM usage. An OOM score
is then computed based on the RAM usage of the node.

Note the due to indirection through shared objects (like a model
patcher), multiple nodes can account the same RAM as their individual
usage. The intent is this will free chains of nodes particularly
model loaders and associate loras as they all score similar and are
sorted in close to each other.

Has a bias towards unloading model nodes mid flow while being able
to keep results like text encodings and VAE.

* execution: Convert the cache entry to NamedTuple

As commented in review.

Convert this to a named tuple and abstract away the tuple type
completely from graph.py.
---
 comfy/cli_args.py          |  1 +
 comfy/model_patcher.py     |  3 ++
 comfy/sd.py                | 14 +++++++
 comfy_execution/caching.py | 83 ++++++++++++++++++++++++++++++++++++++
 comfy_execution/graph.py   |  9 ++++-
 execution.py               | 81 +++++++++++++++++++++----------------
 main.py                    |  4 +-
 7 files changed, 157 insertions(+), 38 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 001abd843..3d5bc7c90 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -105,6 +105,7 @@ cache_group = parser.add_mutually_exclusive_group()
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
+cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threhold the cache remove large items to free RAM. Default 4GB")
 
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index ed3f3f5cb..674a214ca 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -276,6 +276,9 @@ class ModelPatcher:
         self.size = comfy.model_management.module_size(self.model)
         return self.size
 
+    def get_ram_usage(self):
+        return self.model_size()
+
     def loaded_size(self):
         return self.model.model_loaded_weight_memory
 
diff --git a/comfy/sd.py b/comfy/sd.py
index de4eee96e..9e5ebbf15 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -143,6 +143,9 @@ class CLIP:
         n.apply_hooks_to_conds = self.apply_hooks_to_conds
         return n
 
+    def get_ram_usage(self):
+        return self.patcher.get_ram_usage()
+
     def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
         return self.patcher.add_patches(patches, strength_patch, strength_model)
 
@@ -293,6 +296,7 @@ class VAE:
         self.working_dtypes = [torch.bfloat16, torch.float32]
         self.disable_offload = False
         self.not_video = False
+        self.size = None
 
         self.downscale_index_formula = None
         self.upscale_index_formula = None
@@ -595,6 +599,16 @@ class VAE:
 
         self.patcher = comfy.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device)
         logging.info("VAE load device: {}, offload device: {}, dtype: {}".format(self.device, offload_device, self.vae_dtype))
+        self.model_size()
+
+    def model_size(self):
+        if self.size is not None:
+            return self.size
+        self.size = comfy.model_management.module_size(self.first_stage_model)
+        return self.size
+
+    def get_ram_usage(self):
+        return self.model_size()
 
     def throw_exception_if_invalid(self):
         if self.first_stage_model is None:
diff --git a/comfy_execution/caching.py b/comfy_execution/caching.py
index 566bc3f9c..b498f43e7 100644
--- a/comfy_execution/caching.py
+++ b/comfy_execution/caching.py
@@ -1,4 +1,9 @@
+import bisect
+import gc
 import itertools
+import psutil
+import time
+import torch
 from typing import Sequence, Mapping, Dict
 from comfy_execution.graph import DynamicPrompt
 from abc import ABC, abstractmethod
@@ -188,6 +193,9 @@ class BasicCache:
         self._clean_cache()
         self._clean_subcaches()
 
+    def poll(self, **kwargs):
+        pass
+
     def _set_immediate(self, node_id, value):
         assert self.initialized
         cache_key = self.cache_key_set.get_data_key(node_id)
@@ -276,6 +284,9 @@ class NullCache:
     def clean_unused(self):
         pass
 
+    def poll(self, **kwargs):
+        pass
+
     def get(self, node_id):
         return None
 
@@ -336,3 +347,75 @@ class LRUCache(BasicCache):
             self._mark_used(child_id)
             self.children[cache_key].append(self.cache_key_set.get_data_key(child_id))
         return self
+
+
+#Iterating the cache for usage analysis might be expensive, so if we trigger make sure
+#to take a chunk out to give breathing space on high-node / low-ram-per-node flows.
+
+RAM_CACHE_HYSTERESIS = 1.1
+
+#This is kinda in GB but not really. It needs to be non-zero for the below heuristic
+#and as long as Multi GB models dwarf this it will approximate OOM scoring OK
+
+RAM_CACHE_DEFAULT_RAM_USAGE = 0.1
+
+#Exponential bias towards evicting older workflows so garbage will be taken out
+#in constantly changing setups.
+
+RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER = 1.3
+
+class RAMPressureCache(LRUCache):
+
+    def __init__(self, key_class):
+        super().__init__(key_class, 0)
+        self.timestamps = {}
+
+    def clean_unused(self):
+        self._clean_subcaches()
+
+    def set(self, node_id, value):
+        self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
+        super().set(node_id, value)
+
+    def get(self, node_id):
+        self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
+        return super().get(node_id)
+
+    def poll(self, ram_headroom):
+        def _ram_gb():
+            return psutil.virtual_memory().available / (1024**3)
+
+        if _ram_gb() > ram_headroom:
+            return
+        gc.collect()
+        if _ram_gb() > ram_headroom:
+            return
+
+        clean_list = []
+
+        for key, (outputs, _), in self.cache.items():
+            oom_score =  RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])
+
+            ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
+            def scan_list_for_ram_usage(outputs):
+                nonlocal ram_usage
+                for output in outputs:
+                    if isinstance(output, list):
+                        scan_list_for_ram_usage(output)
+                    elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
+                        #score Tensors at a 50% discount for RAM usage as they are likely to
+                        #be high value intermediates
+                        ram_usage += (output.numel() * output.element_size()) * 0.5
+                    elif hasattr(output, "get_ram_usage"):
+                        ram_usage += output.get_ram_usage()
+            scan_list_for_ram_usage(outputs)
+
+            oom_score *= ram_usage
+            #In the case where we have no information on the node ram usage at all,
+            #break OOM score ties on the last touch timestamp (pure LRU)
+            bisect.insort(clean_list, (oom_score, self.timestamps[key], key))
+
+        while _ram_gb() < ram_headroom * RAM_CACHE_HYSTERESIS and clean_list:
+            _, _, key = clean_list.pop()
+            del self.cache[key]
+            gc.collect()
diff --git a/comfy_execution/graph.py b/comfy_execution/graph.py
index 341c9735d..0d811e354 100644
--- a/comfy_execution/graph.py
+++ b/comfy_execution/graph.py
@@ -209,10 +209,15 @@ class ExecutionList(TopologicalSort):
             self.execution_cache_listeners[from_node_id] = set()
         self.execution_cache_listeners[from_node_id].add(to_node_id)
 
-    def get_output_cache(self, from_node_id, to_node_id):
+    def get_cache(self, from_node_id, to_node_id):
         if not to_node_id in self.execution_cache:
             return None
-        return self.execution_cache[to_node_id].get(from_node_id)
+        value = self.execution_cache[to_node_id].get(from_node_id)
+        if value is None:
+            return None
+        #Write back to the main cache on touch.
+        self.output_cache.set(from_node_id, value)
+        return value
 
     def cache_update(self, node_id, value):
         if node_id in self.execution_cache_listeners:
diff --git a/execution.py b/execution.py
index 20e106213..17c77beab 100644
--- a/execution.py
+++ b/execution.py
@@ -21,6 +21,7 @@ from comfy_execution.caching import (
     NullCache,
     HierarchicalCache,
     LRUCache,
+    RAMPressureCache,
 )
 from comfy_execution.graph import (
     DynamicPrompt,
@@ -88,49 +89,56 @@ class IsChangedCache:
         return self.is_changed[node_id]
 
 
+class CacheEntry(NamedTuple):
+    ui: dict
+    outputs: list
+
+
 class CacheType(Enum):
     CLASSIC = 0
     LRU = 1
     NONE = 2
+    RAM_PRESSURE = 3
 
 
 class CacheSet:
-    def __init__(self, cache_type=None, cache_size=None):
+    def __init__(self, cache_type=None, cache_args={}):
         if cache_type == CacheType.NONE:
             self.init_null_cache()
             logging.info("Disabling intermediate node cache.")
+        elif cache_type == CacheType.RAM_PRESSURE:
+            cache_ram = cache_args.get("ram", 16.0)
+            self.init_ram_cache(cache_ram)
+            logging.info("Using RAM pressure cache.")
         elif cache_type == CacheType.LRU:
-            if cache_size is None:
-                cache_size = 0
+            cache_size = cache_args.get("lru", 0)
             self.init_lru_cache(cache_size)
             logging.info("Using LRU cache")
         else:
             self.init_classic_cache()
 
-        self.all = [self.outputs, self.ui, self.objects]
+        self.all = [self.outputs, self.objects]
 
     # Performs like the old cache -- dump data ASAP
     def init_classic_cache(self):
         self.outputs = HierarchicalCache(CacheKeySetInputSignature)
-        self.ui = HierarchicalCache(CacheKeySetInputSignature)
         self.objects = HierarchicalCache(CacheKeySetID)
 
     def init_lru_cache(self, cache_size):
         self.outputs = LRUCache(CacheKeySetInputSignature, max_size=cache_size)
-        self.ui = LRUCache(CacheKeySetInputSignature, max_size=cache_size)
+        self.objects = HierarchicalCache(CacheKeySetID)
+
+    def init_ram_cache(self, min_headroom):
+        self.outputs = RAMPressureCache(CacheKeySetInputSignature)
         self.objects = HierarchicalCache(CacheKeySetID)
 
     def init_null_cache(self):
         self.outputs = NullCache()
-        #The UI cache is expected to be iterable at the end of each workflow
-        #so it must cache at least a full workflow. Use Heirachical
-        self.ui = HierarchicalCache(CacheKeySetInputSignature)
         self.objects = NullCache()
 
     def recursive_debug_dump(self):
         result = {
             "outputs": self.outputs.recursive_debug_dump(),
-            "ui": self.ui.recursive_debug_dump(),
         }
         return result
 
@@ -157,14 +165,14 @@ def get_input_data(inputs, class_def, unique_id, execution_list=None, dynprompt=
             if execution_list is None:
                 mark_missing()
                 continue # This might be a lazily-evaluated input
-            cached_output = execution_list.get_output_cache(input_unique_id, unique_id)
-            if cached_output is None:
+            cached = execution_list.get_cache(input_unique_id, unique_id)
+            if cached is None or cached.outputs is None:
                 mark_missing()
                 continue
-            if output_index >= len(cached_output):
+            if output_index >= len(cached.outputs):
                 mark_missing()
                 continue
-            obj = cached_output[output_index]
+            obj = cached.outputs[output_index]
             input_data_all[x] = obj
         elif input_category is not None:
             input_data_all[x] = [input_data]
@@ -393,7 +401,7 @@ def format_value(x):
     else:
         return str(x)
 
-async def execute(server, dynprompt, caches, current_item, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes):
+async def execute(server, dynprompt, caches, current_item, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_outputs):
     unique_id = current_item
     real_node_id = dynprompt.get_real_node_id(unique_id)
     display_node_id = dynprompt.get_display_node_id(unique_id)
@@ -401,12 +409,15 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
     inputs = dynprompt.get_node(unique_id)['inputs']
     class_type = dynprompt.get_node(unique_id)['class_type']
     class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
-    if caches.outputs.get(unique_id) is not None:
+    cached = caches.outputs.get(unique_id)
+    if cached is not None:
         if server.client_id is not None:
-            cached_output = caches.ui.get(unique_id) or {}
-            server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": cached_output.get("output",None), "prompt_id": prompt_id }, server.client_id)
+            cached_ui = cached.ui or {}
+            server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": cached_ui.get("output",None), "prompt_id": prompt_id }, server.client_id)
+            if cached.ui is not None:
+                ui_outputs[unique_id] = cached.ui
         get_progress_state().finish_progress(unique_id)
-        execution_list.cache_update(unique_id, caches.outputs.get(unique_id))
+        execution_list.cache_update(unique_id, cached)
         return (ExecutionResult.SUCCESS, None, None)
 
     input_data_all = None
@@ -436,8 +447,8 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                     for r in result:
                         if is_link(r):
                             source_node, source_output = r[0], r[1]
-                            node_output = execution_list.get_output_cache(source_node, unique_id)[source_output]
-                            for o in node_output:
+                            node_cached = execution_list.get_cache(source_node, unique_id)
+                            for o in node_cached.outputs[source_output]:
                                 resolved_output.append(o)
 
                         else:
@@ -507,7 +518,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                 asyncio.create_task(await_completion())
                 return (ExecutionResult.PENDING, None, None)
         if len(output_ui) > 0:
-            caches.ui.set(unique_id, {
+            ui_outputs[unique_id] = {
                 "meta": {
                     "node_id": unique_id,
                     "display_node": display_node_id,
@@ -515,7 +526,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                     "real_node_id": real_node_id,
                 },
                 "output": output_ui
-            })
+            }
             if server.client_id is not None:
                 server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": output_ui, "prompt_id": prompt_id }, server.client_id)
         if has_subgraph:
@@ -554,8 +565,9 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
             pending_subgraph_results[unique_id] = cached_outputs
             return (ExecutionResult.PENDING, None, None)
 
-        caches.outputs.set(unique_id, output_data)
-        execution_list.cache_update(unique_id, output_data)
+        cache_entry = CacheEntry(ui=ui_outputs.get(unique_id), outputs=output_data)
+        execution_list.cache_update(unique_id, cache_entry)
+        caches.outputs.set(unique_id, cache_entry)
 
     except comfy.model_management.InterruptProcessingException as iex:
         logging.info("Processing interrupted")
@@ -600,14 +612,14 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
     return (ExecutionResult.SUCCESS, None, None)
 
 class PromptExecutor:
-    def __init__(self, server, cache_type=False, cache_size=None):
-        self.cache_size = cache_size
+    def __init__(self, server, cache_type=False, cache_args=None):
+        self.cache_args = cache_args
         self.cache_type = cache_type
         self.server = server
         self.reset()
 
     def reset(self):
-        self.caches = CacheSet(cache_type=self.cache_type, cache_size=self.cache_size)
+        self.caches = CacheSet(cache_type=self.cache_type, cache_args=self.cache_args)
         self.status_messages = []
         self.success = True
 
@@ -682,6 +694,7 @@ class PromptExecutor:
                           broadcast=False)
             pending_subgraph_results = {}
             pending_async_nodes = {} # TODO - Unify this with pending_subgraph_results
+            ui_node_outputs = {}
             executed = set()
             execution_list = ExecutionList(dynamic_prompt, self.caches.outputs)
             current_outputs = self.caches.outputs.all_node_ids()
@@ -695,7 +708,7 @@ class PromptExecutor:
                     break
 
                 assert node_id is not None, "Node ID should not be None at this point"
-                result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes)
+                result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_node_outputs)
                 self.success = result != ExecutionResult.FAILURE
                 if result == ExecutionResult.FAILURE:
                     self.handle_execution_error(prompt_id, dynamic_prompt.original_prompt, current_outputs, executed, error, ex)
@@ -704,18 +717,16 @@ class PromptExecutor:
                     execution_list.unstage_node_execution()
                 else: # result == ExecutionResult.SUCCESS:
                     execution_list.complete_node_execution()
+                self.caches.outputs.poll(ram_headroom=self.cache_args["ram"])
             else:
                 # Only execute when the while-loop ends without break
                 self.add_message("execution_success", { "prompt_id": prompt_id }, broadcast=False)
 
             ui_outputs = {}
             meta_outputs = {}
-            all_node_ids = self.caches.ui.all_node_ids()
-            for node_id in all_node_ids:
-                ui_info = self.caches.ui.get(node_id)
-                if ui_info is not None:
-                    ui_outputs[node_id] = ui_info["output"]
-                    meta_outputs[node_id] = ui_info["meta"]
+            for node_id, ui_info in ui_node_outputs.items():
+                ui_outputs[node_id] = ui_info["output"]
+                meta_outputs[node_id] = ui_info["meta"]
             self.history_result = {
                 "outputs": ui_outputs,
                 "meta": meta_outputs,
diff --git a/main.py b/main.py
index 8d466d2eb..e1b0f1620 100644
--- a/main.py
+++ b/main.py
@@ -172,10 +172,12 @@ def prompt_worker(q, server_instance):
     cache_type = execution.CacheType.CLASSIC
     if args.cache_lru > 0:
         cache_type = execution.CacheType.LRU
+    elif args.cache_ram > 0:
+        cache_type = execution.CacheType.RAM_PRESSURE
     elif args.cache_none:
         cache_type = execution.CacheType.NONE
 
-    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_size=args.cache_lru)
+    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : args.cache_ram } )
     last_gc_collect = 0
     need_gc = False
     gc_collect_interval = 10.0

From 614cf9805e1056216487a2d1b1a07206d77f87e7 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Thu, 30 Oct 2025 19:11:38 -0700
Subject: [PATCH 17/20] Add a ScaleROPE node. Currently only works on WAN
 models. (#10559)

---
 comfy/ldm/wan/model.py     | 20 ++++++++++++----
 comfy/model_patcher.py     | 13 +++++++++++
 comfy_extras/nodes_rope.py | 47 ++++++++++++++++++++++++++++++++++++++
 nodes.py                   |  1 +
 4 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 comfy_extras/nodes_rope.py

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index 90c347d3d..77876c2e7 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -588,7 +588,7 @@ class WanModel(torch.nn.Module):
         x = self.unpatchify(x, grid_sizes)
         return x
 
-    def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None):
+    def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, transformer_options={}):
         patch_size = self.patch_size
         t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
         h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
@@ -601,10 +601,22 @@ class WanModel(torch.nn.Module):
         if steps_w is None:
             steps_w = w_len
 
+        h_start = 0
+        w_start = 0
+        rope_options = transformer_options.get("rope_options", None)
+        if rope_options is not None:
+            t_len = t_len * rope_options.get("scale_t", 1.0)
+            h_len = h_len * rope_options.get("scale_y", 1.0)
+            w_len = w_len * rope_options.get("scale_x", 1.0)
+
+            t_start += rope_options.get("shift_t", 0.0)
+            h_start += rope_options.get("shift_y", 0.0)
+            w_start += rope_options.get("shift_x", 0.0)
+
         img_ids = torch.zeros((steps_t, steps_h, steps_w, 3), device=device, dtype=dtype)
         img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(t_start, t_start + (t_len - 1), steps=steps_t, device=device, dtype=dtype).reshape(-1, 1, 1)
-        img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=steps_h, device=device, dtype=dtype).reshape(1, -1, 1)
-        img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=steps_w, device=device, dtype=dtype).reshape(1, 1, -1)
+        img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(h_start, h_start + (h_len - 1), steps=steps_h, device=device, dtype=dtype).reshape(1, -1, 1)
+        img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(w_start, w_start + (w_len - 1), steps=steps_w, device=device, dtype=dtype).reshape(1, 1, -1)
         img_ids = img_ids.reshape(1, -1, img_ids.shape[-1])
 
         freqs = self.rope_embedder(img_ids).movedim(1, 2)
@@ -630,7 +642,7 @@ class WanModel(torch.nn.Module):
         if self.ref_conv is not None and "reference_latent" in kwargs:
             t_len += 1
 
-        freqs = self.rope_encode(t_len, h, w, device=x.device, dtype=x.dtype)
+        freqs = self.rope_encode(t_len, h, w, device=x.device, dtype=x.dtype, transformer_options=transformer_options)
         return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options, **kwargs)[:, :, :t, :h, :w]
 
     def unpatchify(self, x, grid_sizes):
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 674a214ca..3e8799983 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -454,6 +454,19 @@ class ModelPatcher:
     def set_model_post_input_patch(self, patch):
         self.set_model_patch(patch, "post_input")
 
+    def set_model_rope_options(self, scale_x, shift_x, scale_y, shift_y, scale_t, shift_t, **kwargs):
+        rope_options = self.model_options["transformer_options"].get("rope_options", {})
+        rope_options["scale_x"] = scale_x
+        rope_options["scale_y"] = scale_y
+        rope_options["scale_t"] = scale_t
+
+        rope_options["shift_x"] = shift_x
+        rope_options["shift_y"] = shift_y
+        rope_options["shift_t"] = shift_t
+
+        self.model_options["transformer_options"]["rope_options"] = rope_options
+
+
     def add_object_patch(self, name, obj):
         self.object_patches[name] = obj
 
diff --git a/comfy_extras/nodes_rope.py b/comfy_extras/nodes_rope.py
new file mode 100644
index 000000000..d1feb031e
--- /dev/null
+++ b/comfy_extras/nodes_rope.py
@@ -0,0 +1,47 @@
+from comfy_api.latest import ComfyExtension, io
+from typing_extensions import override
+
+
+class ScaleROPE(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ScaleROPE",
+            category="advanced/model_patches",
+            description="Scale and shift the ROPE of the model.",
+            is_experimental=True,
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("scale_x", default=1.0, min=0.0, max=100.0, step=0.1),
+                io.Float.Input("shift_x", default=0.0, min=-256.0, max=256.0, step=0.1),
+
+                io.Float.Input("scale_y", default=1.0, min=0.0, max=100.0, step=0.1),
+                io.Float.Input("shift_y", default=0.0, min=-256.0, max=256.0, step=0.1),
+
+                io.Float.Input("scale_t", default=1.0, min=0.0, max=100.0, step=0.1),
+                io.Float.Input("shift_t", default=0.0, min=-256.0, max=256.0, step=0.1),
+
+
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model, scale_x, shift_x, scale_y, shift_y, scale_t, shift_t) -> io.NodeOutput:
+        m = model.clone()
+        m.set_model_rope_options(scale_x, shift_x, scale_y, shift_y, scale_t, shift_t)
+        return io.NodeOutput(m)
+
+
+class RopeExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            ScaleROPE
+        ]
+
+
+async def comfy_entrypoint() -> RopeExtension:
+    return RopeExtension()
diff --git a/nodes.py b/nodes.py
index 12e365ca9..5689f6fe1 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2329,6 +2329,7 @@ async def init_builtin_extra_nodes():
         "nodes_model_patch.py",
         "nodes_easycache.py",
         "nodes_audio_encoder.py",
+        "nodes_rope.py",
     ]
 
     import_failed = []

From 27d1bd882925e3bbdffb405cea098ac52bb20ac5 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Thu, 30 Oct 2025 19:51:58 -0700
Subject: [PATCH 18/20] Fix rope scaling. (#10560)

---
 comfy/ldm/wan/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index 77876c2e7..5ec1511ce 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -605,9 +605,9 @@ class WanModel(torch.nn.Module):
         w_start = 0
         rope_options = transformer_options.get("rope_options", None)
         if rope_options is not None:
-            t_len = t_len * rope_options.get("scale_t", 1.0)
-            h_len = h_len * rope_options.get("scale_y", 1.0)
-            w_len = w_len * rope_options.get("scale_x", 1.0)
+            t_len = (t_len - 1.0) * rope_options.get("scale_t", 1.0) + 1.0
+            h_len = (h_len - 1.0) * rope_options.get("scale_y", 1.0) + 1.0
+            w_len = (w_len - 1.0) * rope_options.get("scale_x", 1.0) + 1.0
 
             t_start += rope_options.get("shift_t", 0.0)
             h_start += rope_options.get("shift_y", 0.0)

From 7f374e42c833c69c71605507b90f79cc26d14a71 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 31 Oct 2025 12:41:40 -0700
Subject: [PATCH 19/20] ScaleROPE now works on Lumina models. (#10578)

---
 comfy/ldm/lumina/model.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py
index f87d98ac0..b4494a51d 100644
--- a/comfy/ldm/lumina/model.py
+++ b/comfy/ldm/lumina/model.py
@@ -522,7 +522,7 @@ class NextDiT(nn.Module):
         max_cap_len = max(l_effective_cap_len)
         max_img_len = max(l_effective_img_len)
 
-        position_ids = torch.zeros(bsz, max_seq_len, 3, dtype=torch.int32, device=device)
+        position_ids = torch.zeros(bsz, max_seq_len, 3, dtype=torch.float32, device=device)
 
         for i in range(bsz):
             cap_len = l_effective_cap_len[i]
@@ -531,10 +531,22 @@ class NextDiT(nn.Module):
             H_tokens, W_tokens = H // pH, W // pW
             assert H_tokens * W_tokens == img_len
 
-            position_ids[i, :cap_len, 0] = torch.arange(cap_len, dtype=torch.int32, device=device)
+            rope_options = transformer_options.get("rope_options", None)
+            h_scale = 1.0
+            w_scale = 1.0
+            h_start = 0
+            w_start = 0
+            if rope_options is not None:
+                h_scale = rope_options.get("scale_y", 1.0)
+                w_scale = rope_options.get("scale_x", 1.0)
+
+                h_start = rope_options.get("shift_y", 0.0)
+                w_start = rope_options.get("shift_x", 0.0)
+
+            position_ids[i, :cap_len, 0] = torch.arange(cap_len, dtype=torch.float32, device=device)
             position_ids[i, cap_len:cap_len+img_len, 0] = cap_len
-            row_ids = torch.arange(H_tokens, dtype=torch.int32, device=device).view(-1, 1).repeat(1, W_tokens).flatten()
-            col_ids = torch.arange(W_tokens, dtype=torch.int32, device=device).view(1, -1).repeat(H_tokens, 1).flatten()
+            row_ids = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
+            col_ids = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
             position_ids[i, cap_len:cap_len+img_len, 1] = row_ids
             position_ids[i, cap_len:cap_len+img_len, 2] = col_ids
 

From c58c13b2bad6df0de93cc0cf107e96522a3cb5b3 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 31 Oct 2025 21:25:17 -0700
Subject: [PATCH 20/20] Fix torch compile regression on fp8 ops. (#10580)

---
 comfy/ops.py                                  | 24 +++++------------
 comfy/quant_ops.py                            | 27 +++++++++++++++----
 .../comfy_quant/test_mixed_precision.py       |  8 +++---
 tests-unit/comfy_quant/test_quant_registry.py | 20 +++++++-------
 4 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index 18f6b804b..279f6b1a7 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -401,15 +401,9 @@ def fp8_linear(self, input):
     if dtype not in [torch.float8_e4m3fn]:
         return None
 
-    tensor_2d = False
-    if len(input.shape) == 2:
-        tensor_2d = True
-        input = input.unsqueeze(1)
-
-    input_shape = input.shape
     input_dtype = input.dtype
 
-    if len(input.shape) == 3:
+    if input.ndim == 3 or input.ndim == 2:
         w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True)
 
         scale_weight = self.scale_weight
@@ -422,24 +416,20 @@ def fp8_linear(self, input):
         if scale_input is None:
             scale_input = torch.ones((), device=input.device, dtype=torch.float32)
             input = torch.clamp(input, min=-448, max=448, out=input)
-            input = input.reshape(-1, input_shape[2]).to(dtype).contiguous()
             layout_params_weight = {'scale': scale_input, 'orig_dtype': input_dtype}
-            quantized_input = QuantizedTensor(input.reshape(-1, input_shape[2]).to(dtype).contiguous(), TensorCoreFP8Layout, layout_params_weight)
+            quantized_input = QuantizedTensor(input.to(dtype).contiguous(), "TensorCoreFP8Layout", layout_params_weight)
         else:
             scale_input = scale_input.to(input.device)
-            quantized_input = QuantizedTensor.from_float(input.reshape(-1, input_shape[2]), TensorCoreFP8Layout, scale=scale_input, dtype=dtype)
+            quantized_input = QuantizedTensor.from_float(input, "TensorCoreFP8Layout", scale=scale_input, dtype=dtype)
 
         # Wrap weight in QuantizedTensor - this enables unified dispatch
         # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
         layout_params_weight = {'scale': scale_weight, 'orig_dtype': input_dtype}
-        quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
+        quantized_weight = QuantizedTensor(w, "TensorCoreFP8Layout", layout_params_weight)
         o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
 
         uncast_bias_weight(self, w, bias, offload_stream)
-
-        if tensor_2d:
-            return o.reshape(input_shape[0], -1)
-        return o.reshape((-1, input_shape[1], self.weight.shape[0]))
+        return o
 
     return None
 
@@ -540,12 +530,12 @@ if CUBLAS_IS_AVAILABLE:
 # ==============================================================================
 # Mixed Precision Operations
 # ==============================================================================
-from .quant_ops import QuantizedTensor, TensorCoreFP8Layout
+from .quant_ops import QuantizedTensor
 
 QUANT_FORMAT_MIXINS = {
     "float8_e4m3fn": {
         "dtype": torch.float8_e4m3fn,
-        "layout_type": TensorCoreFP8Layout,
+        "layout_type": "TensorCoreFP8Layout",
         "parameters": {
             "weight_scale": torch.nn.Parameter(torch.zeros((), dtype=torch.float32), requires_grad=False),
             "input_scale": torch.nn.Parameter(torch.zeros((), dtype=torch.float32), requires_grad=False),
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index c822fe53c..873f173ed 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -123,7 +123,7 @@ class QuantizedTensor(torch.Tensor):
             layout_type: Layout class (subclass of QuantizedLayout)
             layout_params: Dict with layout-specific parameters
         """
-        return torch.Tensor._make_subclass(cls, qdata, require_grad=False)
+        return torch.Tensor._make_wrapper_subclass(cls, qdata.shape, device=qdata.device, dtype=qdata.dtype, requires_grad=False)
 
     def __init__(self, qdata, layout_type, layout_params):
         self._qdata = qdata.contiguous()
@@ -183,11 +183,11 @@ class QuantizedTensor(torch.Tensor):
 
     @classmethod
     def from_float(cls, tensor, layout_type, **quantize_kwargs) -> 'QuantizedTensor':
-        qdata, layout_params = layout_type.quantize(tensor, **quantize_kwargs)
+        qdata, layout_params = LAYOUTS[layout_type].quantize(tensor, **quantize_kwargs)
         return cls(qdata, layout_type, layout_params)
 
     def dequantize(self) -> torch.Tensor:
-        return self._layout_type.dequantize(self._qdata, **self._layout_params)
+        return LAYOUTS[self._layout_type].dequantize(self._qdata, **self._layout_params)
 
     @classmethod
     def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
@@ -379,7 +379,12 @@ class TensorCoreFP8Layout(QuantizedLayout):
         return qtensor._qdata, qtensor._layout_params['scale']
 
 
-@register_layout_op(torch.ops.aten.linear.default, TensorCoreFP8Layout)
+LAYOUTS = {
+    "TensorCoreFP8Layout": TensorCoreFP8Layout,
+}
+
+
+@register_layout_op(torch.ops.aten.linear.default, "TensorCoreFP8Layout")
 def fp8_linear(func, args, kwargs):
     input_tensor = args[0]
     weight = args[1]
@@ -422,7 +427,7 @@ def fp8_linear(func, args, kwargs):
                     'scale': output_scale,
                     'orig_dtype': input_tensor._layout_params['orig_dtype']
                 }
-                return QuantizedTensor(output, TensorCoreFP8Layout, output_params)
+                return QuantizedTensor(output, "TensorCoreFP8Layout", output_params)
             else:
                 return output
 
@@ -436,3 +441,15 @@ def fp8_linear(func, args, kwargs):
         input_tensor = input_tensor.dequantize()
 
     return torch.nn.functional.linear(input_tensor, weight, bias)
+
+
+@register_layout_op(torch.ops.aten.view.default, "TensorCoreFP8Layout")
+@register_layout_op(torch.ops.aten.t.default, "TensorCoreFP8Layout")
+def fp8_func(func, args, kwargs):
+    input_tensor = args[0]
+    if isinstance(input_tensor, QuantizedTensor):
+        plain_input, scale_a = TensorCoreFP8Layout.get_plain_tensors(input_tensor)
+        ar = list(args)
+        ar[0] = plain_input
+        return QuantizedTensor(func(*ar, **kwargs), "TensorCoreFP8Layout", input_tensor._layout_params)
+    return func(*args, **kwargs)
diff --git a/tests-unit/comfy_quant/test_mixed_precision.py b/tests-unit/comfy_quant/test_mixed_precision.py
index 267bc177b..f8d1fd04e 100644
--- a/tests-unit/comfy_quant/test_mixed_precision.py
+++ b/tests-unit/comfy_quant/test_mixed_precision.py
@@ -14,7 +14,7 @@ if not has_gpu():
     args.cpu = True
 
 from comfy import ops
-from comfy.quant_ops import QuantizedTensor, TensorCoreFP8Layout
+from comfy.quant_ops import QuantizedTensor
 
 
 class SimpleModel(torch.nn.Module):
@@ -104,14 +104,14 @@ class TestMixedPrecisionOps(unittest.TestCase):
 
         # Verify weights are wrapped in QuantizedTensor
         self.assertIsInstance(model.layer1.weight, QuantizedTensor)
-        self.assertEqual(model.layer1.weight._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(model.layer1.weight._layout_type, "TensorCoreFP8Layout")
 
         # Layer 2 should NOT be quantized
         self.assertNotIsInstance(model.layer2.weight, QuantizedTensor)
 
         # Layer 3 should be quantized
         self.assertIsInstance(model.layer3.weight, QuantizedTensor)
-        self.assertEqual(model.layer3.weight._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(model.layer3.weight._layout_type, "TensorCoreFP8Layout")
 
         # Verify scales were loaded
         self.assertEqual(model.layer1.weight._layout_params['scale'].item(), 2.0)
@@ -155,7 +155,7 @@ class TestMixedPrecisionOps(unittest.TestCase):
         # Verify layer1.weight is a QuantizedTensor with scale preserved
         self.assertIsInstance(state_dict2["layer1.weight"], QuantizedTensor)
         self.assertEqual(state_dict2["layer1.weight"]._layout_params['scale'].item(), 3.0)
-        self.assertEqual(state_dict2["layer1.weight"]._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(state_dict2["layer1.weight"]._layout_type, "TensorCoreFP8Layout")
 
         # Verify non-quantized layers are standard tensors
         self.assertNotIsInstance(state_dict2["layer2.weight"], QuantizedTensor)
diff --git a/tests-unit/comfy_quant/test_quant_registry.py b/tests-unit/comfy_quant/test_quant_registry.py
index 477811029..9cb54ede8 100644
--- a/tests-unit/comfy_quant/test_quant_registry.py
+++ b/tests-unit/comfy_quant/test_quant_registry.py
@@ -25,14 +25,14 @@ class TestQuantizedTensor(unittest.TestCase):
         scale = torch.tensor(2.0)
         layout_params = {'scale': scale, 'orig_dtype': torch.bfloat16}
 
-        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        qt = QuantizedTensor(fp8_data, "TensorCoreFP8Layout", layout_params)
 
         self.assertIsInstance(qt, QuantizedTensor)
         self.assertEqual(qt.shape, (256, 128))
         self.assertEqual(qt.dtype, torch.float8_e4m3fn)
         self.assertEqual(qt._layout_params['scale'], scale)
         self.assertEqual(qt._layout_params['orig_dtype'], torch.bfloat16)
-        self.assertEqual(qt._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(qt._layout_type, "TensorCoreFP8Layout")
 
     def test_dequantize(self):
         """Test explicit dequantization"""
@@ -41,7 +41,7 @@ class TestQuantizedTensor(unittest.TestCase):
         scale = torch.tensor(3.0)
         layout_params = {'scale': scale, 'orig_dtype': torch.float32}
 
-        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        qt = QuantizedTensor(fp8_data, "TensorCoreFP8Layout", layout_params)
         dequantized = qt.dequantize()
 
         self.assertEqual(dequantized.dtype, torch.float32)
@@ -54,7 +54,7 @@ class TestQuantizedTensor(unittest.TestCase):
 
         qt = QuantizedTensor.from_float(
             float_tensor,
-            TensorCoreFP8Layout,
+            "TensorCoreFP8Layout",
             scale=scale,
             dtype=torch.float8_e4m3fn
         )
@@ -77,28 +77,28 @@ class TestGenericUtilities(unittest.TestCase):
         fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
         scale = torch.tensor(1.5)
         layout_params = {'scale': scale, 'orig_dtype': torch.float32}
-        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        qt = QuantizedTensor(fp8_data, "TensorCoreFP8Layout", layout_params)
 
         # Detach should return a new QuantizedTensor
         qt_detached = qt.detach()
 
         self.assertIsInstance(qt_detached, QuantizedTensor)
         self.assertEqual(qt_detached.shape, qt.shape)
-        self.assertEqual(qt_detached._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(qt_detached._layout_type, "TensorCoreFP8Layout")
 
     def test_clone(self):
         """Test clone operation on quantized tensor"""
         fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
         scale = torch.tensor(1.5)
         layout_params = {'scale': scale, 'orig_dtype': torch.float32}
-        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        qt = QuantizedTensor(fp8_data, "TensorCoreFP8Layout", layout_params)
 
         # Clone should return a new QuantizedTensor
         qt_cloned = qt.clone()
 
         self.assertIsInstance(qt_cloned, QuantizedTensor)
         self.assertEqual(qt_cloned.shape, qt.shape)
-        self.assertEqual(qt_cloned._layout_type, TensorCoreFP8Layout)
+        self.assertEqual(qt_cloned._layout_type, "TensorCoreFP8Layout")
 
         # Verify it's a deep copy
         self.assertIsNot(qt_cloned._qdata, qt._qdata)
@@ -109,7 +109,7 @@ class TestGenericUtilities(unittest.TestCase):
         fp8_data = torch.randn(10, 20, dtype=torch.float32).to(torch.float8_e4m3fn)
         scale = torch.tensor(1.5)
         layout_params = {'scale': scale, 'orig_dtype': torch.float32}
-        qt = QuantizedTensor(fp8_data, TensorCoreFP8Layout, layout_params)
+        qt = QuantizedTensor(fp8_data, "TensorCoreFP8Layout", layout_params)
 
         # Moving to same device should work (CPU to CPU)
         qt_cpu = qt.to('cpu')
@@ -169,7 +169,7 @@ class TestFallbackMechanism(unittest.TestCase):
         scale = torch.tensor(1.0)
         a_q = QuantizedTensor.from_float(
             a_fp32,
-            TensorCoreFP8Layout,
+            "TensorCoreFP8Layout",
             scale=scale,
             dtype=torch.float8_e4m3fn
         )