Compare commits

..

No commits in common. "b8afb60ee88f921a6331e631986fa608d1d66989" and "83a0fa107baf0f70549dc2363f8504fda32bb510" have entirely different histories.

3 changed files with 15 additions and 5 deletions

View File

@ -449,7 +449,7 @@ class fp8_ops(manual_cast):
return None
def forward_comfy_cast_weights(self, input):
if len(self.weight_function) == 0 and len(self.bias_function) == 0:
if not self.training:
try:
out = fp8_linear(self, input)
if out is not None:

View File

@ -241,6 +241,9 @@ class QuantizedTensor(torch.Tensor):
def storage(self):
return self._qdata.storage()
def untyped_storage(self):
return self._qdata.untyped_storage()
# ==============================================================================
# Generic Utilities (Layout-Agnostic Operations)
# ==============================================================================
@ -252,6 +255,12 @@ def _create_transformed_qtensor(qt, transform_fn):
def _handle_device_transfer(qt, target_device, target_dtype=None, target_layout=None, op_name="to"):
if target_dtype is not None and target_dtype != qt.dtype:
logging.warning(
f"QuantizedTensor: dtype conversion requested to {target_dtype}, "
f"but not supported for quantized tensors. Ignoring dtype."
)
if target_layout is not None and target_layout != torch.strided:
logging.warning(
f"QuantizedTensor: layout change requested to {target_layout}, "
@ -271,8 +280,6 @@ def _handle_device_transfer(qt, target_device, target_dtype=None, target_layout=
logging.debug(f"QuantizedTensor.{op_name}: Moving from {current_device} to {target_device}")
new_q_data = qt._qdata.to(device=target_device)
new_params = _move_layout_params_to_device(qt._layout_params, target_device)
if target_dtype is not None:
new_params["orig_dtype"] = target_dtype
new_qt = QuantizedTensor(new_q_data, qt._layout_type, new_params)
logging.debug(f"QuantizedTensor.{op_name}: Created new tensor on {target_device}")
return new_qt
@ -396,7 +403,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn, stochastic_rounding=0, inplace_ops=False):
orig_dtype = tensor.dtype
if isinstance(scale, str) and scale == "recalculate":
if scale == "recalculate":
scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max
if scale is not None:

View File

@ -55,9 +55,12 @@ class OvisTEModel(sd1_clip.SD1ClipModel):
return out, pooled, {}
def te(dtype_llama=None, llama_quantization_metadata=None):
def te(dtype_llama=None, llama_scaled_fp8=None, llama_quantization_metadata=None):
class OvisTEModel_(OvisTEModel):
def __init__(self, device="cpu", dtype=None, model_options={}):
if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
model_options = model_options.copy()
model_options["scaled_fp8"] = llama_scaled_fp8
if dtype_llama is not None:
dtype = dtype_llama
if llama_quantization_metadata is not None: