import os

import torch
import torch.nn as nn
import torch.nn.functional as F

from comfy.sd import load_checkpoint_guess_config, load_checkpoint
from comfy.model_patcher import ModelPatcher

import folder_paths


# ------------------------ Optimized Quantization Logic -------------------------

def quantize_input_for_int8_matmul(input_tensor, weight_scale):
    """Quantize input tensor for optimized int8 matrix multiplication"""
    # Calculate input scale per batch/sequence dimension
    input_scale = input_tensor.abs().amax(dim=-1, keepdim=True) / 127.0
    input_scale = torch.clamp(input_scale, min=1e-8)

    # Quantize input to int8
    quantized_input = torch.clamp(
        (input_tensor / input_scale).round(), -128, 127
    ).to(torch.int8)

    # Combine input and weight scales
    combined_scale = input_scale * weight_scale

    # Flatten tensors for matrix multiplication if needed
    original_shape = input_tensor.shape
    if input_tensor.dim() > 2:
        quantized_input = quantized_input.flatten(0, -2).contiguous()
        combined_scale = combined_scale.flatten(0, -2).contiguous()

    # Ensure scale precision for accurate computation
    if combined_scale.dtype == torch.float16:
        combined_scale = combined_scale.to(torch.float32)

    return quantized_input, combined_scale, original_shape


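# Illustrative sketch of the shapes involved (hypothetical sizes, not part of the
# node API): with a [batch, seq, features] activation and a per-output-channel
# weight scale, the helper returns a 2D int8 activation plus a combined scale.
#   x = torch.randn(2, 77, 768)
#   w_scale = torch.rand(320) * 0.01
#   q_x, comb_scale, shape = quantize_input_for_int8_matmul(x, w_scale)
#   # q_x: [154, 768] int8, comb_scale: [154, 320] float32, shape: (2, 77, 768)

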
def optimized_int8_matmul(input_tensor, quantized_weight, weight_scale, bias=None):
    """Optimized int8 matrix multiplication using torch._int_mm"""
    batch_size = input_tensor.numel() // input_tensor.shape[-1]

    # Performance threshold: only use the optimized path for larger matrices.
    # This prevents overhead from dominating small computations.
    if batch_size >= 32 and input_tensor.shape[-1] >= 32:
        # Quantize input tensor for int8 computation
        q_input, combined_scale, orig_shape = quantize_input_for_int8_matmul(
            input_tensor, weight_scale
        )

        # Perform the int8 matrix multiplication.
        # This is significantly faster than the equivalent floating-point matmul.
        result = torch._int_mm(q_input, quantized_weight)

        # Dequantize result back to floating point
        result = result.to(combined_scale.dtype) * combined_scale

        # Reshape result back to original input dimensions
        if len(orig_shape) > 2:
            new_shape = list(orig_shape[:-1]) + [quantized_weight.shape[-1]]
            result = result.reshape(new_shape)

        # Add bias if present
        if bias is not None:
            result = result + bias

        return result
    else:
        # Fall back to standard dequantization for small matrices, where the
        # quantization overhead is not beneficial. The weight arrives here in the
        # transposed [in_features, out_features] layout expected by torch._int_mm,
        # so a plain matmul is used instead of F.linear.
        dequantized_weight = quantized_weight.to(input_tensor.dtype) * weight_scale
        result = input_tensor @ dequantized_weight
        if bias is not None:
            result = result + bias
        return result


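# Illustrative call (hypothetical tensors, assumes a device where torch._int_mm is
# supported): mirrors F.linear, but with the weight pre-quantized to int8 and
# stored as [in_features, out_features].
#   q_w = torch.randint(-127, 128, (768, 320), dtype=torch.int8)
#   w_scale = torch.rand(320) * 0.01
#   y = optimized_int8_matmul(torch.randn(64, 768), q_w, w_scale)   # y: [64, 320]

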
def make_optimized_quantized_forward(quant_dtype="float32", use_int8_matmul=True):
    """Create an optimized quantized forward function for neural network layers"""
    def forward(self, x):
        # Determine computation precision
        dtype = torch.float32 if quant_dtype == "float32" else torch.float16

        # Get input device for consistent placement
        device = x.device

        # Move quantized weights and scales to the input device and dtype
        qW = self.int8_weight.to(device)
        scale = self.scale.to(device, dtype=dtype)

        # Handle zero point for asymmetric quantization
        if hasattr(self, 'zero_point') and self.zero_point is not None:
            zp = self.zero_point.to(device, dtype=dtype)
        else:
            zp = None

        # Ensure input is in the correct precision
        x = x.to(dtype)

        # Prepare bias if present, making sure it is on the correct device
        bias = None
        if self.bias is not None:
            bias = self.bias.to(device, dtype=dtype)

        # Apply LoRA adaptation if present (before the main computation for better accuracy)
        lora_output = None
        if hasattr(self, "lora_down") and hasattr(self, "lora_up") and hasattr(self, "lora_alpha"):
            # Ensure LoRA weights are on the correct device
            lora_down = self.lora_down.to(device)
            lora_up = self.lora_up.to(device)
            lora_output = lora_up(lora_down(x)) * self.lora_alpha

        # Choose the computation path based on layer type and optimization settings
        if isinstance(self, nn.Linear):
            # Linear layers can use the optimized int8 matmul
            if (use_int8_matmul and zp is None and
                    hasattr(self, '_use_optimized_matmul') and self._use_optimized_matmul):
                # Optimized path (only for symmetric quantization)
                result = optimized_int8_matmul(x, qW, scale, bias)
            else:
                # Standard dequantization path
                if zp is not None:
                    # Asymmetric quantization: subtract zero point, then scale
                    W = (qW.to(dtype) - zp) * scale
                else:
                    # Symmetric quantization: just scale
                    W = qW.to(dtype) * scale
                result = F.linear(x, W, bias)

        elif isinstance(self, nn.Conv2d):
            # Convolution layers use standard dequantization
            if zp is not None:
                W = (qW.to(dtype) - zp) * scale
            else:
                W = qW.to(dtype) * scale
            result = F.conv2d(x, W, bias, self.stride, self.padding, self.dilation, self.groups)

        else:
            # Fallback for unsupported layer types
            return x

        # Add LoRA output if computed
        if lora_output is not None:
            result = result + lora_output

        return result

    return forward


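# Illustrative sketch: apply_optimized_quantization() below attaches the returned
# function to each quantized layer as a bound method, e.g.
#   layer.forward = make_optimized_quantized_forward("float32", True).__get__(layer)

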
def quantize_weight(weight: torch.Tensor, num_bits=8, use_asymmetric=False):
    """Quantize weights with support for both symmetric and asymmetric quantization"""
    # Determine reduction dimensions (preserve output channels)
    reduce_dim = 1 if weight.ndim == 2 else [i for i in range(weight.ndim) if i != 0]

    if use_asymmetric:
        # Asymmetric quantization: use full range [0, 255] for uint8
        min_val = weight.amin(dim=reduce_dim, keepdim=True)
        max_val = weight.amax(dim=reduce_dim, keepdim=True)
        scale = torch.clamp((max_val - min_val) / 255.0, min=1e-8)
        zero_point = torch.clamp((-min_val / scale).round(), 0, 255).to(torch.uint8)
        qweight = torch.clamp((weight / scale + zero_point).round(), 0, 255).to(torch.uint8)
    else:
        # Symmetric quantization: use range [-127, 127] for int8
        w_max = weight.abs().amax(dim=reduce_dim, keepdim=True)
        scale = torch.clamp(w_max / 127.0, min=1e-8)
        qweight = torch.clamp((weight / scale).round(), -128, 127).to(torch.int8)
        zero_point = None

    return qweight, scale.to(torch.float16), zero_point


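# Illustrative round-trip (hypothetical 4x8 weight): symmetric mode keeps one
# scale per output row, and dequantizing approximates the original weight to
# within roughly half a quantization step per element.
#   W = torch.randn(4, 8)
#   qW, scale, zp = quantize_weight(W)        # int8 values, float16 scale, zp is None
#   W_hat = qW.float() * scale.float()        # |W - W_hat| <= scale / 2 (approximately)

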
def apply_optimized_quantization(model, use_asymmetric=False, quant_dtype="float32",
                                 use_int8_matmul=True):
    """Apply quantization with optimized inference paths to a neural network model"""
    quant_count = 0

    def _quantize_module(module, prefix=""):
        nonlocal quant_count
        for name, child in module.named_children():
            full_name = f"{prefix}.{name}" if prefix else name

            # Skip text encoder and CLIP-related modules to avoid conditioning issues
            if any(skip_name in full_name.lower() for skip_name in
                   ['text_encoder', 'clip', 'embedder', 'conditioner']):
                print(f"⏭️ Skipping {full_name} (text/conditioning module)")
                _quantize_module(child, full_name)
                continue

            if isinstance(child, (nn.Linear, nn.Conv2d)):
                try:
                    # Extract and quantize weights
                    W = child.weight.data.float()
                    qW, scale, zp = quantize_weight(W, use_asymmetric=use_asymmetric)

                    # Store original device info before removing weight
                    original_device = child.weight.device

                    # Remove original weight parameter to save memory
                    del child._parameters["weight"]

                    # Register quantized parameters as buffers (non-trainable),
                    # keeping them on the layer's original device
                    child.register_buffer("int8_weight", qW.to(original_device))
                    child.register_buffer("scale", scale.to(original_device))
                    if zp is not None:
                        child.register_buffer("zero_point", zp.to(original_device))
                    else:
                        child.zero_point = None

                    # Configure optimization settings for this layer
                    if isinstance(child, nn.Linear) and not use_asymmetric and use_int8_matmul:
                        # Enable optimized matmul for symmetric quantized linear layers
                        child._use_optimized_matmul = True
                        # Transpose weight for optimized matmul layout
                        child.int8_weight = child.int8_weight.transpose(0, 1).contiguous()
                        # Adjust scale dimensions for matmul ([out, 1] -> [out])
                        child.scale = child.scale.squeeze(-1)
                    else:
                        child._use_optimized_matmul = False

                    # Assign optimized forward function
                    child.forward = make_optimized_quantized_forward(
                        quant_dtype, use_int8_matmul
                    ).__get__(child)

                    quant_count += 1
                    opt_status = "optimized" if child._use_optimized_matmul else "standard"
                    # print(f"✅ Quantized {full_name} ({opt_status})")

                except Exception as e:
                    print(f"❌ Failed to quantize {full_name}: {str(e)}")

            # Recursively process child modules
            _quantize_module(child, full_name)

    _quantize_module(model)
    print(f"✅ Successfully quantized {quant_count} layers with optimized inference")
    return model


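# Illustrative use on a toy model (not a ComfyUI object): both Linear layers are
# quantized in place and receive the optimized forward.
#   toy = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 64))
#   apply_optimized_quantization(toy, use_asymmetric=False, use_int8_matmul=True)
#   out = toy(torch.randn(8, 64))

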
# ---------------------- ComfyUI Node Implementations ------------------------

class CheckpointLoaderQuantized2:
    """Original checkpoint loader with quantization"""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "ckpt_name": (folder_paths.get_filename_list("checkpoints"), ),
                "enable_quant": ("BOOLEAN", {"default": True}),
                "use_asymmetric": ("BOOLEAN", {"default": False}),
                "quant_dtype": (["float32", "float16"], {"default": "float32"}),
                "use_int8_matmul": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    FUNCTION = "load_quantized"
    CATEGORY = "CFZ/loaders"
    OUTPUT_NODE = False

    def load_quantized(self, ckpt_name, enable_quant, use_asymmetric, quant_dtype,
                       use_int8_matmul):
        """Load and optionally quantize a checkpoint with optimized inference"""
        ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)

        if not os.path.exists(ckpt_path):
            raise FileNotFoundError(f"Checkpoint {ckpt_name} not found at {ckpt_path}")

        # Load checkpoint using ComfyUI's standard loading mechanism
        model_patcher, clip, vae, _ = load_checkpoint_guess_config(
            ckpt_path,
            output_vae=True,
            output_clip=True,
            embedding_directory=folder_paths.get_folder_paths("embeddings")
        )

        if enable_quant:
            # Determine quantization configuration
            quant_mode = "Asymmetric" if use_asymmetric else "Symmetric"
            matmul_mode = "Optimized Int8" if use_int8_matmul and not use_asymmetric else "Standard"

            print(f"🔧 Applying {quant_mode} 8-bit quantization to {ckpt_name}")
            print(f" MatMul: {matmul_mode}, Forward: Optimized (dtype={quant_dtype})")

            # Apply quantization with optimizations
            apply_optimized_quantization(
                model_patcher.model,
                use_asymmetric=use_asymmetric,
                quant_dtype=quant_dtype,
                use_int8_matmul=use_int8_matmul
            )
        else:
            print(f"🔧 Loading {ckpt_name} without quantization")

        return (model_patcher, clip, vae)


class ModelQuantizationPatcher:
    """Quantization patcher that can be applied to any model in the workflow"""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model": ("MODEL",),
                "use_asymmetric": ("BOOLEAN", {"default": False}),
                "quant_dtype": (["float32", "float16"], {"default": "float32"}),
                "use_int8_matmul": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch_model"
    CATEGORY = "CFZ/model patches"
    OUTPUT_NODE = False

    def patch_model(self, model, use_asymmetric, quant_dtype, use_int8_matmul):
        """Apply quantization to an existing model"""
        # Clone the model to avoid modifying the original
        import copy
        quantized_model = copy.deepcopy(model)

        # Determine quantization configuration
        quant_mode = "Asymmetric" if use_asymmetric else "Symmetric"
        matmul_mode = "Optimized Int8" if use_int8_matmul and not use_asymmetric else "Standard"

        print(f"🔧 Applying {quant_mode} 8-bit quantization to model")
        print(f" MatMul: {matmul_mode}, Forward: Optimized (dtype={quant_dtype})")

        # Apply quantization with optimizations
        apply_optimized_quantization(
            quantized_model.model,
            use_asymmetric=use_asymmetric,
            quant_dtype=quant_dtype,
            use_int8_matmul=use_int8_matmul
        )

        return (quantized_model,)


class UNetQuantizationPatcher:
    """Specialized quantization patcher for UNet models loaded separately"""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model": ("MODEL",),
                "use_asymmetric": ("BOOLEAN", {"default": False}),
                "quant_dtype": (["float32", "float16"], {"default": "float32"}),
                "use_int8_matmul": ("BOOLEAN", {"default": True}),
                "skip_input_blocks": ("BOOLEAN", {"default": False}),
                "skip_output_blocks": ("BOOLEAN", {"default": False}),
                "show_memory_usage": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch_unet"
    CATEGORY = "CFZ/model patches"
    OUTPUT_NODE = False

    def get_model_memory_usage(self, model, force_calculation=False):
        """Calculate memory usage of model parameters (CPU + GPU)"""
        total_memory = 0
        param_count = 0
        gpu_memory = 0

        # Count all parameters (CPU + GPU)
        for param in model.parameters():
            memory_bytes = param.data.element_size() * param.data.nelement()
            total_memory += memory_bytes
            param_count += param.data.nelement()

            if param.data.is_cuda:
                gpu_memory += memory_bytes

        # Also check for quantized buffers
        for name, buffer in model.named_buffers():
            if 'int8_weight' in name or 'scale' in name or 'zero_point' in name:
                memory_bytes = buffer.element_size() * buffer.nelement()
                total_memory += memory_bytes

                if buffer.is_cuda:
                    gpu_memory += memory_bytes

        # If force_calculation is True and nothing is on the GPU, return total memory as the estimate
        if force_calculation and gpu_memory == 0:
            return total_memory, param_count, total_memory

        return total_memory, param_count, gpu_memory

    def format_memory_size(self, bytes_size):
        """Format memory size in human readable format"""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if bytes_size < 1024.0:
                return f"{bytes_size:.2f} {unit}"
            bytes_size /= 1024.0
        return f"{bytes_size:.2f} TB"

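    # Illustrative usage of the helper above (value chosen arbitrarily):
    #   self.format_memory_size(3.2 * 1024 ** 3)  ->  "3.20 GB"
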
    def patch_unet(self, model, use_asymmetric, quant_dtype, use_int8_matmul,
                   skip_input_blocks, skip_output_blocks, show_memory_usage):
        """Apply selective quantization to UNet model with block-level control"""
        import copy

        # Measure original memory usage
        if show_memory_usage:
            original_memory, original_params, original_gpu = self.get_model_memory_usage(model.model, force_calculation=True)
            print(f"📊 Original Model Memory Usage:")
            print(f" Parameters: {original_params:,}")
            print(f" Total Size: {self.format_memory_size(original_memory)}")
            if original_gpu > 0:
                print(f" GPU Memory: {self.format_memory_size(original_gpu)}")
            else:
                print(f" GPU Memory: Not loaded (will use ~{self.format_memory_size(original_memory)} when loaded)")

        quantized_model = copy.deepcopy(model)

        # Determine quantization configuration
        quant_mode = "Asymmetric" if use_asymmetric else "Symmetric"
        matmul_mode = "Optimized Int8" if use_int8_matmul and not use_asymmetric else "Standard"

        print(f"🔧 Applying {quant_mode} 8-bit quantization to UNet")
        print(f" MatMul: {matmul_mode}, Forward: Optimized (dtype={quant_dtype})")

        if skip_input_blocks or skip_output_blocks:
            print(f" Skipping: Input blocks={skip_input_blocks}, Output blocks={skip_output_blocks}")

        # Apply quantization with selective skipping
        self._apply_selective_quantization(
            quantized_model.model,
            use_asymmetric=use_asymmetric,
            quant_dtype=quant_dtype,
            use_int8_matmul=use_int8_matmul,
            skip_input_blocks=skip_input_blocks,
            skip_output_blocks=skip_output_blocks
        )

        # Measure quantized memory usage
        if show_memory_usage:
            quantized_memory, quantized_params, quantized_gpu = self.get_model_memory_usage(quantized_model.model, force_calculation=True)
            memory_saved = original_memory - quantized_memory
            memory_reduction_pct = (memory_saved / original_memory) * 100 if original_memory > 0 else 0

            print(f"📊 Quantized Model Memory Usage:")
            print(f" Parameters: {quantized_params:,}")
            print(f" Total Size: {self.format_memory_size(quantized_memory)}")
            if quantized_gpu > 0:
                print(f" GPU Memory: {self.format_memory_size(quantized_gpu)}")
            else:
                print(f" GPU Memory: Not loaded (will use ~{self.format_memory_size(quantized_memory)} when loaded)")
            print(f" Memory Saved: {self.format_memory_size(memory_saved)} ({memory_reduction_pct:.1f}%)")

            # Show CUDA memory info if available
            if torch.cuda.is_available():
                allocated = torch.cuda.memory_allocated()
                reserved = torch.cuda.memory_reserved()
                print(f"📊 Total GPU Memory Status:")
                print(f" Currently Allocated: {self.format_memory_size(allocated)}")
                print(f" Reserved by PyTorch: {self.format_memory_size(reserved)}")

        return (quantized_model,)

    def _apply_selective_quantization(self, model, use_asymmetric=False, quant_dtype="float32",
                                      use_int8_matmul=True, skip_input_blocks=False,
                                      skip_output_blocks=False):
        """Apply quantization with selective block skipping for UNet"""
        quant_count = 0

        def _quantize_module(module, prefix=""):
            nonlocal quant_count
            for name, child in module.named_children():
                full_name = f"{prefix}.{name}" if prefix else name

                # Skip blocks based on user preference
                if skip_input_blocks and "input_blocks" in full_name:
                    print(f"⏭️ Skipping {full_name} (input block)")
                    _quantize_module(child, full_name)
                    continue

                if skip_output_blocks and "output_blocks" in full_name:
                    print(f"⏭️ Skipping {full_name} (output block)")
                    _quantize_module(child, full_name)
                    continue

                # Skip text encoder and CLIP-related modules
                if any(skip_name in full_name.lower() for skip_name in
                       ['text_encoder', 'clip', 'embedder', 'conditioner']):
                    print(f"⏭️ Skipping {full_name} (text/conditioning module)")
                    _quantize_module(child, full_name)
                    continue

                if isinstance(child, (nn.Linear, nn.Conv2d)):
                    try:
                        # Extract and quantize weights
                        W = child.weight.data.float()
                        qW, scale, zp = quantize_weight(W, use_asymmetric=use_asymmetric)

                        # Store original device info before removing weight
                        original_device = child.weight.device

                        # Remove original weight parameter to save memory
                        del child._parameters["weight"]

                        # Register quantized parameters as buffers (non-trainable)
                        child.register_buffer("int8_weight", qW.to(original_device))
                        child.register_buffer("scale", scale.to(original_device))
                        if zp is not None:
                            child.register_buffer("zero_point", zp.to(original_device))
                        else:
                            child.zero_point = None

                        # Configure optimization settings for this layer
                        if isinstance(child, nn.Linear) and not use_asymmetric and use_int8_matmul:
                            # Enable optimized matmul for symmetric quantized linear layers
                            child._use_optimized_matmul = True
                            # Transpose weight for optimized matmul layout
                            child.int8_weight = child.int8_weight.transpose(0, 1).contiguous()
                            # Adjust scale dimensions for matmul
                            child.scale = child.scale.squeeze(-1)
                        else:
                            child._use_optimized_matmul = False

                        # Assign optimized forward function
                        child.forward = make_optimized_quantized_forward(
                            quant_dtype, use_int8_matmul
                        ).__get__(child)

                        quant_count += 1

                    except Exception as e:
                        print(f"❌ Failed to quantize {full_name}: {str(e)}")

                # Recursively process child modules
                _quantize_module(child, full_name)

        _quantize_module(model)
        print(f"✅ Successfully quantized {quant_count} layers with selective patching")


# ------------------------- Node Registration -------------------------------
NODE_CLASS_MAPPINGS = {
    "CheckpointLoaderQuantized2": CheckpointLoaderQuantized2,
    "ModelQuantizationPatcher": ModelQuantizationPatcher,
    "UNetQuantizationPatcher": UNetQuantizationPatcher,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "CheckpointLoaderQuantized2": "CFZ Checkpoint Loader (Optimized)",
    "ModelQuantizationPatcher": "CFZ Model Quantization Patcher",
    "UNetQuantizationPatcher": "CFZ UNet Quantization Patcher",
}

__all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS']