Mirror of https://github.com/comfyanonymous/ComfyUI.git, synced 2026-01-26 22:30:19 +08:00
Update cfz_patcher.py
This commit is contained in:
parent 8beae68f3f
commit e6367f7183
@@ -350,25 +350,35 @@ class UNetQuantizationPatcher:
     CATEGORY = "Model Patching"
     OUTPUT_NODE = False
 
-    def get_model_memory_usage(self, model):
-        """Calculate actual memory usage of model parameters"""
+    def get_model_memory_usage(self, model, force_calculation=False):
+        """Calculate memory usage of model parameters (CPU + GPU)"""
         total_memory = 0
         param_count = 0
+        gpu_memory = 0
 
+        # Count all parameters (CPU + GPU)
         for param in model.parameters():
+            memory_bytes = param.data.element_size() * param.data.nelement()
+            total_memory += memory_bytes
+            param_count += param.data.nelement()
+
             if param.data.is_cuda:
-                # Get actual memory usage on GPU
-                memory_bytes = param.data.element_size() * param.data.nelement()
-                total_memory += memory_bytes
-                param_count += param.data.nelement()
+                gpu_memory += memory_bytes
 
         # Also check for quantized buffers
         for name, buffer in model.named_buffers():
-            if buffer.is_cuda and ('int8_weight' in name or 'scale' in name or 'zero_point' in name):
+            if 'int8_weight' in name or 'scale' in name or 'zero_point' in name:
                 memory_bytes = buffer.element_size() * buffer.nelement()
                 total_memory += memory_bytes
 
-        return total_memory, param_count
+                if buffer.is_cuda:
+                    gpu_memory += memory_bytes
+
+        # If force_calculation is True and nothing on GPU, return total memory as estimate
+        if force_calculation and gpu_memory == 0:
+            return total_memory, param_count, total_memory
+
+        return total_memory, param_count, gpu_memory
 
     def format_memory_size(self, bytes_size):
         """Format memory size in human readable format"""
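The patched method now returns a three-tuple (total_memory, param_count, gpu_memory) and accepts force_calculation, so callers of the old two-tuple signature have to be updated. A minimal sketch of the same accounting applied to a stand-in module, assuming the semantics shown in the diff (the nn.Sequential here is a placeholder, not the actual UNet):

    import torch.nn as nn

    # Placeholder model; any nn.Module exercises the same accounting logic.
    model = nn.Sequential(nn.Linear(64, 128), nn.Linear(128, 64))

    total_memory, param_count, gpu_memory = 0, 0, 0
    for param in model.parameters():
        memory_bytes = param.data.element_size() * param.data.nelement()
        total_memory += memory_bytes          # counted whether on CPU or GPU
        param_count += param.data.nelement()
        if param.data.is_cuda:
            gpu_memory += memory_bytes        # only GPU-resident tensors add here

    # force_calculation semantics: with nothing on the GPU yet, report the
    # total size as the expected GPU footprint instead of 0.
    force_calculation = True
    gpu_report = total_memory if (force_calculation and gpu_memory == 0) else gpu_memory
    print(param_count, total_memory, gpu_report)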
@@ -385,10 +395,14 @@ class UNetQuantizationPatcher:
 
         # Measure original memory usage
         if show_memory_usage:
-            original_memory, original_params = self.get_model_memory_usage(model.model)
+            original_memory, original_params, original_gpu = self.get_model_memory_usage(model.model, force_calculation=True)
             print(f"📊 Original Model Memory Usage:")
             print(f" Parameters: {original_params:,}")
-            print(f" VRAM Usage: {self.format_memory_size(original_memory)}")
+            print(f" Total Size: {self.format_memory_size(original_memory)}")
+            if original_gpu > 0:
+                print(f" GPU Memory: {self.format_memory_size(original_gpu)}")
+            else:
+                print(f" GPU Memory: Not loaded (will use ~{self.format_memory_size(original_memory)} when loaded)")
 
         quantized_model = copy.deepcopy(model)
 
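The new GPU-memory branch leans on self.format_memory_size, which is declared in the previous hunk but whose body is outside this diff. For reference only, helpers like it are typically a divide-by-1024 loop; the sketch below is an assumed shape, not the implementation in cfz_patcher.py:

    def format_memory_size(bytes_size):
        """Illustrative helper: render a byte count as a human-readable string."""
        size = float(bytes_size)
        for unit in ("B", "KB", "MB", "GB"):
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} TB"

    # Example: 2.6B fp16 parameters at 2 bytes each.
    print(format_memory_size(2_600_000_000 * 2))  # -> "4.84 GB"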
@@ -414,27 +428,26 @@ class UNetQuantizationPatcher:
 
         # Measure quantized memory usage
         if show_memory_usage:
-            # Force GPU memory allocation by moving model to device if needed
-            if torch.cuda.is_available():
-                device = next(quantized_model.model.parameters()).device
-                quantized_model.model.to(device)
-
-            quantized_memory, quantized_params = self.get_model_memory_usage(quantized_model.model)
+            quantized_memory, quantized_params, quantized_gpu = self.get_model_memory_usage(quantized_model.model, force_calculation=True)
             memory_saved = original_memory - quantized_memory
             memory_reduction_pct = (memory_saved / original_memory) * 100 if original_memory > 0 else 0
 
             print(f"📊 Quantized Model Memory Usage:")
             print(f" Parameters: {quantized_params:,}")
-            print(f" VRAM Usage: {self.format_memory_size(quantized_memory)}")
+            print(f" Total Size: {self.format_memory_size(quantized_memory)}")
+            if quantized_gpu > 0:
+                print(f" GPU Memory: {self.format_memory_size(quantized_gpu)}")
+            else:
+                print(f" GPU Memory: Not loaded (will use ~{self.format_memory_size(quantized_memory)} when loaded)")
             print(f" Memory Saved: {self.format_memory_size(memory_saved)} ({memory_reduction_pct:.1f}%)")
 
             # Show CUDA memory info if available
             if torch.cuda.is_available():
                 allocated = torch.cuda.memory_allocated()
                 reserved = torch.cuda.memory_reserved()
-                print(f"📊 Total GPU Memory:")
-                print(f" Allocated: {self.format_memory_size(allocated)}")
-                print(f" Reserved: {self.format_memory_size(reserved)}")
+                print(f"📊 Total GPU Memory Status:")
+                print(f" Currently Allocated: {self.format_memory_size(allocated)}")
+                print(f" Reserved by PyTorch: {self.format_memory_size(reserved)}")
 
         return (quantized_model,)
 
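With the .to(device) forcing removed, the savings line is plain arithmetic over the two measurements returned by get_model_memory_usage. A rough worked example with illustrative figures (not taken from this node): int8 weights are half the size of fp16 weights, so the reduction lands near 50% before the extra scale/zero_point buffers are counted:

    # Illustrative figures only: 860M parameters, fp16 (2 bytes) -> int8 (1 byte).
    original_memory = 860_000_000 * 2   # ~1.72 GB of fp16 weights
    quantized_memory = 860_000_000 * 1  # ~0.86 GB of int8 weights (scales ignored)

    memory_saved = original_memory - quantized_memory
    memory_reduction_pct = (memory_saved / original_memory) * 100 if original_memory > 0 else 0
    print(f"Memory Saved: {memory_saved} bytes ({memory_reduction_pct:.1f}%)")  # -> 50.0%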
@@ -527,4 +540,4 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "UNetQuantizationPatcher": "CFZ UNet Quantization Patcher",
 }
 
 __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS']