unload partial

This commit is contained in:
strint 2025-10-20 18:27:38 +08:00
parent 21ebcada1d
commit 4ac827d564

View File

@@ -516,9 +516,17 @@ class LoadedModel:
logging.info(f"offload_device: {self.model.offload_device}") logging.info(f"offload_device: {self.model.offload_device}")
available_memory = get_free_memory(self.model.offload_device) available_memory = get_free_memory(self.model.offload_device)
logging.info(f"before unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB") logging.info(f"before unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
if available_memory < memory_to_free: reserved_memory = 1024*1024*1024 # 1GB reserved memory for other usage
logging.error(f"Not enough cpu memory to unload. Available: {available_memory/(1024*1024*1024)} GB, Required: {memory_to_free/(1024*1024*1024)} GB") if available_memory < reserved_memory:
logging.error(f"Not enough cpu memory to unload. Available: {available_memory/(1024*1024*1024)} GB, Reserved: {reserved_memory/(1024*1024*1024)} GB")
return False return False
else:
offload_memory = available_memory - reserved_memory
if offload_memory < memory_to_free:
memory_to_free = offload_memory
logging.info(f"Not enough cpu memory to unload. Available: {available_memory/(1024*1024*1024)} GB, Reserved: {reserved_memory/(1024*1024*1024)} GB, Offload: {offload_memory/(1024*1024*1024)} GB")
logging.info(f"Set memory_to_free to {memory_to_free/(1024*1024*1024)} GB")
try: try:
if memory_to_free is not None: if memory_to_free is not None:
if memory_to_free < self.model.loaded_size(): if memory_to_free < self.model.loaded_size():