debug error

strint 2025-10-16 16:45:08 +08:00
parent 55ac7d333c
commit 6e33ee391a

@@ -509,12 +509,29 @@ class LoadedModel:
             return False
 
     def model_unload(self, memory_to_free=None, unpatch_weights=True):
-        if memory_to_free is not None:
-            if memory_to_free < self.model.loaded_size():
-                freed = self.model.partially_unload(self.model.offload_device, memory_to_free)
-                if freed >= memory_to_free:
-                    return False
-        self.model.detach(unpatch_weights)
+        logging.info(f"model_unload: {self.model.model.__class__.__name__}")
+        logging.info(f"memory_to_free: {memory_to_free/(1024*1024*1024) if memory_to_free is not None else None} GB")
+        logging.info(f"unpatch_weights: {unpatch_weights}")
+        logging.info(f"loaded_size: {self.model.loaded_size()/(1024*1024*1024)} GB")
+        logging.info(f"offload_device: {self.model.offload_device}")
+        available_memory = get_free_memory(self.model.offload_device)
+        logging.info(f"before unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
+        try:
+            if memory_to_free is not None:
+                if memory_to_free < self.model.loaded_size():
+                    logging.info("Do partially unload")
+                    freed = self.model.partially_unload(self.model.offload_device, memory_to_free)
+                    logging.info(f"partially_unload freed: {freed/(1024*1024*1024)} GB")
+                    if freed >= memory_to_free:
+                        return False
+            logging.info("Do full unload")
+            self.model.detach(unpatch_weights)
+            logging.info("Do full unload done")
+        except Exception as e:
+            logging.error(f"Error in model_unload: {e}")
+            available_memory = get_free_memory(self.model.offload_device)
+            logging.info(f"after error, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
+            return False
         self.model_finalizer.detach()
         self.model_finalizer = None
         self.real_model = None
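
The first hunk wraps the unload path in try/except and reports byte counts divided by 1024³ as GB. Below is a minimal standalone sketch of that pattern; guarded_unload, unload_fn, and free_bytes_fn are hypothetical stand-ins for the ComfyUI internals (detach, get_free_memory), not real APIs.

import logging

logging.basicConfig(level=logging.INFO)

GB = 1024 * 1024 * 1024  # the divisor used by every new "GB" log line above

def guarded_unload(unload_fn, loaded_size, free_bytes_fn, device="cpu"):
    # Sketch of the pattern this hunk introduces: report sizes in GB, run the
    # unload inside try/except, and on failure log the offload device's free
    # memory and return False.
    logging.info(f"loaded_size: {loaded_size / GB:.2f} GB")
    logging.info(f"before unload, free memory on {device}: {free_bytes_fn() / GB:.2f} GB")
    try:
        unload_fn()
    except Exception as e:
        logging.error(f"Error in model_unload: {e}")
        logging.info(f"after error, free memory on {device}: {free_bytes_fn() / GB:.2f} GB")
        return False
    return True

# Toy invocation: an 8 GB model, 12 GB free on the offload device.
guarded_unload(lambda: None, 8 * GB, lambda: 12 * GB)
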
@@ -567,6 +584,7 @@ def minimum_inference_memory():
     return (1024 * 1024 * 1024) * 0.8 + extra_reserved_memory()
 
 def free_memory(memory_required, device, keep_loaded=[]):
+    logging.info("start to free mem")
     cleanup_models_gc()
     unloaded_model = []
     can_unload = []
@@ -587,7 +605,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
             if free_mem > memory_required:
                 break
             memory_to_free = memory_required - free_mem
-        logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
+        logging.info(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
         if current_loaded_models[i].model_unload(memory_to_free):
             unloaded_model.append(i)
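
The promoted log line sits inside free_memory's eviction loop, which computes the shortfall memory_required - free_mem and unloads candidates from the back of the list until the device has headroom. A toy simulation of that arithmetic, with invented sizes standing in for get_free_memory and model_unload:

import logging

logging.basicConfig(level=logging.INFO)

GB = 1024 * 1024 * 1024

def free_memory_sim(memory_required, free_mem, loaded_sizes):
    # Walk candidates from the end of the list (mirroring the reversed loop
    # in free_memory) and "unload" until enough memory is free.
    for size in reversed(loaded_sizes):
        if free_mem > memory_required:
            break
        memory_to_free = memory_required - free_mem
        logging.info(f"short by {memory_to_free / GB:.2f} GB, unloading a {size / GB:.2f} GB model")
        free_mem += size  # pretend the whole model was freed
    return free_mem

# Need 10 GB with 4 GB free: evicts the 3 GB model, then the 5 GB one.
free_memory_sim(10 * GB, 4 * GB, [5 * GB, 3 * GB])
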
@@ -604,6 +622,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
     return unloaded_models
 
 def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+    logging.info(f"start to load models")
     cleanup_models_gc()
     global vram_state
@@ -625,6 +644,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     models_to_load = []
 
     for x in models:
+        logging.info(f"loading model: {x.model.__class__.__name__}")
         loaded_model = LoadedModel(x)
         try:
             loaded_model_index = current_loaded_models.index(loaded_model)
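
For context on the final hunk: current_loaded_models.index(loaded_model) can locate an existing entry because LoadedModel appears to compare by the wrapped model, so a freshly constructed wrapper matches an already-loaded one. A sketch of that list.index/__eq__ idiom with made-up names, not ComfyUI's real classes:

# list.index() uses __eq__, so a fresh wrapper can find a cached entry.
class Loaded:
    def __init__(self, model):
        self.model = model

    def __eq__(self, other):
        return self.model is other.model

unet, vae = object(), object()
cache = [Loaded(unet), Loaded(vae)]

probe = Loaded(vae)           # fresh wrapper around an already-loaded model
try:
    idx = cache.index(probe)  # hit: reuse the existing entry
    print(f"reusing cached model at index {idx}")
except ValueError:
    cache.append(probe)       # miss: schedule a fresh load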