model_management: close free_memory/cleanup_models finalizer race

Pin the patcher (model = shift_model.model; continue if None) in both free_memory
loops so a reentrant cleanup_models, popping current_loaded_models mid-iteration,
cannot leave shift_model.model None for the is_dynamic()/model_unload path. Remove
unloaded entries by identity (model is None post-unload, so LoadedModel.__eq__ is
unsafe), tolerating an entry a finalizer already popped.

Guard the real_model deref in is_dead() and cleanup_models(): real_model is the
plain None from __init__ until model_load(), so an entry observed pre-load makes
real_model() a None() call. cleanup_models() also removes by identity, mirroring
free_memory, and tolerates reentrant pops.
This commit is contained in:
liminfei-amd 2026-06-15 16:49:19 +08:00
parent 59d851dde5
commit 3e8ef6355b

View File

@ -765,7 +765,9 @@ class LoadedModel:
self._patcher_finalizer.detach()
def is_dead(self):
return self.real_model() is not None and self.model is None
# real_model is plain None until model_load(); guard so a pre-load entry can't raise.
rm = self.real_model
return rm is not None and rm() is not None and self.model is None
def use_more_memory(extra_memory, loaded_models, device):
@ -808,35 +810,38 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
for i in range(len(current_loaded_models) -1, -1, -1):
shift_model = current_loaded_models[i]
# Pin the patcher; LoadedModel.model is a weakref deref a mid-loop finalizer can None.
model = shift_model.model
if model is None:
continue
if device is None or shift_model.device == device:
if shift_model not in keep_loaded and not shift_model.is_dead():
# Carry the LoadedModel object, not its index: model_unload() can drop the last
# reference to a real_model, whose weakref.finalize fires cleanup_models()
# reentrantly and pops current_loaded_models mid-loop, leaving a cached index stale.
# i stays only as a unique sort tiebreaker (LoadedModels aren't orderable).
can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i, shift_model))
# Carry the object so a reentrant cleanup_models pop can't stale the index; i is just a sort tiebreaker.
can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(model), shift_model.model_memory(), i, shift_model))
shift_model.currently_used = False
can_unload_sorted = sorted(can_unload)
for x in can_unload_sorted:
shift_model = x[-1]
# Pin: shift_model.model still re-derefs the weakref a reentrant cleanup_models can None.
model = shift_model.model
if model is None:
continue
memory_to_free = 1e32
if not DISABLE_SMART_MEMORY or device is None:
memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
if shift_model.model.is_dynamic() and for_dynamic:
if model.is_dynamic() and for_dynamic:
#don't actually unload dynamic models for the sake of other dynamic models
#as that works on-demand.
memory_required -= shift_model.model.loaded_size()
memory_required -= model.loaded_size()
memory_to_free = 0
if memory_to_free > 0 and shift_model.model_unload(memory_to_free):
logging.debug(f"Unloading {shift_model.model.model.__class__.__name__}")
logging.debug(f"Unloading {model.model.__class__.__name__}")
unloaded_model.append(shift_model)
for shift_model in unloaded_model:
unloaded_models.append(shift_model)
# Drop this exact object by identity: LoadedModel.__eq__ compares the wrapped model
# (None after unload), so `in`/list.remove could match the wrong wrapper or miss it.
# cleanup_models() may already have popped it reentrantly, so tolerate its absence.
# Remove by identity (model is None post-unload, so __eq__ is unsafe); tolerate a reentrant pop.
for idx in range(len(current_loaded_models) - 1, -1, -1):
if current_loaded_models[idx] is shift_model:
current_loaded_models.pop(idx)
@ -999,12 +1004,17 @@ def archive_model_dtypes(model):
def cleanup_models():
to_delete = []
for i in range(len(current_loaded_models)):
if current_loaded_models[i].real_model() is None:
to_delete = [i] + to_delete
# real_model can still be the plain None from __init__ here; guard the call.
rm = current_loaded_models[i].real_model
if rm is not None and rm() is None:
to_delete.append(current_loaded_models[i])
for i in to_delete:
x = current_loaded_models.pop(i)
del x
for lm in to_delete:
# Remove by identity, mirroring free_memory; tolerate a reentrant pop.
for idx in range(len(current_loaded_models) - 1, -1, -1):
if current_loaded_models[idx] is lm:
current_loaded_models.pop(idx)
break
def dtype_size(dtype):
dtype_size = 4