mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-23 16:29:25 +08:00
model_management: fix free_memory index race with reentrant cleanup_models (#14365)
free_memory() caches absolute indices into the unlocked current_loaded_models list and dereferences them later in the same function. model_unload() can drop the last reference to a real_model, whose weakref.finalize(real_model, cleanup_models) then runs cleanup_models() reentrantly on the SAME thread and pops dead entries from current_loaded_models mid-loop. The cached index goes stale, so the next iteration raises IndexError (or, when the index now points at a collected wrapper, AttributeError: 'NoneType' has no attribute 'is_dynamic') at the current_loaded_models[i] dereference. This is reproducible single-threaded on current master with no background threads: it is reentrant GC reclamation, not concurrency. Fix, scoped to free_memory only: carry the LoadedModel object through can_unload instead of its index (i stays as a sort tiebreaker), operate on that object, and remove unloaded entries by identity after the loop, tolerating any a reentrant cleanup_models() already popped. cleanup_models() and the LoadedModel accessors are left unchanged. Signed-off-by: liminfei-amd <91481003+liminfei-amd@users.noreply.github.com>
This commit is contained in:
parent
431a1888d3
commit
59d851dde5
@ -810,26 +810,37 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
|
|||||||
shift_model = current_loaded_models[i]
|
shift_model = current_loaded_models[i]
|
||||||
if device is None or shift_model.device == device:
|
if device is None or shift_model.device == device:
|
||||||
if shift_model not in keep_loaded and not shift_model.is_dead():
|
if shift_model not in keep_loaded and not shift_model.is_dead():
|
||||||
can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
|
# Carry the LoadedModel object, not its index: model_unload() can drop the last
|
||||||
|
# reference to a real_model, whose weakref.finalize fires cleanup_models()
|
||||||
|
# reentrantly and pops current_loaded_models mid-loop, leaving a cached index stale.
|
||||||
|
# i stays only as a unique sort tiebreaker (LoadedModels aren't orderable).
|
||||||
|
can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i, shift_model))
|
||||||
shift_model.currently_used = False
|
shift_model.currently_used = False
|
||||||
|
|
||||||
can_unload_sorted = sorted(can_unload)
|
can_unload_sorted = sorted(can_unload)
|
||||||
for x in can_unload_sorted:
|
for x in can_unload_sorted:
|
||||||
i = x[-1]
|
shift_model = x[-1]
|
||||||
memory_to_free = 1e32
|
memory_to_free = 1e32
|
||||||
if not DISABLE_SMART_MEMORY or device is None:
|
if not DISABLE_SMART_MEMORY or device is None:
|
||||||
memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
|
memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
|
||||||
if current_loaded_models[i].model.is_dynamic() and for_dynamic:
|
if shift_model.model.is_dynamic() and for_dynamic:
|
||||||
#don't actually unload dynamic models for the sake of other dynamic models
|
#don't actually unload dynamic models for the sake of other dynamic models
|
||||||
#as that works on-demand.
|
#as that works on-demand.
|
||||||
memory_required -= current_loaded_models[i].model.loaded_size()
|
memory_required -= shift_model.model.loaded_size()
|
||||||
memory_to_free = 0
|
memory_to_free = 0
|
||||||
if memory_to_free > 0 and current_loaded_models[i].model_unload(memory_to_free):
|
if memory_to_free > 0 and shift_model.model_unload(memory_to_free):
|
||||||
logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
|
logging.debug(f"Unloading {shift_model.model.model.__class__.__name__}")
|
||||||
unloaded_model.append(i)
|
unloaded_model.append(shift_model)
|
||||||
|
|
||||||
for i in sorted(unloaded_model, reverse=True):
|
for shift_model in unloaded_model:
|
||||||
unloaded_models.append(current_loaded_models.pop(i))
|
unloaded_models.append(shift_model)
|
||||||
|
# Drop this exact object by identity: LoadedModel.__eq__ compares the wrapped model
|
||||||
|
# (None after unload), so `in`/list.remove could match the wrong wrapper or miss it.
|
||||||
|
# cleanup_models() may already have popped it reentrantly, so tolerate its absence.
|
||||||
|
for idx in range(len(current_loaded_models) - 1, -1, -1):
|
||||||
|
if current_loaded_models[idx] is shift_model:
|
||||||
|
current_loaded_models.pop(idx)
|
||||||
|
break
|
||||||
|
|
||||||
if not for_dynamic and pins_required > 0:
|
if not for_dynamic and pins_required > 0:
|
||||||
ensure_pin_budget(pins_required)
|
ensure_pin_budget(pins_required)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user