Revert deprecation of non-dynamic smart memory (CORE-152 (revert)) (#14183)

* mm: re-instantate smart memory for VRAM

* mm: restore non-dynamic smart memory

By popular demand. We aren't quite ready for the deprecation as non
dynamic enabled GPUs and some high-vram custom model loader setups
prefer the old full hands on.
This commit is contained in:
rattus 2026-05-31 05:20:33 +10:00 committed by GitHub
parent e154da83b1
commit f7297bc5a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -813,9 +813,9 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
for x in can_unload_sorted:
i = x[-1]
memory_to_free = 1e32
if current_loaded_models[i].model.is_dynamic() and (not DISABLE_SMART_MEMORY or device is None):
if not DISABLE_SMART_MEMORY or device is None:
memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
if for_dynamic:
if current_loaded_models[i].model.is_dynamic() and for_dynamic:
#don't actually unload dynamic models for the sake of other dynamic models
#as that works on-demand.
memory_required -= current_loaded_models[i].model.loaded_size()
@ -827,6 +827,10 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
for i in sorted(unloaded_model, reverse=True):
unloaded_models.append(current_loaded_models.pop(i))
if not for_dynamic and pins_required > 0:
ensure_pin_budget(pins_required)
ensure_pin_registerable(pins_required)
if len(unloaded_model) > 0:
soft_empty_cache()
elif device is not None:
@ -889,15 +893,19 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
model_to_unload.model_finalizer.detach()
total_memory_required = {}
total_pins_required = {}
for loaded_model in models_to_load:
device = loaded_model.device
total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device)
if not loaded_model.model.is_dynamic():
total_pins_required[device] = total_pins_required.get(device, 0) + loaded_model.model_memory()
for device in total_memory_required:
if device != torch.device("cpu"):
free_memory(total_memory_required[device] * 1.1 + extra_mem,
device,
for_dynamic=free_for_dynamic)
for_dynamic=free_for_dynamic,
pins_required=total_pins_required.get(device, 0))
for device in total_memory_required:
if device != torch.device("cpu"):