Fix device mismatch: update LoadedModel.device when _switch_parent swaps to parent patcher
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled

When a multigpu clone ModelPatcher is garbage collected, LoadedModel._switch_parent
switches the weakref to point at the parent (main) ModelPatcher. However, it was not
updating LoadedModel.device, leaving it with the old clone's device (e.g., cuda:1).
On subsequent runs, this stale device was passed to ModelPatcherDynamic.load(), causing
an assertion failure (device_to != self.load_device).

Amp-Thread-ID: https://ampcode.com/threads/T-019d3f5c-28c5-72c9-abed-34681f1b54ba
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Jedrzej Kosinski 2026-03-30 08:56:33 -07:00
parent 20803749c3
commit b418fb1582
2 changed files with 1 additions and 7 deletions

View File

@ -577,6 +577,7 @@ class LoadedModel:
model = self._parent_model()
if model is not None:
self._set_model(model)
self.device = model.load_device
@property
def model(self):
@ -639,8 +640,6 @@ class LoadedModel:
return True
def model_use_more_vram(self, extra_memory, force_patch_weights=False):
if self.device != self.model.load_device:
logging.error(f"LoadedModel device mismatch: self.device={self.device}, model.load_device={self.model.load_device}, model_class={self.model.model.__class__.__name__}, is_multigpu={getattr(self.model, 'is_multigpu_base_clone', False)}, id(model)={id(self.model)}")
return self.model.partially_load(self.device, extra_memory, force_patch_weights=force_patch_weights)
def __eq__(self, other):
@ -780,19 +779,16 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
if loaded_model_index is not None:
loaded = current_loaded_models[loaded_model_index]
loaded.currently_used = True
logging.info(f"[MULTIGPU_DBG] Reusing LoadedModel for {x.model.__class__.__name__}: LoadedModel.device={loaded.device}, model.load_device={loaded.model.load_device}, is_multigpu={getattr(loaded.model, 'is_multigpu_base_clone', False)}, id(patcher)={id(loaded.model)}, id(inner)={id(loaded.model.model)}")
models_to_load.append(loaded)
else:
if hasattr(x, "model"):
logging.info(f"Requested to load {x.model.__class__.__name__}")
logging.info(f"[MULTIGPU_DBG] New LoadedModel for {x.model.__class__.__name__}: LoadedModel.device={loaded_model.device}, model.load_device={x.load_device}, is_multigpu={getattr(x, 'is_multigpu_base_clone', False)}, id(patcher)={id(x)}, id(inner)={id(x.model)}")
models_to_load.append(loaded_model)
for loaded_model in models_to_load:
to_unload = []
for i in range(len(current_loaded_models)):
if loaded_model.model.is_clone(current_loaded_models[i].model):
logging.info(f"[MULTIGPU_DBG] is_clone match: unloading idx={i}, LoadedModel.device={current_loaded_models[i].device}, model.load_device={current_loaded_models[i].model.load_device}, id(inner)={id(current_loaded_models[i].model.model)}")
to_unload = [i] + to_unload
for i in to_unload:
model_to_unload = current_loaded_models.pop(i)

View File

@ -1646,8 +1646,6 @@ class ModelPatcherDynamic(ModelPatcher):
#now.
assert not full_load
if device_to != self.load_device:
logging.error(f"ModelPatcherDynamic.load device mismatch: device_to={device_to}, self.load_device={self.load_device}, model_class={self.model.__class__.__name__}, is_multigpu_base_clone={getattr(self, 'is_multigpu_base_clone', False)}, id(self)={id(self)}")
assert device_to == self.load_device
num_patches = 0