mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-11 20:27:44 +08:00
revert threaded model loader change
This change was only needed to get around the pytorch 2.7 mempool bugs, and should have been reverted along with #12260. This fixes a different memory leak where pytorch gets confused about cache emptying.
This commit is contained in:
parent
a0302cc6a8
commit
6198f7562e
@ -19,7 +19,7 @@
|
|||||||
import psutil
|
import psutil
|
||||||
import logging
|
import logging
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
|
from comfy.cli_args import args, PerformanceFeature
|
||||||
import threading
|
import threading
|
||||||
import torch
|
import torch
|
||||||
import sys
|
import sys
|
||||||
@ -651,7 +651,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
|
|||||||
soft_empty_cache()
|
soft_empty_cache()
|
||||||
return unloaded_models
|
return unloaded_models
|
||||||
|
|
||||||
def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
|
def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
|
||||||
cleanup_models_gc()
|
cleanup_models_gc()
|
||||||
global vram_state
|
global vram_state
|
||||||
|
|
||||||
@ -747,26 +747,6 @@ def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, m
|
|||||||
current_loaded_models.insert(0, loaded_model)
|
current_loaded_models.insert(0, loaded_model)
|
||||||
return
|
return
|
||||||
|
|
||||||
def load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load):
|
|
||||||
with torch.inference_mode():
|
|
||||||
load_models_gpu_orig(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
|
|
||||||
soft_empty_cache()
|
|
||||||
|
|
||||||
def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
|
|
||||||
#Deliberately load models outside of the Aimdo mempool so they can be retained accross
|
|
||||||
#nodes. Use a dummy thread to do it as pytorch documents that mempool contexts are
|
|
||||||
#thread local. So exploit that to escape context
|
|
||||||
if enables_dynamic_vram():
|
|
||||||
t = threading.Thread(
|
|
||||||
target=load_models_gpu_thread,
|
|
||||||
args=(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
|
|
||||||
)
|
|
||||||
t.start()
|
|
||||||
t.join()
|
|
||||||
else:
|
|
||||||
load_models_gpu_orig(models, memory_required=memory_required, force_patch_weights=force_patch_weights,
|
|
||||||
minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
|
|
||||||
|
|
||||||
def load_model_gpu(model):
|
def load_model_gpu(model):
|
||||||
return load_models_gpu([model])
|
return load_models_gpu([model])
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user