Mirror of https://github.com/comfyanonymous/ComfyUI.git, synced 2025-12-20 19:42:59 +08:00

commit dc7c77e78c
parent c312733b8c

    better partial unload
@@ -26,6 +26,14 @@ import importlib
 import platform
 import weakref
 import gc
+import os
+
+def get_mmap_mem_threshold_gb():
+    mmap_mem_threshold_gb = int(os.environ.get("MMAP_MEM_THRESHOLD_GB", "0"))
+    return mmap_mem_threshold_gb
+
+def get_free_disk():
+    return psutil.disk_usage("/").free
 
 class VRAMState(Enum):
     DISABLED = 0 #No vram present: no need to move models to vram
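The two helpers added here are small enough to exercise standalone. A minimal sketch (assuming only that psutil is installed, which the patched module already requires):

    import os
    import psutil

    # Same logic as the helpers added above, copied here for experimentation.
    def get_mmap_mem_threshold_gb():
        # MMAP_MEM_THRESHOLD_GB is read from the environment; 0 (the default)
        # effectively disables mmap offload.
        return int(os.environ.get("MMAP_MEM_THRESHOLD_GB", "0"))

    def get_free_disk():
        # Free bytes on the root filesystem, where mmap'ed weights would land.
        return psutil.disk_usage("/").free

    os.environ["MMAP_MEM_THRESHOLD_GB"] = "8"
    print(get_mmap_mem_threshold_gb())                 # 8
    print(f"{get_free_disk()/(1024**3):.1f} GB free disk")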
@@ -524,9 +532,7 @@ class LoadedModel:
         logging.debug(f"unpatch_weights: {unpatch_weights}")
         logging.debug(f"loaded_size: {self.model.loaded_size()/(1024*1024*1024)} GB")
         logging.debug(f"offload_device: {self.model.offload_device}")
-        available_memory = get_free_memory(self.model.offload_device)
-        logging.debug(f"before unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
-        # reserved_memory = 1024*1024*1024 # 1GB reserved memory for other usage
         # if available_memory < reserved_memory:
         #     logging.warning(f"Not enough cpu memory to unload. Available: {available_memory/(1024*1024*1024)} GB, Reserved: {reserved_memory/(1024*1024*1024)} GB")
         #     return False
@@ -537,30 +543,42 @@ class LoadedModel:
         # memory_to_free = offload_memory
         # logging.info(f"Not enough cpu memory to unload. Available: {available_memory/(1024*1024*1024)} GB, Reserved: {reserved_memory/(1024*1024*1024)} GB, Offload: {offload_memory/(1024*1024*1024)} GB")
         # logging.info(f"Set memory_to_free to {memory_to_free/(1024*1024*1024)} GB")
-        try:
-            if memory_to_free is not None:
-                if memory_to_free < self.model.loaded_size():
-                    logging.debug("Do partially unload")
-                    freed = self.model.partially_unload(self.model.offload_device, memory_to_free)
-                    logging.debug(f"partially_unload freed vram: {freed/(1024*1024*1024)} GB")
-                    if freed >= memory_to_free:
-                        return False
+        if memory_to_free is None:
+            # free the full model
+            memory_to_free = self.model.loaded_size()
+
+        available_memory = get_free_memory(self.model.offload_device)
+        logging.debug(f"before unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
+
+        mmap_mem_threshold = get_mmap_mem_threshold_gb() * 1024 * 1024 * 1024 # this is reserved memory for other system usage
+        if memory_to_free > available_memory - mmap_mem_threshold or memory_to_free < self.model.loaded_size():
+            partially_unload = True
+        else:
+            partially_unload = False
+
+        if partially_unload:
+            logging.debug("Do partially unload")
+            freed = self.model.partially_unload(self.model.offload_device, memory_to_free)
+            logging.debug(f"partially_unload freed vram: {freed/(1024*1024*1024)} GB")
+            if freed < memory_to_free:
+                logging.warning(f"Partially unload not enough memory, freed {freed/(1024*1024*1024)} GB, memory_to_free {memory_to_free/(1024*1024*1024)} GB")
+        else:
             logging.debug("Do full unload")
             self.model.detach(unpatch_weights)
             logging.debug("Do full unload done")
-        except Exception as e:
-            logging.error(f"Error in model_unload: {e}")
-            available_memory = get_free_memory(self.model.offload_device)
-            logging.info(f"after error, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
-            return False
-        finally:
-            available_memory = get_free_memory(self.model.offload_device)
-            logging.debug(f"after unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
-
-        self.model_finalizer.detach()
-        self.model_finalizer = None
-        self.real_model = None
-        return True
+            self.model_finalizer.detach()
+            self.model_finalizer = None
+            self.real_model = None
+
+        available_memory = get_free_memory(self.model.offload_device)
+        logging.debug(f"after unload, available_memory of offload device {self.model.offload_device}: {available_memory/(1024*1024*1024)} GB")
+
+        if partially_unload:
+            return False
+        else:
+            return True
 
     def model_use_more_vram(self, extra_memory, force_patch_weights=False):
         return self.model.partially_load(self.device, extra_memory, force_patch_weights=force_patch_weights)
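Distilled, the reworked unload path first normalizes memory_to_free, then picks between a partial and a full unload. A minimal standalone sketch of that decision (not the patched method itself; plain numbers stand in for the ModelPatcher state):

    GB = 1024 ** 3

    def choose_unload(memory_to_free, loaded_size, available_memory, mmap_threshold):
        # None means "free everything", so fall back to the full loaded size.
        if memory_to_free is None:
            memory_to_free = loaded_size
        # Unload partially when the request would not fit in the offload
        # device's spare memory (minus the reserved mmap threshold), or when
        # only part of the model needs to move.
        if memory_to_free > available_memory - mmap_threshold or memory_to_free < loaded_size:
            return "partial"   # model_unload() then returns False (model stays resident)
        return "full"          # model_unload() detaches everything and returns True

    print(choose_unload(None, 10 * GB, 32 * GB, 4 * GB))    # full
    print(choose_unload(6 * GB, 10 * GB, 32 * GB, 4 * GB))  # partial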
@@ -40,11 +40,11 @@ import comfy.patcher_extension
 import comfy.utils
 from comfy.comfy_types import UnetWrapperFunction
 from comfy.patcher_extension import CallbacksMP, PatcherInjection, WrappersMP
-from comfy.model_management import get_free_memory
+from comfy.model_management import get_free_memory, get_mmap_mem_threshold_gb, get_free_disk
 
 def need_mmap() -> bool:
     free_cpu_mem = get_free_memory(torch.device("cpu"))
-    mmap_mem_threshold_gb = int(os.environ.get("MMAP_MEM_THRESHOLD_GB", "0"))
+    mmap_mem_threshold_gb = get_mmap_mem_threshold_gb()
     if free_cpu_mem < mmap_mem_threshold_gb * 1024 * 1024 * 1024:
         logging.debug(f"Enabling mmap, current free cpu memory {free_cpu_mem/(1024*1024*1024)} GB < {mmap_mem_threshold_gb} GB")
         return True
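After this refactor, need_mmap() shares the threshold helper instead of re-reading the environment. Boiled down, the check is equivalent to this standalone sketch (psutil's available-memory figure is assumed here as a stand-in for comfy's get_free_memory on the CPU device):

    import os
    import psutil

    def need_mmap() -> bool:
        # Offload weights to mmap-backed storage once free RAM falls below the
        # MMAP_MEM_THRESHOLD_GB threshold; a threshold of 0 never triggers.
        free_cpu_mem = psutil.virtual_memory().available
        threshold_gb = int(os.environ.get("MMAP_MEM_THRESHOLD_GB", "0"))
        return free_cpu_mem < threshold_gb * 1024 ** 3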
@@ -972,6 +972,9 @@ class ModelPatcher:
             if move_weight:
                 cast_weight = self.force_cast_weights
                 if need_mmap():
+                    if get_free_disk() < module_mem:
+                        logging.warning(f"Not enough disk space to offload {n} to mmap, current free disk space {get_free_disk()/(1024*1024*1024)} GB < {module_mem/(1024*1024*1024)} GB")
+                        break
                     # offload to mmap
                     model_to_mmap(m)
                 else:
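The new guard makes the mmap offload loop bail out instead of filling the disk. In isolation the check looks like this (hypothetical helper; n and module_mem mirror the loop variables in the diff):

    import logging
    import psutil

    def can_offload_to_mmap(n: str, module_mem: int) -> bool:
        # Refuse the offload when the root filesystem cannot hold the
        # module's weights; writing them anyway would fail midway.
        free_disk = psutil.disk_usage("/").free
        if free_disk < module_mem:
            logging.warning(
                f"Not enough disk space to offload {n} to mmap, current free "
                f"disk space {free_disk/(1024**3):.1f} GB < {module_mem/(1024**3):.1f} GB")
            return False
        return True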