From 673c63572d7cf171b44f5658d24eb31de44e6d05 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Fri, 13 Jun 2025 15:12:52 +0300
Subject: [PATCH 1/3] fix(memory, docker): support for reading cgroup data

Signed-off-by: bigcat88
---
 comfy/model_management.py | 44 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 054291432..067b61b62 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -25,6 +25,9 @@ import sys
 import platform
 import weakref
 import gc
+from pathlib import Path
+from functools import lru_cache
+

 class VRAMState(Enum):
     DISABLED = 0    #No vram present: no need to move models to vram
@@ -177,7 +180,7 @@ def get_total_memory(dev=None, torch_total_too=False):
         dev = get_torch_device()

     if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
-        mem_total = psutil.virtual_memory().total
+        mem_total = _cgroup_limit_bytes() or psutil.virtual_memory().total
         mem_total_torch = mem_total
     else:
         if directml_enabled:
@@ -218,6 +221,35 @@ def mac_version():
     except:
         return None

+
+_CG = Path("/sys/fs/cgroup")
+if (_CG / "memory.max").exists():  # cgroup v2
+    _LIMIT_F = _CG / "memory.max"
+    _USED_F = _CG / "memory.current"
+else:  # cgroup v1
+    _LIMIT_F = _CG / "memory/memory.limit_in_bytes"
+    _USED_F = _CG / "memory/memory.usage_in_bytes"
+
+
+@lru_cache(maxsize=None)  # the hard limit never changes
+def _cgroup_limit_bytes():
+    return _read_int(_LIMIT_F)
+
+
+def _cgroup_used_bytes():
+    return _read_int(_USED_F)
+
+
+def _read_int(p: Path):
+    try:
+        v = int(p.read_text().strip())
+        if v == 0 or v >= (1 << 60):
+            return None  # 'max' in v2 shows up as 2**63-1 or 0, treat both as unlimited
+        return v
+    except (FileNotFoundError, PermissionError, ValueError):
+        return None
+
+
 total_vram = get_total_memory(get_torch_device()) / (1024 * 1024)
 total_ram = psutil.virtual_memory().total / (1024 * 1024)
 logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram))
@@ -1081,7 +1113,15 @@ def get_free_memory(dev=None, torch_free_too=False):
     if dev is None:
         dev = get_torch_device()

-    if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
+    if hasattr(dev, 'type') and dev.type == 'cpu':
+        limit = _cgroup_limit_bytes()
+        used = _cgroup_used_bytes() if limit is not None else None
+        if limit is not None and used is not None:
+            mem_free_total = max(limit - used, 0)
+        else:
+            mem_free_total = psutil.virtual_memory().available
+        mem_free_torch = mem_free_total
+    elif hasattr(dev, 'type') and dev.type == 'mps':
         mem_free_total = psutil.virtual_memory().available
         mem_free_torch = mem_free_total
     else:

From 290cfd16a2f259c5ddc2c4f0663bc49e063dab18 Mon Sep 17 00:00:00 2001
From: bigcat88
Date: Sun, 15 Jun 2025 08:07:43 +0300
Subject: [PATCH 2/3] removed use of the lru_cache

Signed-off-by: bigcat88
---
 comfy/model_management.py | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 067b61b62..edea5aede 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -26,7 +26,6 @@ import platform
 import weakref
 import gc
 from pathlib import Path
-from functools import lru_cache


 class VRAMState(Enum):
@@ -180,7 +179,7 @@ def get_total_memory(dev=None, torch_total_too=False):
         dev = get_torch_device()

     if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'):
-        mem_total = _cgroup_limit_bytes() or psutil.virtual_memory().total
+        mem_total = _CGROUP_LIMIT_BYTES or psutil.virtual_memory().total
         mem_total_torch = mem_total
     else:
         if directml_enabled:
@@ -230,17 +229,7 @@ else:  # cgroup v1
     _LIMIT_F = _CG / "memory/memory.limit_in_bytes"
     _USED_F = _CG / "memory/memory.usage_in_bytes"

-
-@lru_cache(maxsize=None)  # the hard limit never changes
-def _cgroup_limit_bytes():
-    return _read_int(_LIMIT_F)
-
-
-def _cgroup_used_bytes():
-    return _read_int(_USED_F)
-
-
-def _read_int(p: Path):
+def _get_cgroup_value(p: Path):
     try:
         v = int(p.read_text().strip())
         if v == 0 or v >= (1 << 60):
@@ -249,6 +238,7 @@ def _get_cgroup_value(p: Path):
     except (FileNotFoundError, PermissionError, ValueError):
         return None

+_CGROUP_LIMIT_BYTES = _get_cgroup_value(_LIMIT_F)

 total_vram = get_total_memory(get_torch_device()) / (1024 * 1024)
 total_ram = psutil.virtual_memory().total / (1024 * 1024)
@@ -1114,10 +1104,9 @@ def get_free_memory(dev=None, torch_free_too=False):
         dev = get_torch_device()

     if hasattr(dev, 'type') and dev.type == 'cpu':
-        limit = _cgroup_limit_bytes()
-        used = _cgroup_used_bytes() if limit is not None else None
-        if limit is not None and used is not None:
-            mem_free_total = max(limit - used, 0)
+        used = _get_cgroup_value(_USED_F) if _CGROUP_LIMIT_BYTES is not None else None
+        if _CGROUP_LIMIT_BYTES is not None and used is not None:
+            mem_free_total = max(_CGROUP_LIMIT_BYTES - used, 0)
         else:
             mem_free_total = psutil.virtual_memory().available
         mem_free_torch = mem_free_total

From 7eebd59c692cc423bcede0f56ee2b81c1248c5c6 Mon Sep 17 00:00:00 2001
From: bigcat88
Date: Sun, 15 Jun 2025 08:30:00 +0300
Subject: [PATCH 3/3] use cgroup in "total_ram" report

Signed-off-by: bigcat88
---
 comfy/model_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index edea5aede..3100b72a0 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -241,7 +241,7 @@ def _get_cgroup_value(p: Path):
 _CGROUP_LIMIT_BYTES = _get_cgroup_value(_LIMIT_F)

 total_vram = get_total_memory(get_torch_device()) / (1024 * 1024)
-total_ram = psutil.virtual_memory().total / (1024 * 1024)
+total_ram = (_CGROUP_LIMIT_BYTES or psutil.virtual_memory().total) / (1024 * 1024)
 logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram))

 try:
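
Reviewer note, not part of the patches: the sketch below restates the approach the series converges on as a standalone script, so it can be exercised outside ComfyUI. The module-level names mirror the final state of comfy/model_management.py after PATCH 3/3; the cpu_free_memory_bytes helper and the __main__ harness are illustrative additions, not code from the PR.

#!/usr/bin/env python3
# Standalone sketch of the cgroup memory probing this series lands on.
# Requires psutil. Paths follow cgroup v2 with a cgroup v1 fallback.
from pathlib import Path

import psutil

_CG = Path("/sys/fs/cgroup")
if (_CG / "memory.max").exists():  # cgroup v2 (unified hierarchy)
    _LIMIT_F = _CG / "memory.max"
    _USED_F = _CG / "memory.current"
else:  # cgroup v1
    _LIMIT_F = _CG / "memory/memory.limit_in_bytes"
    _USED_F = _CG / "memory/memory.usage_in_bytes"


def _get_cgroup_value(p: Path):
    # Returns the integer stored in a cgroup file, or None when the value
    # means "unlimited" or the file is missing/unreadable/non-numeric.
    # cgroup v2 writes the literal string "max" when no limit is set
    # (caught by ValueError); v1 reports a huge sentinel near 2**63.
    try:
        v = int(p.read_text().strip())
        if v == 0 or v >= (1 << 60):
            return None
        return v
    except (FileNotFoundError, PermissionError, ValueError):
        return None


# Read once at import time, as PATCH 2/3 does: a container's hard memory
# limit does not change, so caching it in a module-level constant suffices.
_CGROUP_LIMIT_BYTES = _get_cgroup_value(_LIMIT_F)


def cpu_free_memory_bytes():
    # Same arithmetic as the patched get_free_memory() CPU branch:
    # limit minus current usage when both are known, else psutil's figure.
    used = _get_cgroup_value(_USED_F) if _CGROUP_LIMIT_BYTES is not None else None
    if _CGROUP_LIMIT_BYTES is not None and used is not None:
        return max(_CGROUP_LIMIT_BYTES - used, 0)
    return psutil.virtual_memory().available


if __name__ == "__main__":
    total = _CGROUP_LIMIT_BYTES or psutil.virtual_memory().total
    print("total RAM {:0.0f} MB".format(total / (1024 * 1024)))
    print("free RAM  {:0.0f} MB".format(cpu_free_memory_bytes() / (1024 * 1024)))

Design-wise, replacing lru_cache with a constant computed at import time (PATCH 2/3) drops the functools dependency and makes the read-once intent explicit, while current usage is deliberately re-read on every get_free_memory() call because it changes constantly.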