From 3ad3248ad772945ed5219d54f5ea59d9dc4376d9 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 19 Dec 2024 16:04:56 -0500 Subject: [PATCH 1/5] Fix lowvram bug when using a model multiple times in a row. The memory system would load an extra 64MB each time until either the model was completely in memory or OOM. --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index f6ca252e3..d5d77d7d7 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -505,7 +505,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM) and not force_full_load: model_size = loaded_model.model_memory_required(torch_dev) current_free_mem = get_free_memory(torch_dev) - lowvram_model_memory = max(64 * (1024 * 1024), (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory())) + lowvram_model_memory = max(1, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory())) if model_size <= lowvram_model_memory: #only switch to lowvram if really necessary lowvram_model_memory = 0 From 2dda7c11a345a6b52c4ed43dbcecb1320bc539c0 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 19 Dec 2024 16:21:56 -0500 Subject: [PATCH 2/5] More proper fix for the memory issue. 
--- comfy/model_management.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index d5d77d7d7..6f667dfc5 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -314,6 +314,9 @@ class LoadedModel: def model_memory(self): return self.model.model_size() + def model_loaded_memory(self): + return self.model.loaded_size() + def model_offloaded_memory(self): return self.model.model_size() - self.model.loaded_size() @@ -504,8 +507,10 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu lowvram_model_memory = 0 if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM) and not force_full_load: model_size = loaded_model.model_memory_required(torch_dev) - current_free_mem = get_free_memory(torch_dev) - lowvram_model_memory = max(1, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory())) + loaded_memory = loaded_model.model_loaded_memory() + current_free_mem = get_free_memory(torch_dev) + loaded_memory + lowvram_model_memory = max(64 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory())) + lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory) if model_size <= lowvram_model_memory: #only switch to lowvram if really necessary lowvram_model_memory = 0 From 3cacd3fca54dc8928ae27dcff6c89f1d40c34038 Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:01:56 -0500 Subject: [PATCH 3/5] Support preview images embedded in safetensors metadata (#6119) * Support preview images embedded in safetensors metadata * Add unit test for safetensors embedded image previews --- app/model_manager.py | 29 ++++++++--- tests-unit/app_test/model_manager_test.py | 62 +++++++++++++++++++++++ 2 files 
changed, 85 insertions(+), 6 deletions(-) create mode 100644 tests-unit/app_test/model_manager_test.py diff --git a/app/model_manager.py b/app/model_manager.py index 475970d14..650bfa768 100644 --- a/app/model_manager.py +++ b/app/model_manager.py @@ -1,10 +1,13 @@ from __future__ import annotations import os +import base64 +import json import time import logging import folder_paths import glob +import comfy.utils from aiohttp import web from PIL import Image from io import BytesIO @@ -59,13 +62,13 @@ class ModelFileManager: folder = folders[0][path_index] full_filename = os.path.join(folder, filename) - preview_files = self.get_model_previews(full_filename) - default_preview_file = preview_files[0] if len(preview_files) > 0 else None - if default_preview_file is None or not os.path.isfile(default_preview_file): + previews = self.get_model_previews(full_filename) + default_preview = previews[0] if len(previews) > 0 else None + if default_preview is None or (isinstance(default_preview, str) and not os.path.isfile(default_preview)): return web.Response(status=404) try: - with Image.open(default_preview_file) as img: + with Image.open(default_preview) as img: img_bytes = BytesIO() img.save(img_bytes, format="WEBP") img_bytes.seek(0) @@ -143,7 +146,7 @@ class ModelFileManager: return [{"name": f, "pathIndex": pathIndex} for f in result], dirs, time.perf_counter() - def get_model_previews(self, filepath: str) -> list[str]: + def get_model_previews(self, filepath: str) -> list[str | BytesIO]: dirname = os.path.dirname(filepath) if not os.path.exists(dirname): @@ -152,8 +155,10 @@ class ModelFileManager: basename = os.path.splitext(filepath)[0] match_files = glob.glob(f"{basename}.*", recursive=False) image_files = filter_files_content_types(match_files, "image") + safetensors_file = next(filter(lambda x: x.endswith(".safetensors"), match_files), None) + safetensors_metadata = {} - result: list[str] = [] + result: list[str | BytesIO] = [] for filename in image_files: 
_basename = os.path.splitext(filename)[0] @@ -161,6 +166,18 @@ class ModelFileManager: result.append(filename) if _basename == f"{basename}.preview": result.append(filename) + + if safetensors_file: + safetensors_filepath = os.path.join(dirname, safetensors_file) + header = comfy.utils.safetensors_header(safetensors_filepath, max_size=8*1024*1024) + if header: + safetensors_metadata = json.loads(header) + safetensors_images = safetensors_metadata.get("__metadata__", {}).get("ssmd_cover_images", None) + if safetensors_images: + safetensors_images = json.loads(safetensors_images) + for image in safetensors_images: + result.append(BytesIO(base64.b64decode(image))) + return result def __exit__(self, exc_type, exc_value, traceback): diff --git a/tests-unit/app_test/model_manager_test.py b/tests-unit/app_test/model_manager_test.py new file mode 100644 index 000000000..ae59206f6 --- /dev/null +++ b/tests-unit/app_test/model_manager_test.py @@ -0,0 +1,62 @@ +import pytest +import base64 +import json +import struct +from io import BytesIO +from PIL import Image +from aiohttp import web +from unittest.mock import patch +from app.model_manager import ModelFileManager + +pytestmark = ( + pytest.mark.asyncio +) # This applies the asyncio mark to all test functions in the module + +@pytest.fixture +def model_manager(): + return ModelFileManager() + +@pytest.fixture +def app(model_manager): + app = web.Application() + routes = web.RouteTableDef() + model_manager.add_routes(routes) + app.add_routes(routes) + return app + +async def test_get_model_preview_safetensors(aiohttp_client, app, tmp_path): + img = Image.new('RGB', (100, 100), 'white') + img_byte_arr = BytesIO() + img.save(img_byte_arr, format='PNG') + img_byte_arr.seek(0) + img_b64 = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8') + + safetensors_file = tmp_path / "test_model.safetensors" + header_bytes = json.dumps({ + "__metadata__": { + "ssmd_cover_images": json.dumps([img_b64]) + } + }).encode('utf-8') + 
length_bytes = struct.pack(' Date: Thu, 19 Dec 2024 22:52:37 -0500 Subject: [PATCH 4/5] Fix tiled hunyuan video VAE encode issue. Some shapes like 1024x1024 with tile_size 256 and overlap 64 had issues. --- comfy/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/utils.py b/comfy/utils.py index ab1e3cd5a..3ddbfd90c 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -807,7 +807,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am upscaled = [] for d in range(dims): - pos = max(0, min(s.shape[d + 2] - (overlap[d] + 1), it[d])) + pos = max(0, min(s.shape[d + 2] - overlap[d], it[d])) l = min(tile[d], s.shape[d + 2] - pos) s_in = s_in.narrow(d + 2, pos, l) upscaled.append(round(get_scale(d, pos))) From cac68ca8139270e9f7a8b316419443a1e1078e45 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 19 Dec 2024 23:14:03 -0500 Subject: [PATCH 5/5] Fix some more video tiled encode issues. The downscale_ratio formula for the temporal dimension had issues with some frame numbers.
--- comfy/sd.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index 85393ef0d..b5cf296c8 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -12,6 +12,7 @@ from .ldm.audio.autoencoder import AudioOobleckVAE import comfy.ldm.genmo.vae.model import comfy.ldm.lightricks.vae.causal_video_autoencoder import yaml +import math import comfy.utils @@ -336,7 +337,7 @@ class VAE: self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype) self.memory_used_encode = lambda shape, dtype: (1.5 * max(shape[2], 7) * shape[3] * shape[4] * (6 * 8 * 8)) * model_management.dtype_size(dtype) self.upscale_ratio = (lambda a: max(0, a * 6 - 5), 8, 8) - self.downscale_ratio = (lambda a: max(0, (a + 3) / 6), 8, 8) + self.downscale_ratio = (lambda a: max(0, math.floor((a + 5) / 6)), 8, 8) self.working_dtypes = [torch.float16, torch.float32] elif "decoder.up_blocks.0.res_blocks.0.conv1.conv.weight" in sd: #lightricks ltxv self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE() @@ -345,14 +346,14 @@ class VAE: self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype) self.memory_used_encode = lambda shape, dtype: (70 * max(shape[2], 7) * shape[3] * shape[4]) * model_management.dtype_size(dtype) self.upscale_ratio = (lambda a: max(0, a * 8 - 7), 32, 32) - self.downscale_ratio = (lambda a: max(0, (a + 4) / 8), 32, 32) + self.downscale_ratio = (lambda a: max(0, math.floor((a + 7) / 8)), 32, 32) self.working_dtypes = [torch.bfloat16, torch.float32] elif "decoder.conv_in.conv.weight" in sd: ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} ddconfig["conv3d"] = True ddconfig["time_compress"] = 4 self.upscale_ratio = (lambda a: 
max(0, a * 4 - 3), 8, 8) - self.downscale_ratio = (lambda a: max(0, (a + 2) / 4), 8, 8) + self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8) self.latent_dim = 3 self.latent_channels = ddconfig['z_channels'] = sd["decoder.conv_in.conv.weight"].shape[1] self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])