diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 79ecbd682..81f29f098 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -136,8 +136,9 @@ parser.add_argument("--deterministic", action="store_true", help="Make pytorch u class PerformanceFeature(enum.Enum): Fp16Accumulation = "fp16_accumulation" Fp8MatrixMultiplication = "fp8_matrix_mult" + CublasOps = "cublas_ops" -parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult") +parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult cublas_ops") parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 5b8d8000d..6388d3faf 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1422,3 +1422,101 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt().nan_to_num(nan=0.0) old_denoised = denoised return x + +@torch.no_grad() +def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5): + ''' + SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 2 + Arxiv: https://arxiv.org/abs/2305.14267 + ''' + extra_args = {} if extra_args is None else extra_args + seed = extra_args.get("seed", None) + noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + inject_noise = eta > 0 and s_noise > 0 + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + x = denoised + else: + t, t_next = -sigmas[i].log(), -sigmas[i + 1].log() + h = t_next - t + h_eta = h * (eta + 1) + s = t + r * h + fac = 1 / (2 * r) + sigma_s = s.neg().exp() + + coeff_1, coeff_2 = (-r * h_eta).expm1(), (-h_eta).expm1() + if inject_noise: + noise_coeff_1 = (-2 * r * h * eta).expm1().neg().sqrt() + noise_coeff_2 = ((-2 * r * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt() + noise_1, noise_2 = noise_sampler(sigmas[i], sigma_s), noise_sampler(sigma_s, sigmas[i + 1]) + + # Step 1 + x_2 = (coeff_1 + 1) * x - coeff_1 * denoised + if inject_noise: + x_2 = x_2 + sigma_s * (noise_coeff_1 * noise_1) * s_noise + denoised_2 = model(x_2, sigma_s * s_in, **extra_args) + + # Step 2 + denoised_d = (1 - fac) * denoised + fac * denoised_2 + x = (coeff_2 + 1) * x - coeff_2 * denoised_d + if inject_noise: + x = x + sigmas[i + 1] * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise + return x + +@torch.no_grad() +def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3): + ''' + SEEDS-3 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 3 + Arxiv: https://arxiv.org/abs/2305.14267 + ''' + extra_args = {} if extra_args is None else extra_args + seed = extra_args.get("seed", None) + noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + inject_noise = eta > 0 and s_noise > 0 + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + x = denoised + else: + t, t_next = -sigmas[i].log(), -sigmas[i + 1].log() + h = t_next - t + h_eta = h * (eta + 1) + s_1 = t + r_1 * h + s_2 = t + r_2 * h + sigma_s_1, sigma_s_2 = s_1.neg().exp(), s_2.neg().exp() + + coeff_1, coeff_2, coeff_3 = (-r_1 * h_eta).expm1(), (-r_2 * h_eta).expm1(), (-h_eta).expm1() + if inject_noise: + noise_coeff_1 = (-2 * r_1 * h * eta).expm1().neg().sqrt() + noise_coeff_2 = ((-2 * r_1 * h * eta).expm1() - (-2 * r_2 * h * eta).expm1()).sqrt() + noise_coeff_3 = ((-2 * r_2 * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt() + noise_1, noise_2, noise_3 = noise_sampler(sigmas[i], sigma_s_1), noise_sampler(sigma_s_1, sigma_s_2), noise_sampler(sigma_s_2, sigmas[i + 1]) + + # Step 1 + x_2 = (coeff_1 + 1) * x - coeff_1 * denoised + if inject_noise: + x_2 = x_2 + sigma_s_1 * (noise_coeff_1 * noise_1) * s_noise + denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args) + + # Step 2 + x_3 = (coeff_2 + 1) * x - coeff_2 * denoised + (r_2 / r_1) * (coeff_2 / (r_2 * h_eta) + 1) * (denoised_2 - denoised) + if inject_noise: + x_3 = x_3 + sigma_s_2 * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise + denoised_3 = model(x_3, sigma_s_2 * s_in, **extra_args) + + # Step 3 + x = (coeff_3 + 1) * x - coeff_3 * denoised + (1. / r_2) * (coeff_3 / h_eta + 1) * (denoised_3 - denoised) + if inject_noise: + x = x + sigmas[i + 1] * (noise_coeff_3 * noise_1 + noise_coeff_2 * noise_2 + noise_coeff_1 * noise_3) * s_noise + return x diff --git a/comfy/ops.py b/comfy/ops.py index ced461011..9a5c1ee99 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -357,6 +357,25 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None return scaled_fp8_op +CUBLAS_IS_AVAILABLE = False +try: + from cublas_ops import CublasLinear + CUBLAS_IS_AVAILABLE = True +except ImportError: + pass + +if CUBLAS_IS_AVAILABLE: + class cublas_ops(disable_weight_init): + class Linear(CublasLinear, disable_weight_init.Linear): + def reset_parameters(self): + return None + + def forward_comfy_cast_weights(self, input): + return super().forward(input) + + def forward(self, *args, **kwargs): + return super().forward(*args, **kwargs) + def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None): fp8_compute = comfy.model_management.supports_fp8_compute(load_device) if scaled_fp8 is not None: @@ -369,6 +388,15 @@ def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_ ): return fp8_ops + if ( + PerformanceFeature.CublasOps in args.fast and + CUBLAS_IS_AVAILABLE and + weight_dtype == torch.float16 and + (compute_dtype == torch.float16 or compute_dtype is None) + ): + logging.info("Using cublas ops") + return cublas_ops + if compute_dtype is None or weight_dtype == compute_dtype: return disable_weight_init diff --git a/comfy/samplers.py b/comfy/samplers.py index 10728bd1f..27dfce45a 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -710,7 +710,7 @@ KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_c "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu", "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp", - "gradient_estimation", "er_sde"] + "gradient_estimation", "er_sde", "seeds_2", "seeds_3"] class KSAMPLER(Sampler): def __init__(self, sampler_function, extra_options={}, inpaint_options={}): diff --git a/folder_paths.py b/folder_paths.py index 72c70f594..9a525e5a1 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -85,6 +85,7 @@ cache_helper = CacheHelper() extension_mimetypes_cache = { "webp" : "image", + "fbx" : "model", } def map_legacy(folder_name: str) -> str: @@ -140,11 +141,14 @@ def get_directory_by_type(type_name: str) -> str | None: return get_input_directory() return None -def filter_files_content_types(files: list[str], content_types: Literal["image", "video", "audio"]) -> list[str]: +def filter_files_content_types(files: list[str], content_types: Literal["image", "video", "audio", "model"]) -> list[str]: """ Example: files = os.listdir(folder_paths.get_input_directory()) - filter_files_content_types(files, ["image", "audio", "video"]) + videos = filter_files_content_types(files, ["video"]) + + Note: + - 'model' in MIME context refers to 3D models, not files containing trained weights and parameters """ global extension_mimetypes_cache result = [] diff --git a/main.py b/main.py index 4780a9c69..ac9d24b7b 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,7 @@ from app.logger import setup_logger import itertools import utils.extra_config import logging +import sys if __name__ == "__main__": #NOTE: These do not do anything on core ComfyUI which should already have no communication with the internet, they are for custom nodes. @@ -301,6 +302,7 @@ def start_comfyui(asyncio_loop=None): if __name__ == "__main__": # Running directly, just start ComfyUI. + logging.info("Python version: {}".format(sys.version)) logging.info("ComfyUI version: {}".format(comfyui_version.__version__)) event_loop, _, start_all_func = start_comfyui() diff --git a/nodes.py b/nodes.py index 8c1720c1a..e2893e83a 100644 --- a/nodes.py +++ b/nodes.py @@ -1654,6 +1654,7 @@ class LoadImage: def INPUT_TYPES(s): input_dir = folder_paths.get_input_directory() files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))] + files = folder_paths.filter_files_content_types(files, ["image"]) return {"required": {"image": (sorted(files), {"image_upload": True})}, } diff --git a/tests-unit/folder_paths_test/filter_by_content_types_test.py b/tests-unit/folder_paths_test/filter_by_content_types_test.py index 423677a60..683f9fc11 100644 --- a/tests-unit/folder_paths_test/filter_by_content_types_test.py +++ b/tests-unit/folder_paths_test/filter_by_content_types_test.py @@ -1,14 +1,17 @@ import pytest import os import tempfile -from folder_paths import filter_files_content_types +from folder_paths import filter_files_content_types, extension_mimetypes_cache +from unittest.mock import patch + @pytest.fixture(scope="module") def file_extensions(): return { 'image': ['gif', 'heif', 'ico', 'jpeg', 'jpg', 'png', 'pnm', 'ppm', 'svg', 'tiff', 'webp', 'xbm', 'xpm'], 'audio': ['aif', 'aifc', 'aiff', 'au', 'flac', 'm4a', 'mp2', 'mp3', 'ogg', 'snd', 'wav'], - 'video': ['avi', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ogv', 'qt', 'webm', 'wmv'] + 'video': ['avi', 'm2v', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ogv', 'qt', 'webm', 'wmv'], + 'model': ['gltf', 'glb', 'obj', 'fbx', 'stl'] } @@ -22,7 +25,18 @@ def mock_dir(file_extensions): yield directory -def test_categorizes_all_correctly(mock_dir, file_extensions): +@pytest.fixture +def patched_mimetype_cache(file_extensions): + # Mock model file extensions since they may not be in the test-runner system's mimetype cache + new_cache = extension_mimetypes_cache.copy() + for extension in file_extensions["model"]: + new_cache[extension] = "model" + + with patch("folder_paths.extension_mimetypes_cache", new_cache): + yield + + +def test_categorizes_all_correctly(mock_dir, file_extensions, patched_mimetype_cache): files = os.listdir(mock_dir) for content_type, extensions in file_extensions.items(): filtered_files = filter_files_content_types(files, [content_type]) @@ -30,7 +44,7 @@ def test_categorizes_all_correctly(mock_dir, file_extensions): assert f"sample_{content_type}.{extension}" in filtered_files -def test_categorizes_all_uniquely(mock_dir, file_extensions): +def test_categorizes_all_uniquely(mock_dir, file_extensions, patched_mimetype_cache): files = os.listdir(mock_dir) for content_type, extensions in file_extensions.items(): filtered_files = filter_files_content_types(files, [content_type])