diff --git a/README.md b/README.md index 5125bad14..dc2389266 100644 --- a/README.md +++ b/README.md @@ -433,7 +433,7 @@ See also: [https://www.comfy.org/](https://www.comfy.org/) ## Frontend Development -As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). This repository now hosts the compiled JS (from TS/Vue) under the `web/` directory. +As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). The compiled JS files (from TS/Vue) are published to [pypi](https://pypi.org/project/comfyui-frontend-package) and installed as a dependency in ComfyUI. ### Reporting Issues and Requesting Features diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 33e838f0b..9bda414d1 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -111,7 +111,7 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.") cache_group = parser.add_mutually_exclusive_group() -cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).") +cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 10%% of system RAM (min 2GB, max 10GB), inactive 100%% of system RAM (max 96GB).") cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.") cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.") cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.") diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index a68cb8439..55360535a 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -741,12 +741,12 @@ optimized_attention = attention_basic if model_management.sage_attention_enabled(): logging.info("Using sage attention") optimized_attention = attention_sage -elif model_management.xformers_enabled(): - logging.info("Using xformers attention") - optimized_attention = attention_xformers elif model_management.flash_attention_enabled(): logging.info("Using Flash Attention") optimized_attention = attention_flash +elif model_management.xformers_enabled(): + logging.info("Using xformers attention") + optimized_attention = attention_xformers elif model_management.pytorch_attention_enabled(): logging.info("Using pytorch attention") optimized_attention = attention_pytorch diff --git a/comfy/model_management.py b/comfy/model_management.py index 3bce128b2..b01c4d7fa 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1329,7 +1329,7 @@ def get_aimdo_cast_buffer(offload_stream, device): def get_pin_buffer(offload_stream): pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None) if pin_buffer is None: - pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3)) + pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3), mark_cold=False) STREAM_PIN_BUFFERS[offload_stream] = pin_buffer elif offload_stream is not None: event = getattr(pin_buffer, "_comfy_event", None) diff --git a/main.py b/main.py index fe824439c..3e206c38f 100644 --- a/main.py +++ b/main.py @@ -286,8 +286,8 @@ def prompt_worker(q, server_instance): cache_ram = 0 cache_ram_inactive = 0 if not args.cache_classic and not args.cache_none and args.cache_lru <= 0: - cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0)) - cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0)) + cache_ram = min(10.0, max(2.0, comfy.model_management.total_ram * 0.10 / 1024.0)) + cache_ram_inactive = min(96.0, comfy.model_management.total_ram / 1024.0) if len(args.cache_ram) > 0: cache_ram = args.cache_ram[0] if len(args.cache_ram) > 1: diff --git a/openapi.yaml b/openapi.yaml index 2347bd659..502e518c7 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -9585,16 +9585,9 @@ components: description: List of plan features BillingStatus: - type: object + type: string x-runtime: [cloud] - description: "[cloud-only] Overall billing and subscription status." - properties: - subscription: - $ref: "#/components/schemas/BillingSubscription" - balance: - $ref: "#/components/schemas/BillingBalance" - has_payment_method: - type: boolean + description: "[cloud-only] Overall billing/payment lifecycle status." enum: - awaiting_payment_method - pending_payment