From 0af123022de374a091d7bf6ca6ad767fa6dcc69d Mon Sep 17 00:00:00 2001
From: Comfy Org PR Bot <snomiao+comfy-pr@gmail.com>
Date: Sun, 24 May 2026 09:27:52 +0900
Subject: [PATCH 1/6] Bump comfyui-frontend-package to 1.44.19 (#14074)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e20b6e044..b70c21e1e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.43.18
+comfyui-frontend-package==1.44.19
 comfyui-workflow-templates==0.9.82
 comfyui-embedded-docs==0.5.0
 torch

From 08d809d128df9c6b6800dbb4198cf11cabc5422e Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 23 May 2026 17:44:28 -0700
Subject: [PATCH 2/6] Fix --use-flash-attention ignored when xformers
 installed. (#14083)

---
 comfy/ldm/modules/attention.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index a68cb8439..55360535a 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -741,12 +741,12 @@ optimized_attention = attention_basic
 if model_management.sage_attention_enabled():
     logging.info("Using sage attention")
     optimized_attention = attention_sage
-elif model_management.xformers_enabled():
-    logging.info("Using xformers attention")
-    optimized_attention = attention_xformers
 elif model_management.flash_attention_enabled():
     logging.info("Using Flash Attention")
     optimized_attention = attention_flash
+elif model_management.xformers_enabled():
+    logging.info("Using xformers attention")
+    optimized_attention = attention_xformers
 elif model_management.pytorch_attention_enabled():
     logging.info("Using pytorch attention")
     optimized_attention = attention_pytorch

From 32a7092c52d2cee053fded50a6e12c7e275b195e Mon Sep 17 00:00:00 2001
From: Robin Huang <robin.j.huang@gmail.com>
Date: Sat, 23 May 2026 19:48:31 -0700
Subject: [PATCH 3/6] fix: correct description of where compiled FE files live
 (#14013)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5125bad14..dc2389266 100644
--- a/README.md
+++ b/README.md
@@ -433,7 +433,7 @@ See also: [https://www.comfy.org/](https://www.comfy.org/)
 
 ## Frontend Development
 
-As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). This repository now hosts the compiled JS (from TS/Vue) under the `web/` directory.
+As of August 15, 2024, we have transitioned to a new frontend, which is now hosted in a separate repository: [ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend). The compiled JS files (from TS/Vue) are published to [pypi](https://pypi.org/project/comfyui-frontend-package) and installed as a dependency in ComfyUI.
 
 ### Reporting Issues and Requesting Features
 

From ea62dc11c9a10dae52186fdcc3da033eb46018a1 Mon Sep 17 00:00:00 2001
From: Matt Miller <mattmiller@comfy.org>
Date: Sat, 23 May 2026 19:58:35 -0700
Subject: [PATCH 4/6] openapi: fix invalid BillingStatus schema (object + enum
 hybrid) (#14071)

---
 openapi.yaml | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index 2347bd659..502e518c7 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -9585,16 +9585,9 @@ components:
           description: List of plan features
 
     BillingStatus:
-      type: object
+      type: string
       x-runtime: [cloud]
-      description: "[cloud-only] Overall billing and subscription status."
-      properties:
-        subscription:
-          $ref: "#/components/schemas/BillingSubscription"
-        balance:
-          $ref: "#/components/schemas/BillingBalance"
-        has_payment_method:
-          type: boolean
+      description: "[cloud-only] Overall billing/payment lifecycle status."
       enum:
         - awaiting_payment_method
         - pending_payment

From 39f963b4b02522b0103fe7ca53fa8d1a0d17ceae Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Mon, 25 May 2026 08:25:59 +1000
Subject: [PATCH 5/6] mark loads to pins as cold immediately (#14088)

This does the posix_fadvise to kick pins out of the disk cache (to
avoid a double copy in RAM).
---
 comfy/model_management.py | 2 +-
 requirements.txt          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 3894dfa9c..cd8772d3a 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1217,7 +1217,7 @@ def get_aimdo_cast_buffer(offload_stream, device):
 def get_pin_buffer(offload_stream):
     pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
     if pin_buffer is None:
-        pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3))
+        pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3), mark_cold=False)
         STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
     elif offload_stream is not None:
         event = getattr(pin_buffer, "_comfy_event", None)
diff --git a/requirements.txt b/requirements.txt
index b70c21e1e..a22fa50ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.4.3
+comfy-aimdo==0.4.5
 requests
 simpleeval>=1.0.0
 blake3

From b30e980a206607d1a9d56b7a6f7df3999d68438a Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Mon, 25 May 2026 08:26:50 +1000
Subject: [PATCH 6/6] cache-ram: lower thresholds (#14089)

Use the RAM right up to the wire, as the community is a bit accustomed to.

This trades off headroom for the case where large chunky intermediates
arrive and potentially hit pagefile/swap, but a lot of people have
"it just fits" workflows out there, so strike a compromise with
75->90%.

Disable the inactive cache for all but the very high RAM users.
---
 comfy/cli_args.py | 2 +-
 main.py           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 9d88c8517..47b8174f4 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -111,7 +111,7 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent
 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
 
 cache_group = parser.add_mutually_exclusive_group()
-cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 25%% of system RAM (min 4GB, max 32GB), inactive 75%% of system RAM (min 12GB, max 96GB).")
+cache_group.add_argument("--cache-ram", nargs='*', type=float, default=[], metavar="GB", help="Use RAM pressure caching with the specified headroom thresholds. This is the default caching mode. The first value sets the active-cache threshold; the optional second value sets the inactive-cache/pin threshold. Defaults when no values are provided: active 10%% of system RAM (min 2GB, max 10GB), inactive 100%% of system RAM (max 96GB).")
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
diff --git a/main.py b/main.py
index 1e47cab84..f23074942 100644
--- a/main.py
+++ b/main.py
@@ -286,8 +286,8 @@ def prompt_worker(q, server_instance):
     cache_ram = 0
     cache_ram_inactive = 0
     if not args.cache_classic and not args.cache_none and args.cache_lru <= 0:
-        cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
-        cache_ram_inactive = min(96.0, max(12.0, comfy.model_management.total_ram * 0.75 / 1024.0))
+        cache_ram = min(10.0, max(2.0, comfy.model_management.total_ram * 0.10 / 1024.0))
+        cache_ram_inactive = min(96.0, comfy.model_management.total_ram / 1024.0)
         if len(args.cache_ram) > 0:
             cache_ram = args.cache_ram[0]
         if len(args.cache_ram) > 1: