From 2015bbb54a118bccdfb1c94d77072eea49ce58a3 Mon Sep 17 00:00:00 2001
From: Matt Miller <mattmiller@comfy.org>
Date: Thu, 21 May 2026 14:41:17 -0700
Subject: [PATCH] fix(assets): cap cursors by encoded wire size, not just char
 count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Char-count guards on value/id can still let multibyte or escape-heavy
inputs blow past MAX_ENCODED_CURSOR_LENGTH once UTF-8 + escape expansion
+ base64url runs. A 512-character name of 'é' (2 bytes UTF-8) or '<'
(serializes to the 6-byte '\u003c' escape) passes the char check, mints
a ~1500-byte cursor, then 400s when handed back on the next request.

Compute the final encoded form and reject it before returning if it
exceeds the wire cap. Adds regression tests for both inflation paths.
---
 app/assets/services/cursor.py                  | 11 ++++++++++-
 tests-unit/assets_test/services/test_cursor.py | 15 +++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/app/assets/services/cursor.py b/app/assets/services/cursor.py
index 1bd8af15a..41ad99573 100644
--- a/app/assets/services/cursor.py
+++ b/app/assets/services/cursor.py
@@ -92,7 +92,16 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
            .replace("\u2028", "\\u2028")
            .replace("\u2029", "\\u2029")
     )
-    return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
+    encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
+    # Final wire-size guard: the per-field caps above are char-counted, but the
+    # wire cap applies to the base64url of the UTF-8-encoded, escape-expanded
+    # payload. A value full of multibyte or HTML-significant characters (e.g.
+    # 512 × "é" or 512 × "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even
+    # though it passes the char-count check. Refuse to mint a cursor the decoder
+    # on the next request would reject.
+    if len(encoded) > MAX_ENCODED_CURSOR_LENGTH:
+        raise InvalidCursorError("encoded cursor exceeds maximum length")
+    return encoded
 
 
 def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
diff --git a/tests-unit/assets_test/services/test_cursor.py b/tests-unit/assets_test/services/test_cursor.py
index f015b9708..db114ad47 100644
--- a/tests-unit/assets_test/services/test_cursor.py
+++ b/tests-unit/assets_test/services/test_cursor.py
@@ -215,6 +215,21 @@ class TestEncoderDecoderSymmetry:
         with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
             encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
 
+    def test_encoder_rejects_multibyte_value_over_wire_cap(self):
+        """A value that passes the char-count cap can still inflate past the
+        wire cap once UTF-8-encoded. Asset name made of 512 × multibyte
+        characters (e.g. 'é' = 2 bytes) must be rejected at encode time, not
+        minted into a cursor the next request will 400."""
+        with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
+            encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte")
+
+    def test_encoder_rejects_escape_heavy_value_over_wire_cap(self):
+        """Same wire-cap concern via escape expansion: each `<` serializes to
+        the six-byte sequence `\\u003c`, so 512 of them blow past the encoded
+        cap even though the raw char count is within the per-field limit."""
+        with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
+            encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape")
+
 
 class TestOrderBinding:
     def test_order_baked_into_payload(self):