Merge branch 'master' into matt/be-891-local-add-job_ids-filter-param-to-get-apiassets

This commit is contained in:
Matt Miller 2026-06-09 21:52:34 -07:00 committed by GitHub
commit 7da72900c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 46 additions and 24 deletions

View File

@ -534,18 +534,14 @@ async def update_asset_route(request: web.Request) -> web.Response:
@_require_assets_feature_enabled
async def delete_asset_route(request: web.Request) -> web.Response:
reference_id = str(uuid.UUID(request.match_info["id"]))
delete_content_param = request.query.get("delete_content")
delete_content = (
False
if delete_content_param is None
else delete_content_param.lower() not in {"0", "false", "no"}
)
try:
# Deleting an asset is a soft delete of the reference; the underlying
# content is preserved (it may be shared with other references).
deleted = delete_asset_reference(
reference_id=reference_id,
owner_id=USER_MANAGER.get_request_user_id(request),
delete_content_if_orphan=delete_content,
delete_content_if_orphan=False,
)
except Exception:
logging.exception(

View File

@ -160,6 +160,16 @@ def delete_asset_reference(
owner_id: str,
delete_content_if_orphan: bool = True,
) -> bool:
"""Delete an asset reference.
With ``delete_content_if_orphan=False`` (a soft delete), the reference is
hidden and the underlying content is preserved. With ``True``, the content
is also removed once it becomes orphaned.
Note: the public DELETE /api/assets/{id} endpoint always soft-deletes
(passes ``False``); the orphan-reclamation path is intentionally
internal-only, retained for a future GC/admin caller.
"""
with create_session() as session:
if not delete_content_if_orphan:
# Soft delete: mark the reference as deleted but keep everything

View File

@ -6,6 +6,7 @@ import subprocess
import sys
import tempfile
import time
import uuid
from pathlib import Path
from typing import Callable, Iterator, Optional
@ -188,9 +189,17 @@ def _post_multipart_asset(
@pytest.fixture
def make_asset_bytes() -> Callable[[str, int], bytes]:
# Salt content per test so it never collides with assets left over from
# earlier tests. Delete is now always a soft delete (content is preserved),
# so the suite can no longer rely on hard-deleting content for isolation.
# Deterministic within a test: the same (name, size) yields the same bytes.
salt = uuid.uuid4().bytes
def _make(name: str, size: int = 8192) -> bytes:
seed = sum(ord(c) for c in name) % 251
return bytes((i * 31 + seed) % 256 for i in range(size))
body = bytearray((i * 31 + seed) % 256 for i in range(size))
body[: len(salt)] = salt[:size]
return bytes(body)
return _make
@ -212,7 +221,7 @@ def asset_factory(http: requests.Session, api_base: str):
for aid in created:
with contextlib.suppress(Exception):
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
@pytest.fixture
@ -227,7 +236,11 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas
if tags is None:
tags = ["models", "checkpoints", "unit-tests", "alpha"]
meta = {"purpose": "test", "epoch": 1, "flags": ["x", "y"], "nullable": None}
files = {"file": (name, b"A" * 4096, "application/octet-stream")}
# Unique content per test so the seed always creates a fresh asset (201).
# Delete is now always a soft delete, so content from a prior test survives
# and would otherwise dedup this upload into an existing asset (200).
content = uuid.uuid4().bytes + b"A" * (4096 - 16)
files = {"file": (name, content, "application/octet-stream")}
form_data = {
"tags": json.dumps(tags),
"name": name,
@ -260,4 +273,4 @@ def autoclean_unit_test_assets(http: requests.Session, api_base: str):
break
for aid in ids:
with contextlib.suppress(Exception):
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)

View File

@ -45,8 +45,8 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
assert "user_metadata" in detail
assert "filename" in detail["user_metadata"]
# DELETE (hard delete to also remove underlying asset and file)
rd = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
# Soft delete — the reference is hidden, content is preserved
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
assert rd.status_code == 204
# GET again -> 404
@ -60,7 +60,7 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede
aid = seeded_asset["id"]
asset_hash = seeded_asset["asset_hash"]
# Soft-delete (default, no delete_content param)
# Soft delete — the reference is hidden, content is preserved
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
assert rd.status_code == 204
@ -81,11 +81,10 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede
ids = [a["id"] for a in rl.json().get("assets", [])]
assert aid not in ids
# Clean up: hard-delete the soft-deleted reference and orphaned asset
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
# The reference is already soft-deleted; content is preserved.
def test_delete_upon_reference_count(
def test_soft_delete_preserves_asset_identity_across_references(
http: requests.Session, api_base: str, seeded_asset: dict
):
# Create a second reference to the same asset via from-hash
@ -119,16 +118,20 @@ def test_delete_upon_reference_count(
rh2 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
assert rh2.status_code == 200 # asset identity preserved (soft delete)
# Re-associate via from-hash, then hard-delete -> orphan content removed
# Re-associate via from-hash: it must reuse the same preserved content
# (created_new False AND the same hash), proving the soft deletes did not
# destroy the underlying asset. Then soft-delete again -> still preserved.
r3 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
assert r3.status_code == 201, r3.json()
assert r3.json()["created_new"] is False
assert r3.json()["asset_hash"] == src_hash # reused the surviving content
aid3 = r3.json()["id"]
rd3 = http.delete(f"{api_base}/api/assets/{aid3}?delete_content=true", timeout=120)
rd3 = http.delete(f"{api_base}/api/assets/{aid3}", timeout=120)
assert rd3.status_code == 204
rh3 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
assert rh3.status_code == 404 # orphan content removed
assert rh3.status_code == 200 # content preserved (soft delete)
def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset: dict):
@ -249,7 +252,7 @@ def test_concurrent_delete_same_asset_info_single_204(
# Hit the same endpoint N times in parallel.
n_tests = 4
url = f"{api_base}/api/assets/{aid}?delete_content=false"
url = f"{api_base}/api/assets/{aid}"
def _do_delete(delete_url):
with requests.Session() as s:

View File

@ -117,7 +117,7 @@ def test_download_missing_file_returns_404(
assert body["error"]["code"] == "FILE_NOT_FOUND"
finally:
# We created asset without the "unit-tests" tag(see `autoclean_unit_test_assets`), we need to clear it manually.
dr = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
dr = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
dr.content

View File

@ -69,8 +69,8 @@ def test_tags_empty_usage(http: requests.Session, api_base: str, asset_factory,
used_names = [t["name"] for t in body2["tags"]]
assert custom_tag in used_names
# Hard-delete the asset so the tag usage drops to zero
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}?delete_content=true", timeout=120)
# Delete the asset reference so the tag usage drops to zero
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}", timeout=120)
assert rd.status_code == 204
# Now the custom tag must not be returned when include_zero=false