diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 544a614f2..7ef462f5c 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -533,18 +533,14 @@ async def update_asset_route(request: web.Request) -> web.Response: @_require_assets_feature_enabled async def delete_asset_route(request: web.Request) -> web.Response: reference_id = str(uuid.UUID(request.match_info["id"])) - delete_content_param = request.query.get("delete_content") - delete_content = ( - False - if delete_content_param is None - else delete_content_param.lower() not in {"0", "false", "no"} - ) try: + # Deleting an asset is a soft delete of the reference; the underlying + # content is preserved (it may be shared with other references). deleted = delete_asset_reference( reference_id=reference_id, owner_id=USER_MANAGER.get_request_user_id(request), - delete_content_if_orphan=delete_content, + delete_content_if_orphan=False, ) except Exception: logging.exception( diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index 1072c95fa..d4e4fc61c 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -160,6 +160,16 @@ def delete_asset_reference( owner_id: str, delete_content_if_orphan: bool = True, ) -> bool: + """Delete an asset reference. + + With ``delete_content_if_orphan=False`` (a soft delete), the reference is + hidden and the underlying content is preserved. With ``True``, the content + is also removed once it becomes orphaned. + + Note: the public DELETE /api/assets/{id} endpoint always soft-deletes + (passes ``False``); the orphan-reclamation path is intentionally + internal-only, retained for a future GC/admin caller. + """ with create_session() as session: if not delete_content_if_orphan: # Soft delete: mark the reference as deleted but keep everything diff --git a/tests-unit/assets_test/conftest.py b/tests-unit/assets_test/conftest.py index 9867b4e14..4aa20372f 100644 --- a/tests-unit/assets_test/conftest.py +++ b/tests-unit/assets_test/conftest.py @@ -6,6 +6,7 @@ import subprocess import sys import tempfile import time +import uuid from pathlib import Path from typing import Callable, Iterator, Optional @@ -188,9 +189,17 @@ def _post_multipart_asset( @pytest.fixture def make_asset_bytes() -> Callable[[str, int], bytes]: + # Salt content per test so it never collides with assets left over from + # earlier tests. Delete is now always a soft delete (content is preserved), + # so the suite can no longer rely on hard-deleting content for isolation. + # Deterministic within a test: the same (name, size) yields the same bytes. + salt = uuid.uuid4().bytes + def _make(name: str, size: int = 8192) -> bytes: seed = sum(ord(c) for c in name) % 251 - return bytes((i * 31 + seed) % 256 for i in range(size)) + body = bytearray((i * 31 + seed) % 256 for i in range(size)) + body[: len(salt)] = salt[:size] + return bytes(body) return _make @@ -212,7 +221,7 @@ def asset_factory(http: requests.Session, api_base: str): for aid in created: with contextlib.suppress(Exception): - http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30) + http.delete(f"{api_base}/api/assets/{aid}", timeout=30) @pytest.fixture @@ -227,7 +236,11 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas if tags is None: tags = ["models", "checkpoints", "unit-tests", "alpha"] meta = {"purpose": "test", "epoch": 1, "flags": ["x", "y"], "nullable": None} - files = {"file": (name, b"A" * 4096, "application/octet-stream")} + # Unique content per test so the seed always creates a fresh asset (201). + # Delete is now always a soft delete, so content from a prior test survives + # and would otherwise dedup this upload into an existing asset (200). + content = uuid.uuid4().bytes + b"A" * (4096 - 16) + files = {"file": (name, content, "application/octet-stream")} form_data = { "tags": json.dumps(tags), "name": name, @@ -260,4 +273,4 @@ def autoclean_unit_test_assets(http: requests.Session, api_base: str): break for aid in ids: with contextlib.suppress(Exception): - http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30) + http.delete(f"{api_base}/api/assets/{aid}", timeout=30) diff --git a/tests-unit/assets_test/test_crud.py b/tests-unit/assets_test/test_crud.py index fd2e9a098..36abb60ee 100644 --- a/tests-unit/assets_test/test_crud.py +++ b/tests-unit/assets_test/test_crud.py @@ -45,8 +45,8 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse assert "user_metadata" in detail assert "filename" in detail["user_metadata"] - # DELETE (hard delete to also remove underlying asset and file) - rd = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120) + # Soft delete — the reference is hidden, content is preserved + rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120) assert rd.status_code == 204 # GET again -> 404 @@ -60,7 +60,7 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede aid = seeded_asset["id"] asset_hash = seeded_asset["asset_hash"] - # Soft-delete (default, no delete_content param) + # Soft delete — the reference is hidden, content is preserved rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120) assert rd.status_code == 204 @@ -81,11 +81,10 @@ def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seede ids = [a["id"] for a in rl.json().get("assets", [])] assert aid not in ids - # Clean up: hard-delete the soft-deleted reference and orphaned asset - http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120) + # The reference is already soft-deleted; content is preserved. -def test_delete_upon_reference_count( +def test_soft_delete_preserves_asset_identity_across_references( http: requests.Session, api_base: str, seeded_asset: dict ): # Create a second reference to the same asset via from-hash @@ -119,16 +118,20 @@ def test_delete_upon_reference_count( rh2 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120) assert rh2.status_code == 200 # asset identity preserved (soft delete) - # Re-associate via from-hash, then hard-delete -> orphan content removed + # Re-associate via from-hash: it must reuse the same preserved content + # (created_new False AND the same hash), proving the soft deletes did not + # destroy the underlying asset. Then soft-delete again -> still preserved. r3 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120) assert r3.status_code == 201, r3.json() + assert r3.json()["created_new"] is False + assert r3.json()["asset_hash"] == src_hash # reused the surviving content aid3 = r3.json()["id"] - rd3 = http.delete(f"{api_base}/api/assets/{aid3}?delete_content=true", timeout=120) + rd3 = http.delete(f"{api_base}/api/assets/{aid3}", timeout=120) assert rd3.status_code == 204 rh3 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120) - assert rh3.status_code == 404 # orphan content removed + assert rh3.status_code == 200 # content preserved (soft delete) def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset: dict): @@ -249,7 +252,7 @@ def test_concurrent_delete_same_asset_info_single_204( # Hit the same endpoint N times in parallel. n_tests = 4 - url = f"{api_base}/api/assets/{aid}?delete_content=false" + url = f"{api_base}/api/assets/{aid}" def _do_delete(delete_url): with requests.Session() as s: diff --git a/tests-unit/assets_test/test_downloads.py b/tests-unit/assets_test/test_downloads.py index 672ba9728..42c64a5fd 100644 --- a/tests-unit/assets_test/test_downloads.py +++ b/tests-unit/assets_test/test_downloads.py @@ -117,7 +117,7 @@ def test_download_missing_file_returns_404( assert body["error"]["code"] == "FILE_NOT_FOUND" finally: # We created asset without the "unit-tests" tag(see `autoclean_unit_test_assets`), we need to clear it manually. - dr = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120) + dr = http.delete(f"{api_base}/api/assets/{aid}", timeout=120) dr.content diff --git a/tests-unit/assets_test/test_tags_api.py b/tests-unit/assets_test/test_tags_api.py index 595bf29c6..9729b7d03 100644 --- a/tests-unit/assets_test/test_tags_api.py +++ b/tests-unit/assets_test/test_tags_api.py @@ -69,8 +69,8 @@ def test_tags_empty_usage(http: requests.Session, api_base: str, asset_factory, used_names = [t["name"] for t in body2["tags"]] assert custom_tag in used_names - # Hard-delete the asset so the tag usage drops to zero - rd = http.delete(f"{api_base}/api/assets/{_asset['id']}?delete_content=true", timeout=120) + # Delete the asset reference so the tag usage drops to zero + rd = http.delete(f"{api_base}/api/assets/{_asset['id']}", timeout=120) assert rd.status_code == 204 # Now the custom tag must not be returned when include_zero=false