diff --git a/app/frontend_management.py b/app/frontend_management.py index fcf94c558..cdae228dc 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -228,11 +228,26 @@ class FrontendManager: def _auto_managed_metadata_path(cls, repo_owner: str, repo_name: str) -> Path: return cls._provider_dir(repo_owner, repo_name) / cls.AUTO_MANAGED_METADATA_FILENAME + # A version directory name must look like a simple semver-ish token. We + # use this as a defensive allowlist when interpreting metadata so a + # malformed or tampered ``.auto_managed.json`` cannot point cleanup at + # paths outside the provider directory (e.g. ``../somewhere``). + _VERSION_DIRNAME_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") + + @classmethod + def _is_safe_version_dirname(cls, name: str) -> bool: + if not isinstance(name, str): + return False + if name in (".", "..") or "/" in name or "\\" in name or "\x00" in name: + return False + return bool(cls._VERSION_DIRNAME_PATTERN.match(name)) + @classmethod def _read_auto_managed_versions(cls, repo_owner: str, repo_name: str) -> list[str]: """Return the list of versions previously downloaded under an - auto-managed specifier for this provider. Missing or unreadable - metadata is treated as an empty list.""" + auto-managed specifier for this provider. Missing, unreadable, or + otherwise malformed metadata is treated as an empty list so a bad + file never blocks startup or directs cleanup at unrelated paths.""" metadata_path = cls._auto_managed_metadata_path(repo_owner, repo_name) if not metadata_path.exists(): return [] @@ -246,20 +261,31 @@ class FrontendManager: exc, ) return [] + if not isinstance(data, dict): + logging.warning( + "Frontend auto-managed metadata at %s has unexpected shape; ignoring.", + metadata_path, + ) + return [] versions = data.get("auto_managed", []) if not isinstance(versions, list): return [] - return [str(v) for v in versions if isinstance(v, (str, int))] + # Filter out anything that doesn't look like a safe version dirname + # so a tampered file can't point us at, say, ``../../etc``. + return [v for v in versions if cls._is_safe_version_dirname(v)] @classmethod def _write_auto_managed_versions( cls, repo_owner: str, repo_name: str, versions: list[str] ) -> None: """Persist the auto-managed version list atomically. Deduped and - sorted for stability so the file is friendly to diffs.""" + sorted for stability so the file is friendly to diffs. Any entry that + doesn't look like a safe version dirname is dropped before write so + the on-disk metadata always contains valid values.""" metadata_path = cls._auto_managed_metadata_path(repo_owner, repo_name) metadata_path.parent.mkdir(parents=True, exist_ok=True) - payload = {"auto_managed": sorted(set(versions))} + safe_versions = [v for v in versions if cls._is_safe_version_dirname(v)] + payload = {"auto_managed": sorted(set(safe_versions))} # Atomic write via temp file + rename so a crashed process can't leave # a half-written metadata file behind. tmp_path = metadata_path.with_suffix(metadata_path.suffix + ".tmp") @@ -294,12 +320,54 @@ class FrontendManager: return provider_dir = cls._provider_dir(repo_owner, repo_name) + try: + provider_dir_resolved = provider_dir.resolve() + except OSError as exc: + logging.warning( + "Could not resolve provider directory %s for cleanup: %s", + provider_dir, + exc, + ) + return + for stale_version in tracked: if stale_version == keep_version: continue + # ``_read_auto_managed_versions`` already filters tracked entries + # through ``_is_safe_version_dirname``, but re-check here so that + # this helper is also safe when called with externally-supplied + # version lists (and so a defense-in-depth audit can confirm the + # rmtree target lives under the provider directory). + if not cls._is_safe_version_dirname(stale_version): + logging.warning( + "Refusing to clean up suspicious frontend version name: %r", + stale_version, + ) + continue stale_path = provider_dir / stale_version if not stale_path.exists(): continue + try: + stale_resolved = stale_path.resolve() + except OSError as exc: + logging.warning( + "Could not resolve stale frontend path %s: %s", + stale_path, + exc, + ) + continue + # Ensure the resolved target lives strictly under the resolved + # provider directory (so symlinks / path tricks can't escape). + if ( + stale_resolved == provider_dir_resolved + or provider_dir_resolved not in stale_resolved.parents + ): + logging.warning( + "Refusing to remove path outside provider dir: %s (provider=%s)", + stale_resolved, + provider_dir_resolved, + ) + continue try: shutil.rmtree(stale_path) logging.info( diff --git a/tests-unit/app_test/frontend_manager_test.py b/tests-unit/app_test/frontend_manager_test.py index f62f902a5..2e9a31f19 100644 --- a/tests-unit/app_test/frontend_manager_test.py +++ b/tests-unit/app_test/frontend_manager_test.py @@ -331,6 +331,68 @@ def test_read_auto_managed_versions_corrupt(custom_frontends_root): assert FrontendManager._read_auto_managed_versions("o", "r") == [] +@pytest.mark.parametrize("payload", ["[]", "null", '"oops"', "42"]) +def test_read_auto_managed_versions_non_dict_root(custom_frontends_root, payload): + """Valid JSON whose root isn't a dict must not raise — bug pointed out in + Oracle review: ``data.get(...)`` would throw on non-dict roots.""" + provider_dir = custom_frontends_root / "o_r" + provider_dir.mkdir() + (provider_dir / FrontendManager.AUTO_MANAGED_METADATA_FILENAME).write_text(payload) + assert FrontendManager._read_auto_managed_versions("o", "r") == [] + + +def test_read_auto_managed_versions_drops_unsafe_names(custom_frontends_root): + """Tampered metadata containing path-traversal-y names must be ignored + at read time so cleanup never even sees them.""" + provider_dir = custom_frontends_root / "o_r" + provider_dir.mkdir() + (provider_dir / FrontendManager.AUTO_MANAGED_METADATA_FILENAME).write_text( + '{"auto_managed": ["1.0.0", "../escape", "/etc", "..", ".", "ok-1.2"]}' + ) + assert FrontendManager._read_auto_managed_versions("o", "r") == ["1.0.0", "ok-1.2"] + + +def test_write_auto_managed_versions_drops_unsafe_names(custom_frontends_root): + """Even if a caller passes a tainted list, the file we persist must + only contain safe names.""" + FrontendManager._write_auto_managed_versions( + "o", "r", ["1.0.0", "../escape", "/etc/passwd", "..", "."] + ) + assert FrontendManager._read_auto_managed_versions("o", "r") == ["1.0.0"] + + +def test_prune_refuses_to_delete_outside_provider_dir( + custom_frontends_root, monkeypatch +): + """Defense in depth: even if a hostile name slips past the read/write + filters somehow, ``_prune_auto_managed_versions`` must refuse to rmtree + anything outside the provider directory.""" + # Set up a sibling directory that must NOT be touched. + sibling = custom_frontends_root / "do-not-touch" + sibling.mkdir() + (sibling / "marker").write_text("keep me") + + provider_dir = custom_frontends_root / "o_r" + provider_dir.mkdir() + _make_version_dir(custom_frontends_root, "o", "r", "1.0.0") + + # Bypass the read filter to simulate a corrupt-but-parseable list slipping + # through (e.g. older code wrote it before this hardening landed). + monkeypatch.setattr( + FrontendManager, + "_read_auto_managed_versions", + classmethod(lambda cls, owner, repo: ["1.0.0", "../do-not-touch"]), + ) + + FrontendManager._prune_auto_managed_versions("o", "r", keep_version="2.0.0") + + # Sibling untouched. + assert sibling.exists() + assert (sibling / "marker").read_text() == "keep me" + # In-bounds folder still gets cleaned. + assert not (provider_dir / "1.0.0").exists() + + def test_prune_auto_managed_versions_removes_stale_and_keeps_pinned( custom_frontends_root, ):