Add output cleanup policy
This commit is contained in:
@@ -12,6 +12,9 @@ class TestConfig:
|
||||
MAX_CONTENT_LENGTH = 100 * 1024 * 1024
|
||||
PREVIEW_RECORD_LIMIT = 5
|
||||
OUTPUT_DIRECTORY = "test-outputs"
|
||||
OUTPUT_RETENTION_HOURS = 24
|
||||
CLEANUP_ON_STARTUP = False
|
||||
CLEANUP_AFTER_DOWNLOAD = False
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import io
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from app import create_app
|
||||
|
||||
@@ -132,6 +134,95 @@ def test_download_route_returns_generated_file(client):
|
||||
download_response.close()
|
||||
|
||||
|
||||
def test_download_route_can_cleanup_files_after_download(tmp_path):
    """Downloading a result with CLEANUP_AFTER_DOWNLOAD removes its stored files.

    Drives the full convert -> download flow against a real app instance and
    verifies the metadata sidecar is gone afterwards (the metadata file acts as
    the sentinel for the whole stored result).
    """

    class CleanupAfterDownloadConfig:
        TESTING = True
        SECRET_KEY = "test-secret"
        MAX_CONTENT_LENGTH = 100 * 1024 * 1024
        PREVIEW_RECORD_LIMIT = 5
        # Isolated per-test directory so cleanup cannot touch other tests' files.
        OUTPUT_DIRECTORY = tmp_path / "download-cleanup-outputs"
        OUTPUT_RETENTION_HOURS = 24
        CLEANUP_ON_STARTUP = False
        CLEANUP_AFTER_DOWNLOAD = True

    app = create_app(CleanupAfterDownloadConfig)
    client = app.test_client()
    log_file = io.BytesIO(SAMPLE_LOG.encode("utf-8"))

    convert_response = client.post(
        "/convert",
        data={
            "mode": "vendor",
            "output_format": "csv",
            "sort_by": "datetime",
            "order": "asc",
            "policy_cs": "",
            "policy_ci": "",
            "severity_cs": "",
            "severity_ci": "",
            "log_file": (log_file, "sample.log"),
        },
        content_type="multipart/form-data",
    )
    log_file.close()

    # Fix: fail fast with a clear message if the conversion itself failed,
    # instead of an opaque ValueError from html.index() below.
    assert convert_response.status_code == 200

    html = convert_response.data.decode("utf-8")
    marker = "/download/"
    start = html.index(marker) + len(marker)
    end = html.index('"', start)
    result_id = html[start:end]
    metadata_path = Path(app.config["OUTPUT_DIRECTORY"]) / f"{result_id}.json"

    download_response = client.get(f"/download/{result_id}")
    # Fix: assert the download succeeded before checking the cleanup side
    # effect, so a broken download route cannot masquerade as a cleanup bug.
    assert download_response.status_code == 200
    download_response.close()
    convert_response.close()

    assert not metadata_path.exists()
|
||||
|
||||
|
||||
def test_cleanup_on_startup_removes_expired_outputs(tmp_path):
    """create_app with CLEANUP_ON_STARTUP purges outputs older than retention.

    Seeds an output file plus its JSON metadata sidecar, backdates both well
    past the 1-hour retention window, then boots the app and checks that both
    files were deleted during startup.
    """
    import os

    output_dir = tmp_path / "startup-cleanup-outputs"
    output_dir.mkdir(parents=True)
    result_id = "expired-result"
    file_path = output_dir / f"{result_id}.csv"
    metadata_path = output_dir / f"{result_id}.json"
    file_path.write_text("header\nvalue\n", encoding="utf-8")
    metadata_path.write_text(
        json.dumps(
            {
                "result_id": result_id,
                "file_path": str(file_path),
                "download_name": "waf-report.csv",
                "mimetype": "text/csv; charset=utf-8",
            }
        ),
        encoding="utf-8",
    )

    # 2000-01-01T00:00:00Z — far outside any plausible retention window.
    old_timestamp = 946684800
    # Fix: dropped the redundant file_path.touch() / metadata_path.touch() /
    # Path(file_path).touch() calls — write_text already created both files,
    # and os.utime overwrites whatever timestamps the touches would have set.
    os.utime(file_path, (old_timestamp, old_timestamp))
    os.utime(metadata_path, (old_timestamp, old_timestamp))

    class StartupCleanupConfig:
        TESTING = True
        SECRET_KEY = "test-secret"
        MAX_CONTENT_LENGTH = 100 * 1024 * 1024
        PREVIEW_RECORD_LIMIT = 5
        OUTPUT_DIRECTORY = output_dir
        OUTPUT_RETENTION_HOURS = 1
        CLEANUP_ON_STARTUP = True
        CLEANUP_AFTER_DOWNLOAD = False

    # App construction alone must trigger the cleanup — no request needed.
    create_app(StartupCleanupConfig)

    assert not file_path.exists()
    assert not metadata_path.exists()
|
||||
|
||||
|
||||
def test_default_upload_limit_is_100_mib(app):
    """The app ships with a 100 MiB request-size ceiling by default."""
    expected_limit = 100 * 1024 * 1024  # 100 MiB expressed in bytes
    assert app.config["MAX_CONTENT_LENGTH"] == expected_limit
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.storage import persist_result
|
||||
from app.services.storage import cleanup_expired_outputs, delete_result_files, persist_result
|
||||
|
||||
|
||||
def test_persist_result_writes_csv_and_collects_preview(tmp_path: Path):
|
||||
@@ -32,3 +34,42 @@ def test_persist_result_writes_csv_and_collects_preview(tmp_path: Path):
|
||||
assert "v015xxxxdate,time,policy,severity_level" in written
|
||||
assert "2024-05-01,10:00:00,Prod Policy,high" in written
|
||||
assert export_result.preview(1).count("\n") == 1
|
||||
|
||||
|
||||
def test_delete_result_files_removes_output_and_metadata(tmp_path: Path):
    """delete_result_files drops both the payload file and its JSON sidecar."""
    result_id = "delete-me"
    payload = tmp_path / f"{result_id}.txt"
    sidecar = tmp_path / f"{result_id}.json"
    for target, body in ((payload, "content"), (sidecar, "{}")):
        target.write_text(body, encoding="utf-8")

    delete_result_files(output_dir=tmp_path, result_id=result_id)

    assert not payload.exists()
    assert not sidecar.exists()
|
||||
|
||||
|
||||
def test_cleanup_expired_outputs_removes_only_old_results(tmp_path: Path):
    """cleanup_expired_outputs deletes expired results and keeps fresh ones."""
    stale_id, fresh_id = "old-result", "new-result"

    def make_result(result_id: str, body: str) -> tuple[Path, Path]:
        # Create the output file plus the JSON metadata sidecar for one result.
        output = tmp_path / f"{result_id}.csv"
        metadata = tmp_path / f"{result_id}.json"
        output.write_text(body, encoding="utf-8")
        metadata.write_text(
            json.dumps({"result_id": result_id, "file_path": str(output)}),
            encoding="utf-8",
        )
        return output, metadata

    stale_output, stale_metadata = make_result(stale_id, "old")
    fresh_output, fresh_metadata = make_result(fresh_id, "new")

    # Backdate only the stale result (2000-01-01T00:00:00Z) so it falls
    # outside the one-hour retention window; the fresh one stays current.
    epoch_2000 = 946684800
    for path in (stale_output, stale_metadata):
        os.utime(path, (epoch_2000, epoch_2000))

    deleted_results = cleanup_expired_outputs(output_dir=tmp_path, retention_hours=1)

    assert deleted_results == 1
    assert not stale_output.exists()
    assert not stale_metadata.exists()
    assert fresh_output.exists()
    assert fresh_metadata.exists()
|
||||
|
||||
Reference in New Issue
Block a user