Add output cleanup policy
This commit is contained in:
@@ -5,6 +5,7 @@ from werkzeug.exceptions import RequestEntityTooLarge
|
||||
|
||||
from app.config import Config
|
||||
from app.routes import main_blueprint
|
||||
from app.services.storage import cleanup_expired_outputs
|
||||
|
||||
|
||||
def _format_size_limit(size_limit_bytes: int) -> str:
|
||||
@@ -27,6 +28,12 @@ def create_app(config_class: type[Config] = Config) -> Flask:
|
||||
app.config["OUTPUT_DIRECTORY"] = output_dir
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if app.config.get("CLEANUP_ON_STARTUP", False):
|
||||
cleanup_expired_outputs(
|
||||
output_dir=output_dir,
|
||||
retention_hours=app.config.get("OUTPUT_RETENTION_HOURS", 24),
|
||||
)
|
||||
|
||||
app.register_blueprint(main_blueprint)
|
||||
|
||||
@app.errorhandler(RequestEntityTooLarge)
|
||||
|
||||
@@ -2,6 +2,14 @@ import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _get_bool_setting(name: str, default: bool) -> bool:
|
||||
"""Parse conventional boolean environment values."""
|
||||
value = os.environ.get(name)
|
||||
if value is None:
|
||||
return default
|
||||
return value.strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _get_max_content_length() -> int:
|
||||
"""Resolve the upload size limit from environment settings."""
|
||||
upload_limit_mb = os.environ.get("MAX_UPLOAD_SIZE_MB")
|
||||
@@ -25,3 +33,6 @@ class Config:
|
||||
OUTPUT_DIRECTORY = Path(
|
||||
os.environ.get("OUTPUT_DIRECTORY", Path("instance") / "outputs")
|
||||
)
|
||||
OUTPUT_RETENTION_HOURS = int(os.environ.get("OUTPUT_RETENTION_HOURS", 24))
|
||||
CLEANUP_ON_STARTUP = _get_bool_setting("CLEANUP_ON_STARTUP", True)
|
||||
CLEANUP_AFTER_DOWNLOAD = _get_bool_setting("CLEANUP_AFTER_DOWNLOAD", False)
|
||||
|
||||
@@ -12,12 +12,13 @@ from flask import (
|
||||
url_for,
|
||||
)
|
||||
from werkzeug.datastructures import FileStorage
|
||||
from werkzeug.wsgi import ClosingIterator
|
||||
|
||||
from app.constants import MODES, OUTPUT_FORMATS, SORTABLE_FIELDS, SORT_ORDERS
|
||||
from app.services.conversion import convert_uploaded_log
|
||||
from app.services.parser import LogParseError
|
||||
from app.services.processing import ProcessingError, ProcessingOptions
|
||||
from app.services.storage import load_result_metadata
|
||||
from app.services.storage import delete_result_files, load_result_metadata
|
||||
|
||||
main_blueprint = Blueprint("main", __name__)
|
||||
|
||||
@@ -139,10 +140,17 @@ def download(result_id: str):
|
||||
flash("Requested output file could not be found.", "danger")
|
||||
return redirect(url_for("main.index"))
|
||||
|
||||
return send_file(
|
||||
response = send_file(
|
||||
Path(metadata["file_path"]),
|
||||
as_attachment=True,
|
||||
download_name=metadata["download_name"],
|
||||
mimetype=metadata["mimetype"],
|
||||
max_age=0,
|
||||
)
|
||||
if current_app.config.get("CLEANUP_AFTER_DOWNLOAD", False):
|
||||
output_dir = current_app.config["OUTPUT_DIRECTORY"]
|
||||
response.response = ClosingIterator(
|
||||
response.response,
|
||||
[lambda: delete_result_files(output_dir=output_dir, result_id=result_id)],
|
||||
)
|
||||
return response
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.exporter import ExportResult, write_export
|
||||
@@ -14,6 +15,12 @@ class ResultMetadata:
|
||||
mimetype: str
|
||||
|
||||
|
||||
def _result_paths(output_dir: Path, result_id: str) -> tuple[Path, Path]:
|
||||
"""Build the sidecar metadata and output file search pattern for a result id."""
|
||||
metadata_path = output_dir / f"{result_id}.json"
|
||||
return metadata_path, output_dir / f"{result_id}"
|
||||
|
||||
|
||||
def persist_result(
|
||||
output_dir: Path,
|
||||
records: list[dict[str, str]],
|
||||
@@ -50,8 +57,53 @@ def persist_result(
|
||||
|
||||
def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
    """Load sidecar metadata for a generated file.

    Resolves the sidecar location via the shared `_result_paths` helper so the
    naming convention stays in one place, and returns the parsed JSON payload,
    or None when no sidecar exists for *result_id*.
    """
    # Bug fix: the block previously assigned metadata_path twice (a leftover
    # pre-refactor line plus the _result_paths call); only the helper-based
    # resolution is kept.
    metadata_path, _base_path = _result_paths(output_dir, result_id)
    if not metadata_path.exists():
        return None

    return json.loads(metadata_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def delete_result_files(output_dir: Path, result_id: str) -> None:
    """Delete a generated output file and its metadata sidecar if they still exist.

    Output files share the result id as their stem with an arbitrary extension,
    so every `<result_id>.*` match except the JSON sidecar is removed first,
    then the sidecar itself.
    """
    metadata_path, base_path = _result_paths(output_dir, result_id)
    for candidate in output_dir.glob(f"{base_path.name}.*"):
        # Skip the sidecar here; it is unlinked explicitly below.
        if candidate.name != metadata_path.name:
            candidate.unlink(missing_ok=True)
    metadata_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def cleanup_expired_outputs(output_dir: Path, retention_hours: int) -> int:
    """Delete generated output sets older than the configured retention window.

    Scans every JSON sidecar under *output_dir*; a result set is expired when
    the newest modification time across its sidecar and output file is older
    than ``now - retention_hours``. Unreadable or corrupt sidecars fall back to
    the filename stem as the result id so they can still be reclaimed.

    Returns the number of result sets deleted.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(hours=retention_hours)
    removed = 0

    for sidecar in output_dir.glob("*.json"):
        try:
            payload = json.loads(sidecar.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            # Best-effort: a broken sidecar still identifies its result by stem.
            payload = {}

        result_id = payload.get("result_id") or sidecar.stem
        output_file = Path(payload["file_path"]) if "file_path" in payload else None

        latest = _newest_mtime(sidecar, output_file)
        if latest is not None and latest < cutoff:
            delete_result_files(output_dir=output_dir, result_id=result_id)
            removed += 1

    return removed
|
||||
|
||||
|
||||
def _newest_mtime(metadata_path: Path, file_path: Path | None) -> datetime | None:
|
||||
"""Return the newest modification time across the metadata and output file."""
|
||||
mtimes: list[datetime] = []
|
||||
if metadata_path.exists():
|
||||
mtimes.append(datetime.fromtimestamp(metadata_path.stat().st_mtime, tz=timezone.utc))
|
||||
if file_path is not None and file_path.exists():
|
||||
mtimes.append(datetime.fromtimestamp(file_path.stat().st_mtime, tz=timezone.utc))
|
||||
if not mtimes:
|
||||
return None
|
||||
return max(mtimes)
|
||||
|
||||
Reference in New Issue
Block a user