import json
import uuid
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path

from app.services.exporter import ExportResult, write_export


@dataclass(slots=True)
class ResultMetadata:
    result_id: str
    file_path: str
    download_name: str
    mimetype: str


def _result_paths(output_dir: Path, result_id: str) -> tuple[Path, Path]:
    """Build the sidecar metadata path and output file search base for a result id."""
    metadata_path = output_dir / f"{result_id}.json"
    return metadata_path, output_dir / f"{result_id}"


def persist_result(
    output_dir: Path,
    records: list[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
    preview_record_limit: int,
) -> tuple[ResultMetadata, ExportResult]:
    """Persist generated output and sidecar metadata in a temporary directory."""
    result_id = uuid.uuid4().hex
    extension = "txt" if output_format == "text" else "csv"
    mimetype = "text/plain; charset=utf-8" if extension == "txt" else "text/csv; charset=utf-8"

    file_path = output_dir / f"{result_id}.{extension}"
    metadata_path = output_dir / f"{result_id}.json"

    export_result = write_export(
        file_path=file_path,
        records=records,
        union_keys=union_keys,
        mode=mode,
        output_format=output_format,
        preview_record_limit=preview_record_limit,
    )
    metadata = ResultMetadata(
        result_id=result_id,
        file_path=str(file_path),
        download_name=f"waf-report.{extension}",
        mimetype=mimetype,
    )
    metadata_path.write_text(json.dumps(asdict(metadata)), encoding="utf-8")
    return metadata, export_result


def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
    """Load sidecar metadata for a generated file."""
    metadata_path, _base_path = _result_paths(output_dir, result_id)
    if not metadata_path.exists():
        return None

    return json.loads(metadata_path.read_text(encoding="utf-8"))


def delete_result_files(output_dir: Path, result_id: str) -> None:
    """Delete a generated output file and its metadata sidecar if they still exist."""
    metadata_path, base_path = _result_paths(output_dir, result_id)
    for output_file in output_dir.glob(f"{base_path.name}.*"):
        if output_file.name == metadata_path.name:
            continue
        output_file.unlink(missing_ok=True)
    metadata_path.unlink(missing_ok=True)


def cleanup_expired_outputs(output_dir: Path, retention_hours: int) -> int:
    """Delete generated output sets older than the configured retention window."""
    cutoff = datetime.now(timezone.utc) - timedelta(hours=retention_hours)
    deleted_results = 0

    for metadata_path in output_dir.glob("*.json"):
        try:
            payload = json.loads(metadata_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            payload = {}

        result_id = payload.get("result_id") or metadata_path.stem
        file_path = Path(payload["file_path"]) if "file_path" in payload else None
        newest_mtime = _newest_mtime(metadata_path, file_path)
        if newest_mtime is None or newest_mtime >= cutoff:
            continue

        delete_result_files(output_dir=output_dir, result_id=result_id)
        deleted_results += 1

    return deleted_results


def _newest_mtime(metadata_path: Path, file_path: Path | None) -> datetime | None:
    """Return the newest modification time across the metadata and output file."""
    mtimes: list[datetime] = []
    if metadata_path.exists():
        mtimes.append(datetime.fromtimestamp(metadata_path.stat().st_mtime, tz=timezone.utc))
    if file_path is not None and file_path.exists():
        mtimes.append(datetime.fromtimestamp(file_path.stat().st_mtime, tz=timezone.utc))
    if not mtimes:
        return None
    return max(mtimes)
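

# Illustrative usage sketch, not part of the production module. It assumes the
# app.services.exporter package is importable, and the record keys, mode string,
# and retention value below are hypothetical placeholders, not the application's
# real values.
if __name__ == "__main__":
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)

        # Write one result set plus its JSON sidecar into the temp directory.
        metadata, export_result = persist_result(
            output_dir=output_dir,
            records=[{"rule": "sql-injection", "action": "block"}],  # hypothetical records
            union_keys=["rule", "action"],
            mode="merge",  # hypothetical mode value
            output_format="csv",
            preview_record_limit=10,
        )
        print(load_result_metadata(output_dir, metadata.result_id))

        # Freshly written files are newer than the cutoff, so nothing is deleted yet.
        print(cleanup_expired_outputs(output_dir, retention_hours=24))

        # Explicitly remove the single result set written above.
        delete_result_files(output_dir=output_dir, result_id=metadata.result_id)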