import json
import uuid
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path

from app.services.exporter import ExportResult, write_export


@dataclass(slots=True)
class ResultMetadata:
    """Sidecar metadata describing one generated output file."""

    # Opaque hex id shared by the output file and its ``.json`` sidecar.
    result_id: str
    # Path to the generated output file, stored as a string so the
    # metadata stays directly JSON-serializable via ``asdict``.
    file_path: str
    # Filename offered to the client when the result is downloaded.
    download_name: str
    # Content type served with the download.
    mimetype: str


def _result_paths(output_dir: Path, result_id: str) -> tuple[Path, Path]:
    """Build the sidecar metadata path and extension-less base path for a result id.

    The base path carries no extension because the output file's extension
    depends on the requested format; callers glob ``{base}.​*`` to find it.
    """
    metadata_path = output_dir / f"{result_id}.json"
    return metadata_path, output_dir / f"{result_id}"


def persist_result(
    output_dir: Path,
    records: list[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
    preview_record_limit: int,
) -> tuple[ResultMetadata, ExportResult]:
    """Persist generated output and sidecar metadata in a temporary directory.

    Writes the export file first, then the ``.json`` sidecar, and returns the
    in-memory metadata together with the exporter's result object.
    """
    result_id = uuid.uuid4().hex
    extension = "txt" if output_format == "text" else "csv"
    mimetype = "text/plain; charset=utf-8" if extension == "txt" else "text/csv; charset=utf-8"
    # Derive both paths from the single helper so the naming convention
    # lives in one place (previously duplicated here).
    metadata_path, base_path = _result_paths(output_dir, result_id)
    file_path = base_path.with_name(f"{base_path.name}.{extension}")
    export_result = write_export(
        file_path=file_path,
        records=records,
        union_keys=union_keys,
        mode=mode,
        output_format=output_format,
        preview_record_limit=preview_record_limit,
    )
    metadata = ResultMetadata(
        result_id=result_id,
        file_path=str(file_path),
        download_name=f"waf-report.{extension}",
        mimetype=mimetype,
    )
    metadata_path.write_text(json.dumps(asdict(metadata)), encoding="utf-8")
    return metadata, export_result


def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
    """Load sidecar metadata for a generated file, or ``None`` if it is gone.

    Uses EAFP rather than ``exists()`` so a concurrent cleanup deleting the
    sidecar between check and read cannot raise.
    """
    metadata_path, _base_path = _result_paths(output_dir, result_id)
    try:
        raw = metadata_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
    return json.loads(raw)


def delete_result_files(output_dir: Path, result_id: str) -> None:
    """Delete a generated output file and its metadata sidecar if they still exist."""
    metadata_path, base_path = _result_paths(output_dir, result_id)
    # The glob also matches the .json sidecar; skip it here and remove it
    # last so metadata never outlives its output file.
    for output_file in output_dir.glob(f"{base_path.name}.*"):
        if output_file.name == metadata_path.name:
            continue
        output_file.unlink(missing_ok=True)
    metadata_path.unlink(missing_ok=True)


def cleanup_expired_outputs(output_dir: Path, retention_hours: int) -> int:
    """Delete generated output sets older than the configured retention window.

    Returns the number of result sets removed. A sidecar that cannot be read
    or parsed is treated as empty and cleaned up by its file name alone.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(hours=retention_hours)
    deleted_results = 0
    for metadata_path in output_dir.glob("*.json"):
        try:
            payload = json.loads(metadata_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            payload = {}
        # Fall back to the sidecar's stem when the payload is unusable.
        result_id = payload.get("result_id") or metadata_path.stem
        file_path = Path(payload["file_path"]) if "file_path" in payload else None
        newest_mtime = _newest_mtime(metadata_path, file_path)
        # Keep the set while either file is newer than the cutoff; a missing
        # pair (None) means someone already deleted it.
        if newest_mtime is None or newest_mtime >= cutoff:
            continue
        delete_result_files(output_dir=output_dir, result_id=result_id)
        deleted_results += 1
    return deleted_results


def _newest_mtime(metadata_path: Path, file_path: Path | None) -> datetime | None:
    """Return the newest modification time across the metadata and output file.

    Returns ``None`` when neither file exists. Stats via EAFP so a file
    deleted between glob and stat is simply skipped instead of raising.
    """
    mtimes: list[datetime] = []
    for path in (metadata_path, file_path):
        if path is None:
            continue
        try:
            stat_result = path.stat()
        except OSError:
            continue
        mtimes.append(datetime.fromtimestamp(stat_result.st_mtime, tz=timezone.utc))
    return max(mtimes) if mtimes else None