Build Flask WAF log converter app

This commit is contained in:
Alfredo Di Stasio
2026-04-24 14:40:32 +02:00
parent f9579bd253
commit 355d61f11f
23 changed files with 1053 additions and 1 deletion

app/services/__init__.py Normal file

@@ -0,0 +1 @@
"""Service layer for parsing, processing, exporting, and file storage."""

app/services/exporter.py Normal file

@@ -0,0 +1,69 @@
import csv
import io
from dataclasses import dataclass

from app.constants import VENDOR_FIELDS


@dataclass(slots=True)
class ExportResult:
    content: str
    columns: list[str]
    output_format: str

    def preview(self, record_limit: int) -> str:
        """Build a small preview string for the result page."""
        if self.output_format == "text":
            marker = f"--- record {record_limit + 1} ---"
            if marker in self.content:
                return self.content.split(marker, 1)[0].rstrip()
            return self.content
        lines = self.content.splitlines()
        if len(lines) <= record_limit + 1:
            return self.content
        return "\n".join(lines[: record_limit + 1])


def build_export(
    records: list[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
) -> ExportResult:
    columns = VENDOR_FIELDS if mode == "vendor" else union_keys
    if output_format == "text":
        return ExportResult(
            content=_render_text(records, columns),
            columns=columns,
            output_format=output_format,
        )
    return ExportResult(
        content=_render_csv(records, columns),
        columns=columns,
        output_format=output_format,
    )


def _render_text(records: list[dict[str, str]], columns: list[str]) -> str:
    max_key_length = max((len(column) for column in columns), default=0)
    chunks: list[str] = []
    for index, record in enumerate(records, start=1):
        chunks.append(f"--- record {index} ---")
        for column in columns:
            value = record.get(column, "")
            chunks.append(f" {column.ljust(max_key_length)} = {value}")
    return "\n".join(chunks)


def _render_csv(records: list[dict[str, str]], columns: list[str]) -> str:
    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
    writer.writeheader()
    for record in records:
        writer.writerow({column: record.get(column, "") for column in columns})
    return buffer.getvalue()
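
A minimal usage sketch for the exporter (the sample records and union_keys are illustrative; mode="union" is used here because VENDOR_FIELDS lives in app.constants, which is not part of this file):

from app.services.exporter import build_export

records = [
    {"policy": "waf-default", "severity_level": "high", "time": "12:00:01"},
    {"policy": "waf-strict", "severity_level": "low"},
]
union_keys = ["policy", "severity_level", "time"]

result = build_export(records, union_keys, mode="union", output_format="csv")
print(result.preview(record_limit=1))
# policy,severity_level,time
# waf-default,high,12:00:01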

app/services/parser.py Normal file

@@ -0,0 +1,47 @@
import shlex
from collections import OrderedDict
from io import BufferedIOBase, TextIOBase


class LogParseError(ValueError):
    """Raised when the uploaded log file cannot be parsed."""


def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
    """Parse a UTF-8 log file where each line contains shell-like key/value tokens."""
    raw_bytes = stream.read()
    if isinstance(raw_bytes, str):
        content = raw_bytes
    else:
        try:
            content = raw_bytes.decode("utf-8")
        except UnicodeDecodeError as exc:
            # Surface undecodable uploads as a parse failure instead of a raw UnicodeDecodeError.
            raise LogParseError("File is not valid UTF-8.") from exc
    records: list[dict[str, str]] = []
    seen_keys: OrderedDict[str, None] = OrderedDict()
    for line_number, raw_line in enumerate(content.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            tokens = shlex.split(line, posix=True)
        except ValueError as exc:
            raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc
        record: dict[str, str] = {}
        for token in tokens:
            if "=" not in token:
                raise LogParseError(
                    f"Line {line_number}: token '{token}' is missing '='."
                )
            key, value = token.split("=", 1)
            if not key:
                raise LogParseError(f"Line {line_number}: empty key is not allowed.")
            record[key] = value
            seen_keys.setdefault(key, None)
        records.append(record)
    return records, list(seen_keys.keys())
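
A quick sketch of the parser on a well-formed line (the sample log line is illustrative; quoted values survive shlex splitting intact):

from io import BytesIO
from app.services.parser import parse_log_file

sample = b'policy=waf-default severity_level=high msg="SQL injection blocked"\n'
records, union_keys = parse_log_file(BytesIO(sample))
# records    -> [{'policy': 'waf-default', 'severity_level': 'high', 'msg': 'SQL injection blocked'}]
# union_keys -> ['policy', 'severity_level', 'msg']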

app/services/processor.py Normal file

@@ -0,0 +1,78 @@
from dataclasses import dataclass
from datetime import datetime

from app.constants import SEVERITY_RANKING


class ProcessingError(ValueError):
    """Raised when records cannot be processed according to the selected options."""


@dataclass(slots=True)
class ProcessingOptions:
    policy_cs: str
    policy_ci: str
    severity_cs: str
    severity_ci: str
    sort_by: str
    order: str
    mode: str


def filter_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Apply user-selected filters to parsed records."""
    filtered: list[dict[str, str]] = []
    for record in records:
        policy_value = record.get("policy", "")
        severity_value = record.get("severity_level", "")
        if options.policy_cs and options.policy_cs not in policy_value:
            continue
        if options.policy_ci and options.policy_ci.lower() not in policy_value.lower():
            continue
        if options.severity_cs and options.severity_cs not in severity_value:
            continue
        if options.severity_ci and options.severity_ci.lower() not in severity_value.lower():
            continue
        filtered.append(record)
    return filtered


def sort_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Sort records by datetime or severity using the requested order."""
    reverse = options.order == "desc"
    if options.sort_by == "datetime":
        key_func = _datetime_key
    elif options.sort_by == "severity":
        key_func = _severity_key
    else:
        raise ProcessingError("Unsupported sort field.")
    return sorted(records, key=key_func, reverse=reverse)


def _datetime_key(record: dict[str, str]) -> tuple[int, datetime]:
    date_value = record.get("v015xxxxdate", "").strip()
    time_value = record.get("time", "").strip()
    if not date_value or not time_value:
        return (1, datetime.max)
    try:
        parsed = datetime.strptime(f"{date_value} {time_value}", "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return (1, datetime.max)
    return (0, parsed)


def _severity_key(record: dict[str, str]) -> tuple[int, str]:
    raw_value = record.get("severity_level", "").strip().lower()
    rank = SEVERITY_RANKING.get(raw_value, 0)
    return (rank, raw_value)
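
A sketch of the filter/sort pipeline. SEVERITY_RANKING comes from app.constants and is not shown in this diff; the example assumes "critical" ranks above "low":

from app.services.processor import ProcessingOptions, filter_records, sort_records

options = ProcessingOptions(
    policy_cs="", policy_ci="waf", severity_cs="", severity_ci="",
    sort_by="severity", order="desc", mode="union",
)
records = [
    {"policy": "waf-default", "severity_level": "low"},
    {"policy": "waf-strict", "severity_level": "critical"},
]
matched = filter_records(records, options)  # both match "waf" case-insensitively
ordered = sort_records(matched, options)    # "critical" first, assuming it outranks "low"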

app/services/storage.py Normal file

@@ -0,0 +1,43 @@
import json
import uuid
from dataclasses import asdict, dataclass
from pathlib import Path

from app.services.exporter import ExportResult


@dataclass(slots=True)
class ResultMetadata:
    result_id: str
    file_path: str
    download_name: str
    mimetype: str


def persist_result(output_dir: Path, export_result: ExportResult) -> ResultMetadata:
    """Persist generated output and sidecar metadata in a temporary directory."""
    result_id = uuid.uuid4().hex
    extension = "txt" if export_result.output_format == "text" else "csv"
    mimetype = "text/plain; charset=utf-8" if extension == "txt" else "text/csv; charset=utf-8"
    file_path = output_dir / f"{result_id}.{extension}"
    metadata_path = output_dir / f"{result_id}.json"
    file_path.write_text(export_result.content, encoding="utf-8")
    metadata = ResultMetadata(
        result_id=result_id,
        file_path=str(file_path),
        download_name=f"waf-report.{extension}",
        mimetype=mimetype,
    )
    metadata_path.write_text(json.dumps(asdict(metadata)), encoding="utf-8")
    return metadata


def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
    """Load sidecar metadata for a generated file."""
    metadata_path = output_dir / f"{result_id}.json"
    if not metadata_path.exists():
        return None
    return json.loads(metadata_path.read_text(encoding="utf-8"))
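
A round-trip sketch for the storage helpers (the temporary directory and CSV content are illustrative):

from pathlib import Path
from tempfile import TemporaryDirectory

from app.services.exporter import ExportResult
from app.services.storage import load_result_metadata, persist_result

with TemporaryDirectory() as tmp:
    export = ExportResult(content="policy\nwaf-default\n", columns=["policy"], output_format="csv")
    meta = persist_result(Path(tmp), export)
    # meta.download_name == "waf-report.csv"; the sidecar JSON sits next to the .csv file
    assert load_result_metadata(Path(tmp), meta.result_id)["mimetype"] == "text/csv; charset=utf-8"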