Build Flask WAF log converter app

This commit is contained in:
Alfredo Di Stasio
2026-04-24 14:40:32 +02:00
parent f9579bd253
commit 355d61f11f
23 changed files with 1053 additions and 1 deletion

app/services/__init__.py Normal file

@@ -0,0 +1 @@
"""Service layer for parsing, processing, exporting, and file storage."""

app/services/exporter.py Normal file

@@ -0,0 +1,69 @@
import csv
import io
from dataclasses import dataclass

from app.constants import VENDOR_FIELDS


@dataclass(slots=True)
class ExportResult:
    content: str
    columns: list[str]
    output_format: str

    def preview(self, record_limit: int) -> str:
        """Build a small preview string for the result page."""
        if self.output_format == "text":
            marker = f"--- record {record_limit + 1} ---"
            if marker in self.content:
                return self.content.split(marker, 1)[0].rstrip()
            return self.content
        lines = self.content.splitlines()
        if len(lines) <= record_limit + 1:
            return self.content
        return "\n".join(lines[: record_limit + 1])


def build_export(
    records: list[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
) -> ExportResult:
    columns = VENDOR_FIELDS if mode == "vendor" else union_keys
    if output_format == "text":
        return ExportResult(
            content=_render_text(records, columns),
            columns=columns,
            output_format=output_format,
        )
    return ExportResult(
        content=_render_csv(records, columns),
        columns=columns,
        output_format=output_format,
    )


def _render_text(records: list[dict[str, str]], columns: list[str]) -> str:
    max_key_length = max((len(column) for column in columns), default=0)
    chunks: list[str] = []
    for index, record in enumerate(records, start=1):
        chunks.append(f"--- record {index} ---")
        for column in columns:
            value = record.get(column, "")
            chunks.append(f" {column.ljust(max_key_length)} = {value}")
    return "\n".join(chunks)


def _render_csv(records: list[dict[str, str]], columns: list[str]) -> str:
    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
    writer.writeheader()
    for record in records:
        writer.writerow({column: record.get(column, "") for column in columns})
    return buffer.getvalue()
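
A minimal usage sketch for the exporter (the sample records and union_keys are illustrative; mode="union" is used here because VENDOR_FIELDS lives in app.constants, which is not part of this file):

from app.services.exporter import build_export

records = [
    {"policy": "waf-default", "severity_level": "high", "time": "12:00:01"},
    {"policy": "waf-strict", "severity_level": "low"},
]
union_keys = ["policy", "severity_level", "time"]

result = build_export(records, union_keys, mode="union", output_format="csv")
print(result.preview(record_limit=1))
# policy,severity_level,time
# waf-default,high,12:00:01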

app/services/parser.py Normal file

@@ -0,0 +1,47 @@
import shlex
from collections import OrderedDict
from io import BufferedIOBase, TextIOBase


class LogParseError(ValueError):
    """Raised when the uploaded log file cannot be parsed."""


def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
    """Parse a UTF-8 log file where each line contains shell-like key/value tokens."""
    raw_bytes = stream.read()
    if isinstance(raw_bytes, str):
        content = raw_bytes
    else:
        try:
            content = raw_bytes.decode("utf-8")
        except UnicodeDecodeError as exc:
            # Surface undecodable uploads as a parse failure instead of a raw UnicodeDecodeError.
            raise LogParseError("File is not valid UTF-8.") from exc
    records: list[dict[str, str]] = []
    seen_keys: OrderedDict[str, None] = OrderedDict()
    for line_number, raw_line in enumerate(content.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            tokens = shlex.split(line, posix=True)
        except ValueError as exc:
            raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc
        record: dict[str, str] = {}
        for token in tokens:
            if "=" not in token:
                raise LogParseError(
                    f"Line {line_number}: token '{token}' is missing '='."
                )
            key, value = token.split("=", 1)
            if not key:
                raise LogParseError(f"Line {line_number}: empty key is not allowed.")
            record[key] = value
            seen_keys.setdefault(key, None)
        records.append(record)
    return records, list(seen_keys.keys())
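
A quick sketch of the parser on a well-formed line (the sample log line is illustrative; quoted values survive shlex splitting intact):

from io import BytesIO
from app.services.parser import parse_log_file

sample = b'policy=waf-default severity_level=high msg="SQL injection blocked"\n'
records, union_keys = parse_log_file(BytesIO(sample))
# records    -> [{'policy': 'waf-default', 'severity_level': 'high', 'msg': 'SQL injection blocked'}]
# union_keys -> ['policy', 'severity_level', 'msg']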

app/services/processor.py Normal file

@@ -0,0 +1,78 @@
from dataclasses import dataclass
from datetime import datetime

from app.constants import SEVERITY_RANKING


class ProcessingError(ValueError):
    """Raised when records cannot be processed according to the selected options."""


@dataclass(slots=True)
class ProcessingOptions:
    policy_cs: str
    policy_ci: str
    severity_cs: str
    severity_ci: str
    sort_by: str
    order: str
    mode: str


def filter_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Apply user-selected filters to parsed records."""
    filtered: list[dict[str, str]] = []
    for record in records:
        policy_value = record.get("policy", "")
        severity_value = record.get("severity_level", "")
        if options.policy_cs and options.policy_cs not in policy_value:
            continue
        if options.policy_ci and options.policy_ci.lower() not in policy_value.lower():
            continue
        if options.severity_cs and options.severity_cs not in severity_value:
            continue
        if options.severity_ci and options.severity_ci.lower() not in severity_value.lower():
            continue
        filtered.append(record)
    return filtered


def sort_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Sort records by datetime or severity using the requested order."""
    reverse = options.order == "desc"
    if options.sort_by == "datetime":
        key_func = _datetime_key
    elif options.sort_by == "severity":
        key_func = _severity_key
    else:
        raise ProcessingError("Unsupported sort field.")
    return sorted(records, key=key_func, reverse=reverse)


def _datetime_key(record: dict[str, str]) -> tuple[int, datetime]:
    date_value = record.get("v015xxxxdate", "").strip()
    time_value = record.get("time", "").strip()
    if not date_value or not time_value:
        return (1, datetime.max)
    try:
        parsed = datetime.strptime(f"{date_value} {time_value}", "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return (1, datetime.max)
    return (0, parsed)


def _severity_key(record: dict[str, str]) -> tuple[int, str]:
    raw_value = record.get("severity_level", "").strip().lower()
    rank = SEVERITY_RANKING.get(raw_value, 0)
    return (rank, raw_value)
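
A sketch of the filter/sort pipeline. SEVERITY_RANKING comes from app.constants and is not shown in this diff; the example assumes "critical" ranks above "low":

from app.services.processor import ProcessingOptions, filter_records, sort_records

options = ProcessingOptions(
    policy_cs="", policy_ci="waf", severity_cs="", severity_ci="",
    sort_by="severity", order="desc", mode="union",
)
records = [
    {"policy": "waf-default", "severity_level": "low"},
    {"policy": "waf-strict", "severity_level": "critical"},
]
matched = filter_records(records, options)  # both match "waf" case-insensitively
ordered = sort_records(matched, options)    # "critical" first, assuming it outranks "low"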

app/services/storage.py Normal file

@@ -0,0 +1,43 @@
import json
import uuid
from dataclasses import asdict, dataclass
from pathlib import Path

from app.services.exporter import ExportResult


@dataclass(slots=True)
class ResultMetadata:
    result_id: str
    file_path: str
    download_name: str
    mimetype: str


def persist_result(output_dir: Path, export_result: ExportResult) -> ResultMetadata:
    """Persist generated output and sidecar metadata in a temporary directory."""
    result_id = uuid.uuid4().hex
    extension = "txt" if export_result.output_format == "text" else "csv"
    mimetype = "text/plain; charset=utf-8" if extension == "txt" else "text/csv; charset=utf-8"
    file_path = output_dir / f"{result_id}.{extension}"
    metadata_path = output_dir / f"{result_id}.json"
    file_path.write_text(export_result.content, encoding="utf-8")
    metadata = ResultMetadata(
        result_id=result_id,
        file_path=str(file_path),
        download_name=f"waf-report.{extension}",
        mimetype=mimetype,
    )
    metadata_path.write_text(json.dumps(asdict(metadata)), encoding="utf-8")
    return metadata


def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
    """Load sidecar metadata for a generated file."""
    metadata_path = output_dir / f"{result_id}.json"
    if not metadata_path.exists():
        return None
    return json.loads(metadata_path.read_text(encoding="utf-8"))
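
A round-trip sketch for the storage helpers (the temporary directory and CSV content are illustrative):

from pathlib import Path
from tempfile import TemporaryDirectory

from app.services.exporter import ExportResult
from app.services.storage import load_result_metadata, persist_result

with TemporaryDirectory() as tmp:
    export = ExportResult(content="policy\nwaf-default\n", columns=["policy"], output_format="csv")
    meta = persist_result(Path(tmp), export)
    # meta.download_name == "waf-report.csv"; the sidecar JSON sits next to the .csv file
    assert load_result_metadata(Path(tmp), meta.result_id)["mimetype"] == "text/csv; charset=utf-8"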