Build Flask WAF log converter app

This commit is contained in:
Alfredo Di Stasio
2026-04-24 14:40:32 +02:00
parent f9579bd253
commit 355d61f11f
23 changed files with 1053 additions and 1 deletions

47
app/services/parser.py Normal file
View File

@@ -0,0 +1,47 @@
import shlex
from collections import OrderedDict
from io import BufferedIOBase, TextIOBase
class LogParseError(ValueError):
    """Signal that an uploaded log file could not be parsed.

    Derives from ``ValueError``, so handlers written as ``except ValueError``
    also catch it.
    """
def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
"""Parse a UTF-8 log file where each line contains shell-like key/value tokens."""
raw_bytes = stream.read()
if isinstance(raw_bytes, str):
content = raw_bytes
else:
content = raw_bytes.decode("utf-8")
records: list[dict[str, str]] = []
seen_keys: OrderedDict[str, None] = OrderedDict()
for line_number, raw_line in enumerate(content.splitlines(), start=1):
line = raw_line.strip()
if not line:
continue
try:
tokens = shlex.split(line, posix=True)
except ValueError as exc:
raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc
record: dict[str, str] = {}
for token in tokens:
if "=" not in token:
raise LogParseError(
f"Line {line_number}: token '{token}' is missing '='."
)
key, value = token.split("=", 1)
if not key:
raise LogParseError(f"Line {line_number}: empty key is not allowed.")
record[key] = value
seen_keys.setdefault(key, None)
records.append(record)
return records, list(seen_keys.keys())