Build Flask WAF log converter app
This commit is contained in:
1
app/services/__init__.py
Normal file
1
app/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Service layer for parsing, processing, exporting, and file storage."""
|
||||
69
app/services/exporter.py
Normal file
69
app/services/exporter.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import csv
|
||||
import io
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.constants import VENDOR_FIELDS
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ExportResult:
|
||||
content: str
|
||||
columns: list[str]
|
||||
output_format: str
|
||||
|
||||
def preview(self, record_limit: int) -> str:
|
||||
"""Build a small preview string for the result page."""
|
||||
if self.output_format == "text":
|
||||
marker = f"--- record {record_limit + 1} ---"
|
||||
if marker in self.content:
|
||||
return self.content.split(marker, 1)[0].rstrip()
|
||||
return self.content
|
||||
|
||||
lines = self.content.splitlines()
|
||||
if len(lines) <= record_limit + 1:
|
||||
return self.content
|
||||
return "\n".join(lines[: record_limit + 1])
|
||||
|
||||
|
||||
def build_export(
    records: list[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
) -> ExportResult:
    """Render *records* into the requested output format.

    ``mode == "vendor"`` exports the fixed vendor column set; any other
    mode exports the union of keys observed during parsing.
    ``output_format == "text"`` selects the plain-text renderer; anything
    else produces CSV.
    """
    selected_columns = VENDOR_FIELDS if mode == "vendor" else union_keys
    renderer = _render_text if output_format == "text" else _render_csv
    return ExportResult(
        content=renderer(records, selected_columns),
        columns=selected_columns,
        output_format=output_format,
    )
|
||||
|
||||
|
||||
def _render_text(records: list[dict[str, str]], columns: list[str]) -> str:
|
||||
max_key_length = max((len(column) for column in columns), default=0)
|
||||
chunks: list[str] = []
|
||||
|
||||
for index, record in enumerate(records, start=1):
|
||||
chunks.append(f"--- record {index} ---")
|
||||
for column in columns:
|
||||
value = record.get(column, "")
|
||||
chunks.append(f" {column.ljust(max_key_length)} = {value}")
|
||||
|
||||
return "\n".join(chunks)
|
||||
|
||||
|
||||
def _render_csv(records: list[dict[str, str]], columns: list[str]) -> str:
|
||||
buffer = io.StringIO()
|
||||
writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
|
||||
writer.writeheader()
|
||||
for record in records:
|
||||
writer.writerow({column: record.get(column, "") for column in columns})
|
||||
return buffer.getvalue()
|
||||
47
app/services/parser.py
Normal file
47
app/services/parser.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import shlex
|
||||
from collections import OrderedDict
|
||||
from io import BufferedIOBase, TextIOBase
|
||||
|
||||
|
||||
class LogParseError(ValueError):
    """Signals that an uploaded log file could not be parsed."""
|
||||
|
||||
|
||||
def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
|
||||
"""Parse a UTF-8 log file where each line contains shell-like key/value tokens."""
|
||||
raw_bytes = stream.read()
|
||||
if isinstance(raw_bytes, str):
|
||||
content = raw_bytes
|
||||
else:
|
||||
content = raw_bytes.decode("utf-8")
|
||||
|
||||
records: list[dict[str, str]] = []
|
||||
seen_keys: OrderedDict[str, None] = OrderedDict()
|
||||
|
||||
for line_number, raw_line in enumerate(content.splitlines(), start=1):
|
||||
line = raw_line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
try:
|
||||
tokens = shlex.split(line, posix=True)
|
||||
except ValueError as exc:
|
||||
raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc
|
||||
|
||||
record: dict[str, str] = {}
|
||||
for token in tokens:
|
||||
if "=" not in token:
|
||||
raise LogParseError(
|
||||
f"Line {line_number}: token '{token}' is missing '='."
|
||||
)
|
||||
|
||||
key, value = token.split("=", 1)
|
||||
if not key:
|
||||
raise LogParseError(f"Line {line_number}: empty key is not allowed.")
|
||||
|
||||
record[key] = value
|
||||
seen_keys.setdefault(key, None)
|
||||
|
||||
records.append(record)
|
||||
|
||||
return records, list(seen_keys.keys())
|
||||
78
app/services/processing.py
Normal file
78
app/services/processing.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
from app.constants import SEVERITY_RANKING
|
||||
|
||||
|
||||
class ProcessingError(ValueError):
    """Signals that records could not be processed with the selected options."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ProcessingOptions:
    """User-selected filter, sort, and export options for one conversion run.

    An empty string in any filter field means "no filtering on that field".
    """

    # Case-sensitive substring filter on the record's "policy" value.
    policy_cs: str
    # Case-insensitive substring filter on the record's "policy" value.
    policy_ci: str
    # Case-sensitive substring filter on "severity_level".
    severity_cs: str
    # Case-insensitive substring filter on "severity_level".
    severity_ci: str
    # Sort field: "datetime" or "severity" (anything else is rejected by sort_records).
    sort_by: str
    # Sort direction; "desc" reverses, any other value sorts ascending.
    order: str
    # Column-selection mode; "vendor" picks the fixed vendor column set.
    mode: str
|
||||
|
||||
|
||||
def filter_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Return only the records matching every active substring filter.

    An empty filter string matches everything; ``*_cs`` filters compare
    case-sensitively and ``*_ci`` filters case-insensitively.
    """

    def matches(record: dict[str, str]) -> bool:
        policy = record.get("policy", "")
        severity = record.get("severity_level", "")
        checks = (
            (options.policy_cs, policy),
            (options.policy_ci.lower(), policy.lower()),
            (options.severity_cs, severity),
            (options.severity_ci.lower(), severity.lower()),
        )
        # Inactive (empty) filters are skipped; all active ones must hit.
        return all(needle in haystack for needle, haystack in checks if needle)

    return [record for record in records if matches(record)]
|
||||
|
||||
|
||||
def sort_records(
    records: list[dict[str, str]], options: ProcessingOptions
) -> list[dict[str, str]]:
    """Return a new list sorted by the requested field and direction.

    Raises:
        ProcessingError: If ``options.sort_by`` is neither ``"datetime"``
            nor ``"severity"``.
    """
    key_functions = {"datetime": _datetime_key, "severity": _severity_key}
    try:
        sort_key = key_functions[options.sort_by]
    except KeyError:
        raise ProcessingError("Unsupported sort field.") from None
    return sorted(records, key=sort_key, reverse=options.order == "desc")
|
||||
|
||||
|
||||
def _datetime_key(record: dict[str, str]) -> tuple[int, datetime]:
|
||||
date_value = record.get("v015xxxxdate", "").strip()
|
||||
time_value = record.get("time", "").strip()
|
||||
if not date_value or not time_value:
|
||||
return (1, datetime.max)
|
||||
|
||||
try:
|
||||
parsed = datetime.strptime(f"{date_value} {time_value}", "%Y-%m-%d %H:%M:%S")
|
||||
except ValueError:
|
||||
return (1, datetime.max)
|
||||
return (0, parsed)
|
||||
|
||||
|
||||
def _severity_key(record: dict[str, str]) -> tuple[int, str]:
    """Sort key for severity ordering: numeric rank first, label as tiebreak.

    Labels absent from SEVERITY_RANKING default to rank 0 so they group
    together at one end of the ordering.
    """
    label = record.get("severity_level", "").strip().lower()
    return (SEVERITY_RANKING.get(label, 0), label)
|
||||
43
app/services/storage.py
Normal file
43
app/services/storage.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.exporter import ExportResult
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ResultMetadata:
    """Sidecar metadata describing one persisted export file."""

    # Hex UUID identifying the result; also the on-disk file stem.
    result_id: str
    # Path of the generated output file, stored as a string so the
    # dataclass serializes cleanly to JSON.
    file_path: str
    # Filename offered when the file is downloaded.
    download_name: str
    # Content type served with the download (text/plain or text/csv).
    mimetype: str
|
||||
|
||||
|
||||
def persist_result(output_dir: Path, export_result: ExportResult) -> ResultMetadata:
    """Write the export content plus a JSON metadata sidecar under *output_dir*.

    Returns the metadata that was persisted, so callers can build the
    download response without re-reading the sidecar.
    """
    token = uuid.uuid4().hex
    is_text = export_result.output_format == "text"
    suffix = "txt" if is_text else "csv"
    content_type = (
        "text/plain; charset=utf-8" if is_text else "text/csv; charset=utf-8"
    )

    payload_path = output_dir / f"{token}.{suffix}"
    payload_path.write_text(export_result.content, encoding="utf-8")

    metadata = ResultMetadata(
        result_id=token,
        file_path=str(payload_path),
        download_name=f"waf-report.{suffix}",
        mimetype=content_type,
    )
    # Sidecar lives next to the payload under the same token.
    (output_dir / f"{token}.json").write_text(
        json.dumps(asdict(metadata)), encoding="utf-8"
    )
    return metadata
|
||||
|
||||
|
||||
def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None:
|
||||
"""Load sidecar metadata for a generated file."""
|
||||
metadata_path = output_dir / f"{result_id}.json"
|
||||
if not metadata_path.exists():
|
||||
return None
|
||||
|
||||
return json.loads(metadata_path.read_text(encoding="utf-8"))
|
||||
Reference in New Issue
Block a user