From 355d61f11fd9305217ae3b8c7bcf071c7ce967b8 Mon Sep 17 00:00:00 2001 From: Alfredo Di Stasio Date: Fri, 24 Apr 2026 14:40:32 +0200 Subject: [PATCH] Build Flask WAF log converter app --- .dockerignore | 6 ++ .gitignore | 7 ++ Dockerfile | 37 +++++++++ README.md | 89 +++++++++++++++++++++- app/__init__.py | 28 +++++++ app/config.py | 13 ++++ app/constants.py | 35 +++++++++ app/routes.py | 150 +++++++++++++++++++++++++++++++++++++ app/services/__init__.py | 1 + app/services/exporter.py | 69 +++++++++++++++++ app/services/parser.py | 47 ++++++++++++ app/services/processing.py | 78 +++++++++++++++++++ app/services/storage.py | 43 +++++++++++ app/templates/base.html | 38 ++++++++++ app/templates/index.html | 100 +++++++++++++++++++++++++ app/templates/result.html | 45 +++++++++++ compose.yaml | 17 +++++ pyproject.toml | 28 +++++++ tests/conftest.py | 26 +++++++ tests/test_app.py | 118 +++++++++++++++++++++++++++++ tests/test_parser.py | 30 ++++++++ tests/test_processing.py | 46 ++++++++++++ wsgi.py | 3 + 23 files changed, 1053 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 app/__init__.py create mode 100644 app/config.py create mode 100644 app/constants.py create mode 100644 app/routes.py create mode 100644 app/services/__init__.py create mode 100644 app/services/exporter.py create mode 100644 app/services/parser.py create mode 100644 app/services/processing.py create mode 100644 app/services/storage.py create mode 100644 app/templates/base.html create mode 100644 app/templates/index.html create mode 100644 app/templates/result.html create mode 100644 compose.yaml create mode 100644 pyproject.toml create mode 100644 tests/conftest.py create mode 100644 tests/test_app.py create mode 100644 tests/test_parser.py create mode 100644 tests/test_processing.py create mode 100644 wsgi.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..adea748 --- 
/dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.git +.pytest_cache +__pycache__ +*.pyc +instance +.venv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1933bf8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +.pytest_cache/ +*.pyc +*.pyo +*.egg-info/ +.venv/ +instance/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..acf7c90 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +FROM python:3.12-slim AS base + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY pyproject.toml README.md ./ +COPY app ./app +COPY tests ./tests +COPY wsgi.py ./ + +RUN useradd --create-home appuser && \ + mkdir -p /app/instance/outputs && \ + chown -R appuser:appuser /app + +FROM base AS production + +ENV OUTPUT_DIRECTORY=/app/instance/outputs + +RUN pip install --no-cache-dir . + +USER appuser + +EXPOSE 8000 + +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "2", "--threads", "4", "wsgi:app"] + +FROM base AS test + +ENV OUTPUT_DIRECTORY=/app/instance/outputs + +RUN pip install --no-cache-dir ".[dev]" + +USER appuser + +CMD ["python", "-m", "pytest"] diff --git a/README.md b/README.md index 007658c..dc69a5e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,90 @@ # webfortilog -Flask based application to convert FortiWeb logs \ No newline at end of file +Flask-based web application that converts WAF log files into aligned text reports or CSV exports. 
+ +## Features + +- Upload a UTF-8 log file where each line is a single record +- Parse shell-style `key=value` and `key="value with spaces"` tokens +- Support `vendor` mode with fixed columns and `full` mode with dynamic columns +- Filter by policy and severity with case-sensitive or case-insensitive partial matching +- Sort by combined datetime or severity ranking +- Preview results in the browser and download the generated file +- Run locally with Flask or in Docker with Gunicorn + +## Project structure + +```text +app/ + services/ + templates/ +tests/ +Dockerfile +pyproject.toml +wsgi.py +``` + +## Local usage + +### Requirements + +- Python 3.12 + +### Install + +```bash +python3.12 -m venv .venv +source .venv/bin/activate +pip install -e ".[dev]" +``` + +### Run + +```bash +export FLASK_APP=wsgi.py +flask run --debug +``` + +Open `http://127.0.0.1:5000`. + +### Test + +```bash +pytest +``` + +## Docker usage + +### Build + +```bash +docker build -t webfortilog . +``` + +### Run + +```bash +docker run --rm -p 8000:8000 webfortilog +``` + +Open `http://127.0.0.1:8000`. 
+ +## Docker Compose usage + +### Start the web app + +```bash +docker compose up --build web +``` + +### Run the test suite in a container + +```bash +docker compose run --rm test +``` + +## Notes + +- Temporary output files are written to `instance/outputs` +- The application does not require a database +- Gunicorn is used as the production WSGI server diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..387d243 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,28 @@ +from pathlib import Path + +from flask import Flask, flash, render_template +from werkzeug.exceptions import RequestEntityTooLarge + +from app.config import Config +from app.routes import main_blueprint + + +def create_app(config_class: type[Config] = Config) -> Flask: + """Application factory used by Flask and Gunicorn.""" + app = Flask(__name__, instance_relative_config=True) + app.config.from_object(config_class) + + output_dir = Path(app.config["OUTPUT_DIRECTORY"]) + if not output_dir.is_absolute(): + output_dir = Path(app.instance_path) / output_dir + app.config["OUTPUT_DIRECTORY"] = output_dir + output_dir.mkdir(parents=True, exist_ok=True) + + app.register_blueprint(main_blueprint) + + @app.errorhandler(RequestEntityTooLarge) + def handle_file_too_large(_error): + flash("The uploaded file is too large.", "danger") + return render_template("index.html"), 413 + + return app diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..441a55e --- /dev/null +++ b/app/config.py @@ -0,0 +1,13 @@ +import os +from pathlib import Path + + +class Config: + """Default configuration for local and container usage.""" + + SECRET_KEY = os.environ.get("SECRET_KEY", "dev-secret-key-change-me") + MAX_CONTENT_LENGTH = int(os.environ.get("MAX_CONTENT_LENGTH", 10 * 1024 * 1024)) + PREVIEW_RECORD_LIMIT = int(os.environ.get("PREVIEW_RECORD_LIMIT", 5)) + OUTPUT_DIRECTORY = Path( + os.environ.get("OUTPUT_DIRECTORY", Path("instance") / "outputs") + ) diff --git 
a/app/constants.py b/app/constants.py new file mode 100644 index 0000000..f6d9cc1 --- /dev/null +++ b/app/constants.py @@ -0,0 +1,35 @@ +VENDOR_FIELDS = [ + "v015xxxxdate", + "time", + "policy", + "http_method", + "http_host", + "http_url", + "http_refer", + "service", + "backend_service", + "msg", + "signature_subclass", + "signature_id", + "owasp_top10", + "match_location", + "action", + "severity_level", +] + +SEVERITY_RANKING = { + "critical": 5, + "high": 4, + "medium": 3, + "low": 2, + "info": 1, + "informational": 1, + "unknown": 0, + "none": 0, + "n/a": 0, +} + +SORTABLE_FIELDS = {"datetime", "severity"} +SORT_ORDERS = {"asc", "desc"} +MODES = {"vendor", "full"} +OUTPUT_FORMATS = {"text", "csv"} diff --git a/app/routes.py b/app/routes.py new file mode 100644 index 0000000..1390d9c --- /dev/null +++ b/app/routes.py @@ -0,0 +1,150 @@ +from dataclasses import dataclass +from pathlib import Path + +from flask import ( + Blueprint, + current_app, + flash, + redirect, + render_template, + request, + send_file, + url_for, +) +from werkzeug.datastructures import FileStorage + +from app.constants import MODES, OUTPUT_FORMATS, SORTABLE_FIELDS, SORT_ORDERS +from app.services.exporter import build_export +from app.services.parser import LogParseError, parse_log_file +from app.services.processing import ( + ProcessingError, + ProcessingOptions, + filter_records, + sort_records, +) +from app.services.storage import load_result_metadata, persist_result + +main_blueprint = Blueprint("main", __name__) + + +@dataclass(slots=True) +class FormData: + mode: str + output_format: str + sort_by: str + order: str + policy_cs: str + policy_ci: str + severity_cs: str + severity_ci: str + + +def _normalize_form() -> FormData: + return FormData( + mode=request.form.get("mode", "vendor").strip(), + output_format=request.form.get("output_format", "text").strip(), + sort_by=request.form.get("sort_by", "datetime").strip(), + order=request.form.get("order", "asc").strip(), + 
policy_cs=request.form.get("policy_cs", "").strip(), + policy_ci=request.form.get("policy_ci", "").strip(), + severity_cs=request.form.get("severity_cs", "").strip(), + severity_ci=request.form.get("severity_ci", "").strip(), + ) + + +def _validate_form(file: FileStorage | None, form: FormData) -> list[str]: + errors: list[str] = [] + + if file is None or not file.filename: + errors.append("Please choose a log file to upload.") + + if form.mode not in MODES: + errors.append("Invalid mode selection.") + if form.output_format not in OUTPUT_FORMATS: + errors.append("Invalid output format selection.") + if form.sort_by not in SORTABLE_FIELDS: + errors.append("Invalid sort field selection.") + if form.order not in SORT_ORDERS: + errors.append("Invalid sort order selection.") + if form.policy_cs and form.policy_ci: + errors.append( + "Policy filter must use either case-sensitive or case-insensitive match, not both." + ) + if form.severity_cs and form.severity_ci: + errors.append( + "Severity filter must use either case-sensitive or case-insensitive match, not both." 
+ ) + return errors + + +@main_blueprint.get("/") +def index(): + return render_template("index.html") + + +@main_blueprint.post("/convert") +def convert(): + uploaded_file = request.files.get("log_file") + form = _normalize_form() + errors = _validate_form(uploaded_file, form) + if errors: + for error in errors: + flash(error, "danger") + return render_template("index.html", form=form), 400 + + assert uploaded_file is not None + + try: + records, union_keys = parse_log_file(uploaded_file.stream) + options = ProcessingOptions( + policy_cs=form.policy_cs, + policy_ci=form.policy_ci, + severity_cs=form.severity_cs, + severity_ci=form.severity_ci, + sort_by=form.sort_by, + order=form.order, + mode=form.mode, + ) + filtered_records = filter_records(records, options) + sorted_records = sort_records(filtered_records, options) + export_result = build_export(sorted_records, union_keys, form.mode, form.output_format) + metadata = persist_result( + output_dir=current_app.config["OUTPUT_DIRECTORY"], + export_result=export_result, + ) + except (LogParseError, ProcessingError) as exc: + flash(str(exc), "danger") + return render_template("index.html", form=form), 400 + except UnicodeDecodeError: + flash("The uploaded file is not valid UTF-8 text.", "danger") + return render_template("index.html", form=form), 400 + + preview_limit = current_app.config["PREVIEW_RECORD_LIMIT"] + return render_template( + "result.html", + result_id=metadata.result_id, + preview_text=export_result.preview(preview_limit), + output_format=form.output_format, + record_count=len(sorted_records), + parsed_count=len(records), + filtered_count=len(sorted_records), + mode=form.mode, + sort_by=form.sort_by, + order=form.order, + ) + + +@main_blueprint.get("/download/<result_id>") +def download(result_id: str): + metadata = load_result_metadata(current_app.config["OUTPUT_DIRECTORY"], result_id) + if metadata is None: + flash("Requested output file could not be found.", "danger") + return redirect(url_for("main.index")) + 
+ return send_file( + Path(metadata["file_path"]), + as_attachment=True, + download_name=metadata["download_name"], + mimetype=metadata["mimetype"], + max_age=0, + ) diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..3ef3685 --- /dev/null +++ b/app/services/__init__.py @@ -0,0 +1 @@ +"""Service layer for parsing, processing, exporting, and file storage.""" diff --git a/app/services/exporter.py b/app/services/exporter.py new file mode 100644 index 0000000..917883b --- /dev/null +++ b/app/services/exporter.py @@ -0,0 +1,69 @@ +import csv +import io +from dataclasses import dataclass + +from app.constants import VENDOR_FIELDS + + +@dataclass(slots=True) +class ExportResult: + content: str + columns: list[str] + output_format: str + + def preview(self, record_limit: int) -> str: + """Build a small preview string for the result page.""" + if self.output_format == "text": + marker = f"--- record {record_limit + 1} ---" + if marker in self.content: + return self.content.split(marker, 1)[0].rstrip() + return self.content + + lines = self.content.splitlines() + if len(lines) <= record_limit + 1: + return self.content + return "\n".join(lines[: record_limit + 1]) + + +def build_export( + records: list[dict[str, str]], + union_keys: list[str], + mode: str, + output_format: str, +) -> ExportResult: + columns = VENDOR_FIELDS if mode == "vendor" else union_keys + + if output_format == "text": + return ExportResult( + content=_render_text(records, columns), + columns=columns, + output_format=output_format, + ) + + return ExportResult( + content=_render_csv(records, columns), + columns=columns, + output_format=output_format, + ) + + +def _render_text(records: list[dict[str, str]], columns: list[str]) -> str: + max_key_length = max((len(column) for column in columns), default=0) + chunks: list[str] = [] + + for index, record in enumerate(records, start=1): + chunks.append(f"--- record {index} ---") + for column in columns: + value = 
record.get(column, "") + chunks.append(f" {column.ljust(max_key_length)} = {value}") + + return "\n".join(chunks) + + +def _render_csv(records: list[dict[str, str]], columns: list[str]) -> str: + buffer = io.StringIO() + writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore") + writer.writeheader() + for record in records: + writer.writerow({column: record.get(column, "") for column in columns}) + return buffer.getvalue() diff --git a/app/services/parser.py b/app/services/parser.py new file mode 100644 index 0000000..13cd50e --- /dev/null +++ b/app/services/parser.py @@ -0,0 +1,47 @@ +import shlex +from collections import OrderedDict +from io import BufferedIOBase, TextIOBase + + +class LogParseError(ValueError): + """Raised when the uploaded log file cannot be parsed.""" + + +def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]: + """Parse a UTF-8 log file where each line contains shell-like key/value tokens.""" + raw_bytes = stream.read() + if isinstance(raw_bytes, str): + content = raw_bytes + else: + content = raw_bytes.decode("utf-8") + + records: list[dict[str, str]] = [] + seen_keys: OrderedDict[str, None] = OrderedDict() + + for line_number, raw_line in enumerate(content.splitlines(), start=1): + line = raw_line.strip() + if not line: + continue + + try: + tokens = shlex.split(line, posix=True) + except ValueError as exc: + raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc + + record: dict[str, str] = {} + for token in tokens: + if "=" not in token: + raise LogParseError( + f"Line {line_number}: token '{token}' is missing '='." 
+ ) + + key, value = token.split("=", 1) + if not key: + raise LogParseError(f"Line {line_number}: empty key is not allowed.") + + record[key] = value + seen_keys.setdefault(key, None) + + records.append(record) + + return records, list(seen_keys.keys()) diff --git a/app/services/processing.py b/app/services/processing.py new file mode 100644 index 0000000..f18439f --- /dev/null +++ b/app/services/processing.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass +from datetime import datetime + +from app.constants import SEVERITY_RANKING + + +class ProcessingError(ValueError): + """Raised when records cannot be processed according to the selected options.""" + + +@dataclass(slots=True) +class ProcessingOptions: + policy_cs: str + policy_ci: str + severity_cs: str + severity_ci: str + sort_by: str + order: str + mode: str + + +def filter_records( + records: list[dict[str, str]], options: ProcessingOptions +) -> list[dict[str, str]]: + """Apply user-selected filters to parsed records.""" + filtered: list[dict[str, str]] = [] + + for record in records: + policy_value = record.get("policy", "") + severity_value = record.get("severity_level", "") + + if options.policy_cs and options.policy_cs not in policy_value: + continue + if options.policy_ci and options.policy_ci.lower() not in policy_value.lower(): + continue + if options.severity_cs and options.severity_cs not in severity_value: + continue + if options.severity_ci and options.severity_ci.lower() not in severity_value.lower(): + continue + + filtered.append(record) + + return filtered + + +def sort_records( + records: list[dict[str, str]], options: ProcessingOptions +) -> list[dict[str, str]]: + """Sort records by datetime or severity using the requested order.""" + reverse = options.order == "desc" + + if options.sort_by == "datetime": + key_func = _datetime_key + elif options.sort_by == "severity": + key_func = _severity_key + else: + raise ProcessingError("Unsupported sort field.") + + return sorted(records, 
key=key_func, reverse=reverse) + + +def _datetime_key(record: dict[str, str]) -> tuple[int, datetime]: + date_value = record.get("v015xxxxdate", "").strip() + time_value = record.get("time", "").strip() + if not date_value or not time_value: + return (1, datetime.max) + + try: + parsed = datetime.strptime(f"{date_value} {time_value}", "%Y-%m-%d %H:%M:%S") + except ValueError: + return (1, datetime.max) + return (0, parsed) + + +def _severity_key(record: dict[str, str]) -> tuple[int, str]: + raw_value = record.get("severity_level", "").strip().lower() + rank = SEVERITY_RANKING.get(raw_value, 0) + return (rank, raw_value) diff --git a/app/services/storage.py b/app/services/storage.py new file mode 100644 index 0000000..d0e3d91 --- /dev/null +++ b/app/services/storage.py @@ -0,0 +1,43 @@ +import json +import uuid +from dataclasses import asdict, dataclass +from pathlib import Path + +from app.services.exporter import ExportResult + + +@dataclass(slots=True) +class ResultMetadata: + result_id: str + file_path: str + download_name: str + mimetype: str + + +def persist_result(output_dir: Path, export_result: ExportResult) -> ResultMetadata: + """Persist generated output and sidecar metadata in a temporary directory.""" + result_id = uuid.uuid4().hex + extension = "txt" if export_result.output_format == "text" else "csv" + mimetype = "text/plain; charset=utf-8" if extension == "txt" else "text/csv; charset=utf-8" + + file_path = output_dir / f"{result_id}.{extension}" + metadata_path = output_dir / f"{result_id}.json" + + file_path.write_text(export_result.content, encoding="utf-8") + metadata = ResultMetadata( + result_id=result_id, + file_path=str(file_path), + download_name=f"waf-report.{extension}", + mimetype=mimetype, + ) + metadata_path.write_text(json.dumps(asdict(metadata)), encoding="utf-8") + return metadata + + +def load_result_metadata(output_dir: Path, result_id: str) -> dict[str, str] | None: + """Load sidecar metadata for a generated file.""" + 
metadata_path = output_dir / f"{result_id}.json" + if not metadata_path.exists(): + return None + + return json.loads(metadata_path.read_text(encoding="utf-8")) diff --git a/app/templates/base.html b/app/templates/base.html new file mode 100644 index 0000000..fbf2c80 --- /dev/null +++ b/app/templates/base.html @@ -0,0 +1,38 @@ + + + + + + WAF Log Converter + + + +
+
+
+
+

WAF Log Converter

+

+ Upload a UTF-8 WAF log file and export a filtered report as readable text or CSV. +

+
+ + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + + {% endfor %} + {% endif %} + {% endwith %} + + {% block content %}{% endblock %} +
+
+
+ + diff --git a/app/templates/index.html b/app/templates/index.html new file mode 100644 index 0000000..7961ec0 --- /dev/null +++ b/app/templates/index.html @@ -0,0 +1,100 @@ +{% extends "base.html" %} +{% set form = form or none %} +{% block content %} +
+
+
+
+ + +
Each line must contain one record using shell-like key/value tokens.
+
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ Use only one policy filter and one severity filter at a time. Matching happens as a partial substring. +
+ +
+ + +
+
+
+
+{% endblock %} diff --git a/app/templates/result.html b/app/templates/result.html new file mode 100644 index 0000000..7ca3f27 --- /dev/null +++ b/app/templates/result.html @@ -0,0 +1,45 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+
+

Result summary

+
+
Parsed records
+
{{ parsed_count }}
+
Output records
+
{{ filtered_count }}
+
Mode
+
{{ mode }}
+
Format
+
{{ output_format }}
+
Sort
+
{{ sort_by }} / {{ order }}
+
+ + +
+
+
+ +
+
+
+
+

Preview

+ Showing up to {{ record_count if record_count < 5 else 5 }} records +
+
{{ preview_text }}
+
+
+
+
+{% endblock %} diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..dc0d2f3 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,17 @@ +services: + web: + build: + context: . + target: production + ports: + - "8000:8000" + environment: + SECRET_KEY: change-me + OUTPUT_DIRECTORY: /app/instance/outputs + + test: + build: + context: . + target: test + environment: + OUTPUT_DIRECTORY: /app/instance/outputs diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3563c21 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "webfortilog" +version = "0.1.0" +description = "Flask application to convert WAF log files into text or CSV reports." +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "Flask>=3.0,<4.0", + "gunicorn>=22.0,<24.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0,<9.0", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +filterwarnings = [ + "error", +] + +[tool.setuptools] +packages = ["app", "app.services"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ebcce3d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,26 @@ +import shutil +from pathlib import Path + +import pytest + +from app import create_app + + +class TestConfig: + TESTING = True + SECRET_KEY = "test-secret" + MAX_CONTENT_LENGTH = 1024 * 1024 + PREVIEW_RECORD_LIMIT = 5 + OUTPUT_DIRECTORY = "test-outputs" + + +@pytest.fixture() +def app(): + flask_app = create_app(TestConfig) + yield flask_app + shutil.rmtree(Path(flask_app.instance_path) / "test-outputs", ignore_errors=True) + + +@pytest.fixture() +def client(app): + return app.test_client() diff --git a/tests/test_app.py b/tests/test_app.py new file mode 100644 index 0000000..f9d672b --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,118 @@ +import io + + +SAMPLE_LOG = ( + 'v015xxxxdate=2024-05-01 time=10:00:00 
policy="Prod Policy" ' + 'http_method=GET http_host=example.com http_url="/login" ' + 'http_refer="https://ref.example" service=edge backend_service=api ' + 'msg="SQL injection blocked" signature_subclass=SQL signature_id=942100 ' + 'owasp_top10=A03 match_location=body action=blocked severity_level=high\n' + 'v015xxxxdate=2024-05-02 time=11:00:00 policy="Prod Policy" ' + 'http_method=POST http_host=example.com http_url="/checkout" ' + 'http_refer="https://shop.example" service=edge backend_service=orders ' + 'msg="XSS blocked" signature_subclass=XSS signature_id=941100 ' + 'owasp_top10=A03 match_location=query action=monitored severity_level=medium\n' +) + + +def test_index_page_loads(client): + response = client.get("/") + + assert response.status_code == 200 + assert b"WAF Log Converter" in response.data + + +def test_convert_returns_text_preview_and_download_link(client): + response = client.post( + "/convert", + data={ + "mode": "vendor", + "output_format": "text", + "sort_by": "severity", + "order": "desc", + "policy_cs": "", + "policy_ci": "prod", + "severity_cs": "", + "severity_ci": "", + "log_file": (io.BytesIO(SAMPLE_LOG.encode("utf-8")), "sample.log"), + }, + content_type="multipart/form-data", + ) + + assert response.status_code == 200 + assert b"Download export" in response.data + assert b"--- record 1 ---" in response.data + + +def test_convert_full_mode_csv_preserves_union_order(client): + response = client.post( + "/convert", + data={ + "mode": "full", + "output_format": "csv", + "sort_by": "datetime", + "order": "asc", + "policy_cs": "", + "policy_ci": "", + "severity_cs": "", + "severity_ci": "", + "log_file": (io.BytesIO(SAMPLE_LOG.encode("utf-8")), "sample.log"), + }, + content_type="multipart/form-data", + ) + + assert response.status_code == 200 + assert b"TEXT" not in response.data + assert b"Download export" in response.data + + +def test_convert_rejects_mutually_exclusive_filters(client): + response = client.post( + "/convert", + data={ + 
"mode": "vendor", + "output_format": "csv", + "sort_by": "datetime", + "order": "asc", + "policy_cs": "A", + "policy_ci": "a", + "severity_cs": "", + "severity_ci": "", + "log_file": (io.BytesIO(SAMPLE_LOG.encode("utf-8")), "sample.log"), + }, + content_type="multipart/form-data", + ) + + assert response.status_code == 400 + assert b"Policy filter must use either case-sensitive or case-insensitive match" in response.data + + +def test_download_route_returns_generated_file(client): + convert_response = client.post( + "/convert", + data={ + "mode": "vendor", + "output_format": "csv", + "sort_by": "datetime", + "order": "asc", + "policy_cs": "", + "policy_ci": "", + "severity_cs": "", + "severity_ci": "", + "log_file": (io.BytesIO(SAMPLE_LOG.encode("utf-8")), "sample.log"), + }, + content_type="multipart/form-data", + ) + + html = convert_response.data.decode("utf-8") + marker = '/download/' + start = html.index(marker) + len(marker) + end = html.index('"', start) + result_id = html[start:end] + + download_response = client.get(f"/download/{result_id}") + + assert download_response.status_code == 200 + assert download_response.headers["Content-Type"].startswith("text/csv") + assert b"v015xxxxdate,time,policy" in download_response.data + download_response.close() diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..52c6a89 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,30 @@ +import io + +import pytest + +from app.services.parser import LogParseError, parse_log_file + + +def test_parse_log_file_supports_shell_style_quotes(): + stream = io.BytesIO( + b'v015xxxxdate=2024-02-15 time=09:10:11 policy="Strict Policy" msg="blocked request"\n' + ) + + records, union_keys = parse_log_file(stream) + + assert records == [ + { + "v015xxxxdate": "2024-02-15", + "time": "09:10:11", + "policy": "Strict Policy", + "msg": "blocked request", + } + ] + assert union_keys == ["v015xxxxdate", "time", "policy", "msg"] + + +def 
test_parse_log_file_rejects_tokens_without_equals(): + stream = io.BytesIO(b"v015xxxxdate=2024-02-15 broken-token\n") + + with pytest.raises(LogParseError): + parse_log_file(stream) diff --git a/tests/test_processing.py b/tests/test_processing.py new file mode 100644 index 0000000..b961dfc --- /dev/null +++ b/tests/test_processing.py @@ -0,0 +1,46 @@ +from app.services.processing import ProcessingOptions, filter_records, sort_records + + +def test_filter_records_supports_case_insensitive_filters(): + records = [ + {"policy": "ProdPolicy", "severity_level": "HIGH"}, + {"policy": "OtherPolicy", "severity_level": "low"}, + ] + options = ProcessingOptions( + policy_cs="", + policy_ci="prod", + severity_cs="", + severity_ci="high", + sort_by="datetime", + order="asc", + mode="vendor", + ) + + filtered = filter_records(records, options) + + assert filtered == [{"policy": "ProdPolicy", "severity_level": "HIGH"}] + + +def test_sort_records_by_severity_desc_uses_defined_ranking(): + records = [ + {"severity_level": "medium"}, + {"severity_level": "critical"}, + {"severity_level": "info"}, + ] + options = ProcessingOptions( + policy_cs="", + policy_ci="", + severity_cs="", + severity_ci="", + sort_by="severity", + order="desc", + mode="vendor", + ) + + sorted_records = sort_records(records, options) + + assert [record["severity_level"] for record in sorted_records] == [ + "critical", + "medium", + "info", + ] diff --git a/wsgi.py b/wsgi.py new file mode 100644 index 0000000..0a23b5a --- /dev/null +++ b/wsgi.py @@ -0,0 +1,3 @@ +from app import create_app + +app = create_app()