Improve log upload handling
This commit is contained in:
@@ -7,6 +7,15 @@ from app.config import Config
|
||||
from app.routes import main_blueprint
|
||||
|
||||
|
||||
def _format_size_limit(size_limit_bytes: int) -> str:
|
||||
"""Render the upload limit in a friendly unit for error messages."""
|
||||
if size_limit_bytes >= 1024 * 1024:
|
||||
return f"{size_limit_bytes / (1024 * 1024):.0f} MB"
|
||||
if size_limit_bytes >= 1024:
|
||||
return f"{size_limit_bytes / 1024:.0f} KB"
|
||||
return f"{size_limit_bytes} bytes"
|
||||
|
||||
|
||||
def create_app(config_class: type[Config] = Config) -> Flask:
|
||||
"""Application factory used by Flask and Gunicorn."""
|
||||
app = Flask(__name__, instance_relative_config=True)
|
||||
@@ -22,7 +31,11 @@ def create_app(config_class: type[Config] = Config) -> Flask:
|
||||
|
||||
@app.errorhandler(RequestEntityTooLarge)
def handle_file_too_large(_error):
    """Flash one "file too large" message and re-render the index page.

    Returns:
        A ``(body, 413)`` tuple: the rendered ``index.html`` template and
        the HTTP 413 status code.
    """
    configured_limit = app.config.get("MAX_CONTENT_LENGTH")
    if configured_limit is None:
        # No limit is configured, so there is no size to report.
        message = "The uploaded file is too large."
    else:
        message = (
            "The uploaded file is too large. Maximum allowed size is "
            f"{_format_size_limit(int(configured_limit))}."
        )
    # Flash exactly once so the user does not see two overlapping alerts.
    flash(message, "danger")
    return render_template("index.html"), 413
|
||||
|
||||
return app
|
||||
|
||||
@@ -2,11 +2,25 @@ import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _get_max_content_length() -> int:
|
||||
"""Resolve the upload size limit from environment settings."""
|
||||
upload_limit_mb = os.environ.get("MAX_UPLOAD_SIZE_MB")
|
||||
if upload_limit_mb:
|
||||
return int(upload_limit_mb) * 1024 * 1024
|
||||
|
||||
max_content_length = os.environ.get("MAX_CONTENT_LENGTH")
|
||||
if max_content_length:
|
||||
return int(max_content_length)
|
||||
|
||||
return 100 * 1024 * 1024
|
||||
|
||||
|
||||
class Config:
|
||||
"""Default configuration for local and container usage."""
|
||||
|
||||
SECRET_KEY = os.environ.get("SECRET_KEY", "dev-secret-key-change-me")
|
||||
MAX_CONTENT_LENGTH = int(os.environ.get("MAX_CONTENT_LENGTH", 10 * 1024 * 1024))
|
||||
# Default to 100 MiB so larger WAF exports can be processed without tuning.
|
||||
MAX_CONTENT_LENGTH = _get_max_content_length()
|
||||
PREVIEW_RECORD_LIMIT = int(os.environ.get("PREVIEW_RECORD_LIMIT", 5))
|
||||
OUTPUT_DIRECTORY = Path(
|
||||
os.environ.get("OUTPUT_DIRECTORY", Path("instance") / "outputs")
|
||||
|
||||
@@ -116,7 +116,10 @@ def convert():
|
||||
flash(str(exc), "danger")
|
||||
return render_template("index.html", form=form), 400
|
||||
except UnicodeDecodeError:
|
||||
flash("The uploaded file is not valid UTF-8 text.", "danger")
|
||||
flash(
|
||||
"The uploaded file could not be decoded. Supported encodings are UTF-8, UTF-8 with BOM, Windows-1252, and Latin-1.",
|
||||
"danger",
|
||||
)
|
||||
return render_template("index.html", form=form), 400
|
||||
|
||||
preview_limit = current_app.config["PREVIEW_RECORD_LIMIT"]
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import shlex
|
||||
from collections import OrderedDict
|
||||
from io import BufferedIOBase, TextIOBase
|
||||
|
||||
@@ -7,13 +6,68 @@ class LogParseError(ValueError):
|
||||
"""Raised when the uploaded log file cannot be parsed."""
|
||||
|
||||
|
||||
def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
|
||||
"""Parse a UTF-8 log file where each line contains shell-like key/value tokens."""
|
||||
raw_bytes = stream.read()
|
||||
def _decode_log_content(raw_bytes: bytes | str) -> str:
|
||||
"""Decode uploaded log content using practical text encodings seen in exports."""
|
||||
if isinstance(raw_bytes, str):
|
||||
content = raw_bytes
|
||||
else:
|
||||
content = raw_bytes.decode("utf-8")
|
||||
return raw_bytes
|
||||
|
||||
for encoding in ("utf-8-sig", "cp1252", "latin-1"):
|
||||
try:
|
||||
return raw_bytes.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
raise UnicodeDecodeError("unknown", b"", 0, 1, "Unsupported text encoding.")
|
||||
|
||||
|
||||
def _tokenize_line(line: str) -> list[str]:
|
||||
"""Split a line using shell-like rules while tolerating unmatched trailing quotes."""
|
||||
tokens: list[str] = []
|
||||
current: list[str] = []
|
||||
quote_char: str | None = None
|
||||
escape_next = False
|
||||
|
||||
for char in line:
|
||||
if escape_next:
|
||||
current.append(char)
|
||||
escape_next = False
|
||||
continue
|
||||
|
||||
if char == "\\":
|
||||
escape_next = True
|
||||
continue
|
||||
|
||||
if quote_char is not None:
|
||||
if char == quote_char:
|
||||
quote_char = None
|
||||
else:
|
||||
current.append(char)
|
||||
continue
|
||||
|
||||
if char in {'"', "'"}:
|
||||
quote_char = char
|
||||
continue
|
||||
|
||||
if char.isspace():
|
||||
if current:
|
||||
tokens.append("".join(current))
|
||||
current = []
|
||||
continue
|
||||
|
||||
current.append(char)
|
||||
|
||||
if escape_next:
|
||||
current.append("\\")
|
||||
if current:
|
||||
tokens.append("".join(current))
|
||||
|
||||
return tokens
|
||||
|
||||
|
||||
def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]:
|
||||
"""Parse a text log file where each line contains shell-like key/value tokens."""
|
||||
raw_bytes = stream.read()
|
||||
content = _decode_log_content(raw_bytes)
|
||||
|
||||
records: list[dict[str, str]] = []
|
||||
seen_keys: OrderedDict[str, None] = OrderedDict()
|
||||
@@ -23,10 +77,7 @@ def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str,
|
||||
if not line:
|
||||
continue
|
||||
|
||||
try:
|
||||
tokens = shlex.split(line, posix=True)
|
||||
except ValueError as exc:
|
||||
raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc
|
||||
tokens = _tokenize_line(line)
|
||||
|
||||
record: dict[str, str] = {}
|
||||
for token in tokens:
|
||||
|
||||
Reference in New Issue
Block a user