import shlex from collections import OrderedDict from io import BufferedIOBase, TextIOBase class LogParseError(ValueError): """Raised when the uploaded log file cannot be parsed.""" def parse_log_file(stream: BufferedIOBase | TextIOBase) -> tuple[list[dict[str, str]], list[str]]: """Parse a UTF-8 log file where each line contains shell-like key/value tokens.""" raw_bytes = stream.read() if isinstance(raw_bytes, str): content = raw_bytes else: content = raw_bytes.decode("utf-8") records: list[dict[str, str]] = [] seen_keys: OrderedDict[str, None] = OrderedDict() for line_number, raw_line in enumerate(content.splitlines(), start=1): line = raw_line.strip() if not line: continue try: tokens = shlex.split(line, posix=True) except ValueError as exc: raise LogParseError(f"Line {line_number}: invalid shell-style quoting.") from exc record: dict[str, str] = {} for token in tokens: if "=" not in token: raise LogParseError( f"Line {line_number}: token '{token}' is missing '='." ) key, value = token.split("=", 1) if not key: raise LogParseError(f"Line {line_number}: empty key is not allowed.") record[key] = value seen_keys.setdefault(key, None) records.append(record) return records, list(seen_keys.keys())