Harden parser for malformed multiline records

This commit is contained in:
Alfredo Di Stasio
2026-04-24 15:12:51 +02:00
parent f64deb9c0d
commit 235aa47dd3
2 changed files with 78 additions and 52 deletions

View File

@@ -58,3 +58,19 @@ def test_parse_log_file_tolerates_unterminated_quotes():
records, _union_keys = parse_log_file(stream)
assert records[0]["msg"] == "broken quoted value"
def test_parse_log_file_rebuilds_record_after_embedded_newlines():
    """Check that a quoted value spanning several physical lines yields one record.

    The first record's ``msg`` value opens its quote on one line and closes it
    two lines later; a second, well-formed record follows. The parser is
    expected to return exactly two records, with the split fragments joined
    into a single ``msg`` value and the trailing ``action`` field preserved.
    """
    physical_lines = (
        b'v015xxxxdate=2024-02-15 time=09:10:11 msg="hello\n',
        b'broken-fragment\n',
        b'world" action=Alert\n',
        b'v015xxxxdate=2024-02-15 time=09:10:12 msg="next" action=Monitor\n',
    )
    payload = io.BytesIO(b"".join(physical_lines))

    # Only the parsed records matter here; the union of keys is ignored.
    parsed, _ = parse_log_file(payload)

    assert len(parsed) == 2

    # The record that was broken across three lines.
    rebuilt = parsed[0]
    assert rebuilt["msg"] == "hellobroken-fragmentworld"
    assert rebuilt["action"] == "Alert"

    # The record after it must still parse on its own.
    following = parsed[1]
    assert following["msg"] == "next"