Reduce conversion memory footprint

This commit is contained in:
Alfredo Di Stasio
2026-04-27 11:44:40 +02:00
parent 9313b54abb
commit f9f792f6a1
10 changed files with 324 additions and 102 deletions

View File

@@ -74,3 +74,19 @@ def test_parse_log_file_rebuilds_record_after_embedded_newlines():
assert records[0]["msg"] == "hellobroken-fragmentworld"
assert records[0]["action"] == "Alert"
assert records[1]["msg"] == "next"
def test_parse_log_file_does_not_require_full_stream_read():
    """Check that ``parse_log_file`` works without an unbounded ``read`` call.

    The stream subclass below raises as soon as ``read`` is asked for the
    whole remaining content, so the test fails if the parser pulls the
    entire input through ``read`` in one go.
    """

    class NoFullReadBytesIO(io.BytesIO):
        """``BytesIO`` variant that rejects read-until-EOF requests."""

        def read(self, size=-1):
            # ``None`` and every negative size mean "read until EOF" for
            # ``BytesIO.read``, not only the default ``-1``.
            if size is None or size < 0:
                raise AssertionError("full stream read should not be used")
            return super().read(size)

    stream = NoFullReadBytesIO(
        b'v015xxxxdate=2024-02-15 time=09:10:11 policy="Strict Policy" msg="blocked request"\n'
    )

    records, _union_keys = parse_log_file(stream)

    assert records[0]["policy"] == "Strict Policy"

View File

@@ -16,7 +16,7 @@ def test_filter_records_supports_case_insensitive_filters():
mode="vendor",
)
filtered = filter_records(records, options)
filtered = list(filter_records(records, options))
assert filtered == [{"policy": "ProdPolicy", "severity_level": "HIGH"}]

34
tests/test_storage.py Normal file
View File

@@ -0,0 +1,34 @@
from pathlib import Path
from app.services.storage import persist_result
def test_persist_result_writes_csv_and_collects_preview(tmp_path: Path):
    """Persist two records as CSV, then check the written file and the preview."""
    columns = ["v015xxxxdate", "time", "policy", "severity_level"]
    # Rows are built from the column list so keys keep the same order as the header.
    first_row = dict(zip(columns, ("2024-05-01", "10:00:00", "Prod Policy", "high")))
    second_row = dict(zip(columns, ("2024-05-02", "11:00:00", "Other Policy", "low")))

    metadata, export_result = persist_result(
        output_dir=tmp_path,
        records=[first_row, second_row],
        union_keys=columns,
        mode="full",
        output_format="csv",
        preview_record_limit=1,
    )

    csv_text = Path(metadata.file_path).read_text(encoding="utf-8")

    assert metadata.download_name == "waf-report.csv"
    # Header line followed by the first data row, both expected verbatim.
    assert "v015xxxxdate,time,policy,severity_level" in csv_text
    assert "2024-05-01,10:00:00,Prod Policy,high" in csv_text

    preview_text = export_result.preview(1)
    assert preview_text.count("\n") == 1