Reduce conversion memory footprint

This commit is contained in:
Alfredo Di Stasio
2026-04-27 11:44:40 +02:00
parent 9313b54abb
commit f9f792f6a1
10 changed files with 324 additions and 102 deletions

View File

@@ -0,0 +1,47 @@
from dataclasses import dataclass
from pathlib import Path
from app.services.exporter import ExportResult
from app.services.parser import create_parse_session
from app.services.processing import ProcessingOptions, filter_records, sort_records
from app.services.storage import ResultMetadata, persist_result
@dataclass(slots=True)
class ConversionResult:
metadata: ResultMetadata
export_result: ExportResult
parsed_count: int
filtered_count: int
def convert_uploaded_log(
    stream,
    options: ProcessingOptions,
    output_dir: Path,
    output_format: str,
    preview_record_limit: int,
) -> ConversionResult:
    """Convert an uploaded log into a persisted export with a small in-memory preview.

    Parsing, filtering, and export writing are streamed to keep memory usage
    low. Sorting still materializes the filtered records, because a global
    ordering by datetime or severity requires the whole filtered result set.
    """
    session = create_parse_session(stream)

    # Filtering consumes the record iterator lazily; sort_records is the one
    # step that materializes the filtered stream (it must see every record).
    filtered = filter_records(session.iter_records(), options)
    ordered = sort_records(filtered, options)

    metadata, export_result = persist_result(
        output_dir=output_dir,
        records=ordered,
        union_keys=session.union_keys(),
        mode=options.mode,
        output_format=output_format,
        preview_record_limit=preview_record_limit,
    )

    return ConversionResult(
        metadata=metadata,
        export_result=export_result,
        parsed_count=session.parsed_count,
        filtered_count=len(ordered),
    )