Reduce conversion memory footprint
This commit is contained in:
47
app/services/conversion.py
Normal file
47
app/services/conversion.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.exporter import ExportResult
|
||||
from app.services.parser import create_parse_session
|
||||
from app.services.processing import ProcessingOptions, filter_records, sort_records
|
||||
from app.services.storage import ResultMetadata, persist_result
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ConversionResult:
    """Outcome of one full log-conversion run (see ``convert_uploaded_log``)."""

    # Storage-layer metadata for the persisted result (from persist_result).
    metadata: ResultMetadata
    # Details of the written export artifact (from persist_result).
    export_result: ExportResult
    # Total records the parse session parsed from the upload.
    parsed_count: int
    # Size of the filtered (and sorted) record set that was exported.
    filtered_count: int
|
||||
|
||||
|
||||
def convert_uploaded_log(
    stream,
    options: ProcessingOptions,
    output_dir: Path,
    output_format: str,
    preview_record_limit: int,
) -> ConversionResult:
    """Run the upload-to-export pipeline for a single log stream.

    Parsing, filtering, and export writing are streamed so memory stays
    low; sorting is the one step that materializes the filtered records,
    since a global order (by datetime or severity) cannot be produced
    until the whole filtered result set has been seen.

    Args:
        stream: Uploaded log stream handed to the parse session.
        options: Filtering/sorting configuration (also supplies ``mode``).
        output_dir: Directory the export is persisted into.
        output_format: Format identifier forwarded to the storage layer.
        preview_record_limit: Cap on the in-memory preview kept alongside
            the persisted export.

    Returns:
        A ``ConversionResult`` bundling storage metadata, export details,
        and parsed/filtered record counts.
    """
    session = create_parse_session(stream)

    # Lazy parse -> lazy filter; sort_records materializes the result.
    filtered = filter_records(session.iter_records(), options)
    records = sort_records(filtered, options)

    # union_keys() is read only after sorting has drained the record
    # iterator, so the session has seen every record by this point.
    metadata, export_result = persist_result(
        output_dir=output_dir,
        records=records,
        union_keys=session.union_keys(),
        mode=options.mode,
        output_format=output_format,
        preview_record_limit=preview_record_limit,
    )

    return ConversionResult(
        metadata=metadata,
        export_result=export_result,
        parsed_count=session.parsed_count,
        filtered_count=len(records),
    )
|
||||
Reference in New Issue
Block a user