Reduce conversion memory footprint

This commit is contained in:
Alfredo Di Stasio
2026-04-27 11:44:40 +02:00
parent 9313b54abb
commit f9f792f6a1
10 changed files with 324 additions and 102 deletions

View File

@@ -1,69 +1,107 @@
import csv
import io
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence, TextIO
from app.constants import VENDOR_FIELDS
@dataclass(slots=True)
class ExportResult:
content: str
columns: list[str]
output_format: str
preview_text: str
def preview(self, record_limit: int) -> str:
"""Build a small preview string for the result page."""
if self.output_format == "text":
marker = f"--- record {record_limit + 1} ---"
if marker in self.content:
return self.content.split(marker, 1)[0].rstrip()
return self.content
lines = self.content.splitlines()
if len(lines) <= record_limit + 1:
return self.content
return "\n".join(lines[: record_limit + 1])
def preview(self, _record_limit: int) -> str:
"""Return the preview that was collected during export writing."""
return self.preview_text
def write_export(
    file_path: Path,
    records: Sequence[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
    preview_record_limit: int,
) -> ExportResult:
    """Write the final export directly to disk and keep only a small preview in memory.

    Args:
        file_path: Destination file; opened for writing as UTF-8 text.
        records: Records to export, one mapping of column name to value each.
        union_keys: Columns to export when ``mode`` is not ``"vendor"``.
        mode: ``"vendor"`` selects ``VENDOR_FIELDS`` as the columns; any
            other value selects ``union_keys``.
        output_format: ``"text"`` writes the text layout; any other value
            writes CSV.
        preview_record_limit: Number of leading records to include in the
            preview.

    Returns:
        An ``ExportResult`` holding the columns used, the output format and
        the preview text produced by the writer.
    """
    columns = VENDOR_FIELDS if mode == "vendor" else union_keys

    # newline="" disables newline translation so the writers control the
    # exact line terminators that end up in the file.
    with file_path.open("w", encoding="utf-8", newline="") as export_file:
        if output_format == "text":
            preview_text = _write_text(
                export_file=export_file,
                records=records,
                columns=columns,
                preview_record_limit=preview_record_limit,
            )
        else:
            preview_text = _write_csv(
                export_file=export_file,
                records=records,
                columns=columns,
                preview_record_limit=preview_record_limit,
            )

    return ExportResult(
        columns=columns,
        output_format=output_format,
        preview_text=preview_text,
    )
def _write_text(
    export_file: TextIO,
    records: Sequence[dict[str, str]],
    columns: list[str],
    preview_record_limit: int,
) -> str:
    """Stream records to ``export_file`` in the text layout and return a preview.

    Each record is written as a ``--- record N ---`` header (N starts at 1)
    followed by one ``column = value`` line per column, with column names
    left-justified to the longest column name. A column missing from a
    record is written with an empty value.

    Args:
        export_file: Open text stream that receives the export.
        records: Records to write, in order.
        columns: Columns to emit for every record, in order.
        preview_record_limit: Number of leading records whose lines are
            also kept for the preview.

    Returns:
        The lines of the first ``preview_record_limit`` records joined with
        ``"\\n"``; an empty string when no record qualifies.
    """
    max_key_length = max((len(column) for column in columns), default=0)
    preview_lines: list[str] = []
    wrote_line = False

    for index, record in enumerate(records, start=1):
        # Decide once per record whether its lines also go to the preview.
        in_preview = index <= preview_record_limit

        header = f"--- record {index} ---"
        wrote_line = _write_line(export_file, header, wrote_line)
        if in_preview:
            preview_lines.append(header)

        for column in columns:
            line = f" {column.ljust(max_key_length)} = {record.get(column, '')}"
            wrote_line = _write_line(export_file, line, wrote_line)
            if in_preview:
                preview_lines.append(line)

    return "\n".join(preview_lines)
def _write_csv(
    export_file: TextIO,
    records: Sequence[dict[str, str]],
    columns: list[str],
    preview_record_limit: int,
) -> str:
    """Stream records to ``export_file`` as CSV and return a small preview.

    A header row is always written. Every record is reduced to exactly
    ``columns``; a column missing from a record becomes an empty cell and
    keys outside ``columns`` are dropped.

    Args:
        export_file: Open text stream that receives the full CSV.
        records: Records to write, in order.
        columns: CSV field names, in output order.
        preview_record_limit: Number of leading records that are also
            written to the in-memory preview.

    Returns:
        The header row plus the first ``preview_record_limit`` rows as CSV
        text, without the trailing line terminator.
    """
    writer = csv.DictWriter(export_file, fieldnames=columns, extrasaction="ignore")
    writer.writeheader()

    # The preview goes through a second writer so it is quoted and escaped
    # exactly like the rows in the real file.
    preview_buffer = io.StringIO()
    preview_writer = csv.DictWriter(preview_buffer, fieldnames=columns, extrasaction="ignore")
    preview_writer.writeheader()

    for index, record in enumerate(records, start=1):
        row = {column: record.get(column, "") for column in columns}
        writer.writerow(row)
        if index <= preview_record_limit:
            preview_writer.writerow(row)

    # csv writers terminate rows with "\r\n" by default; stripping only "\n"
    # would leave a dangling "\r" at the end of the preview.
    return preview_buffer.getvalue().rstrip("\r\n")
def _write_line(export_file: TextIO, line: str, wrote_line: bool) -> bool:
    """Write lines without leaving a trailing newline at the end of the file.

    The newline is written *before* every line except the first, so the
    output never ends with a line terminator.

    Args:
        export_file: Open text stream to write to.
        line: Text to write, without a line terminator.
        wrote_line: ``True`` when an earlier call already wrote a line to
            this stream.

    Returns:
        Always ``True``, to be passed back as ``wrote_line`` on the next call.
    """
    if wrote_line:
        export_file.write("\n")
    export_file.write(line)
    return True