Reduce conversion memory footprint
This commit is contained in:
@@ -1,69 +1,107 @@
|
||||
import csv
|
||||
import io
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence, TextIO
|
||||
|
||||
from app.constants import VENDOR_FIELDS
|
||||
|
||||
|
||||
@dataclass(slots=True)
class ExportResult:
    """Outcome of writing an export file to disk.

    Only a small pre-rendered preview is kept in memory; the full export
    content lives on disk (written by ``write_export``).
    """

    # Column names in the order they were written to the export.
    columns: list[str]
    # Format the export was written in; code elsewhere branches on "text",
    # with CSV as the other path — TODO confirm the exact set of values.
    output_format: str
    # Preview captured while the export was being written (first records only).
    preview_text: str

    def preview(self, _record_limit: int) -> str:
        """Return the preview that was collected during export writing."""
        # _record_limit is unused: the limit was already applied at write
        # time. The parameter is kept so existing callers keep working.
        return self.preview_text
|
||||
|
||||
|
||||
def write_export(
    file_path: Path,
    records: Sequence[dict[str, str]],
    union_keys: list[str],
    mode: str,
    output_format: str,
    preview_record_limit: int,
) -> ExportResult:
    """Write the final export directly to disk and keep only a small preview in memory.

    Args:
        file_path: Destination file; opened for writing (truncates any existing file).
        records: Records to export, one dict per record.
        union_keys: Column set used when ``mode`` is not ``"vendor"``.
        mode: ``"vendor"`` selects the fixed ``VENDOR_FIELDS`` schema,
            anything else uses ``union_keys``.
        output_format: ``"text"`` for the aligned key/value layout, otherwise CSV.
        preview_record_limit: How many leading records to mirror into the
            in-memory preview.

    Returns:
        An ``ExportResult`` carrying the columns, format, and preview text —
        but not the full export content, which stays on disk.
    """
    columns = VENDOR_FIELDS if mode == "vendor" else union_keys

    # newline="" lets the csv module control line endings itself (per the
    # csv docs); harmless for the text format.
    with file_path.open("w", encoding="utf-8", newline="") as export_file:
        if output_format == "text":
            preview_text = _write_text(
                export_file=export_file,
                records=records,
                columns=columns,
                preview_record_limit=preview_record_limit,
            )
        else:
            preview_text = _write_csv(
                export_file=export_file,
                records=records,
                columns=columns,
                preview_record_limit=preview_record_limit,
            )

    return ExportResult(
        columns=columns,
        output_format=output_format,
        preview_text=preview_text,
    )
|
||||
|
||||
|
||||
def _write_text(
    export_file: TextIO,
    records: Sequence[dict[str, str]],
    columns: list[str],
    preview_record_limit: int,
) -> str:
    """Stream *records* in the text layout to *export_file*.

    Each record becomes a ``--- record N ---`` header followed by one
    ``key = value`` line per column, keys padded to a common width. Only the
    first ``preview_record_limit`` records are also collected in memory; that
    collected preview is returned as a single string.
    """
    pad_width = max((len(name) for name in columns), default=0)
    preview: list[str] = []
    pending_separator = False

    for number, record in enumerate(records, start=1):
        # Render the whole record first, then write it line by line.
        rendered = [f"--- record {number} ---"]
        rendered.extend(
            f"  {name.ljust(pad_width)} = {record.get(name, '')}" for name in columns
        )

        for text in rendered:
            pending_separator = _write_line(export_file, text, pending_separator)

        if number <= preview_record_limit:
            preview.extend(rendered)

    return "\n".join(preview)
|
||||
|
||||
|
||||
def _render_csv(records: list[dict[str, str]], columns: list[str]) -> str:
|
||||
buffer = io.StringIO()
|
||||
writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
|
||||
def _write_csv(
|
||||
export_file: TextIO,
|
||||
records: Sequence[dict[str, str]],
|
||||
columns: list[str],
|
||||
preview_record_limit: int,
|
||||
) -> str:
|
||||
writer = csv.DictWriter(export_file, fieldnames=columns, extrasaction="ignore")
|
||||
writer.writeheader()
|
||||
for record in records:
|
||||
writer.writerow({column: record.get(column, "") for column in columns})
|
||||
return buffer.getvalue()
|
||||
|
||||
preview_buffer = io.StringIO()
|
||||
preview_writer = csv.DictWriter(preview_buffer, fieldnames=columns, extrasaction="ignore")
|
||||
preview_writer.writeheader()
|
||||
|
||||
for index, record in enumerate(records, start=1):
|
||||
row = {column: record.get(column, "") for column in columns}
|
||||
writer.writerow(row)
|
||||
if index <= preview_record_limit:
|
||||
preview_writer.writerow(row)
|
||||
|
||||
return preview_buffer.getvalue().rstrip("\n")
|
||||
|
||||
|
||||
def _write_line(export_file: TextIO, line: str, wrote_line: bool) -> bool:
|
||||
"""Write lines without leaving a trailing newline at the end of the file."""
|
||||
if wrote_line:
|
||||
export_file.write("\n")
|
||||
export_file.write(line)
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user