feat(v2): add snapshot extractor framework and run command

This commit is contained in:
Alfredo Di Stasio
2026-03-13 14:24:54 +01:00
parent 6fc583c79f
commit 850e4de71b
10 changed files with 796 additions and 0 deletions

View File

@ -0,0 +1,63 @@
from __future__ import annotations
from django.core.management.base import BaseCommand, CommandError
from django.utils.dateparse import parse_date
from apps.ingestion.extractors import ExtractorError, available_extractors, create_extractor
class Command(BaseCommand):
    """Run a registered snapshot extractor from the command line.

    The extractor is looked up by name via ``create_extractor`` and run with
    the options collected here. On success a one-line summary built from the
    returned result (extractor name, source name, snapshot date, record count
    and output path) is written to stdout.
    """

    help = "Run a snapshot extractor and emit importable JSON snapshots."

    def add_arguments(self, parser):
        """Register the positional extractor name and the optional flags."""
        # Restricting choices makes argparse reject unknown extractor names
        # before handle() is ever called.
        parser.add_argument("extractor_name", choices=available_extractors())
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json file path where snapshot should be written. Defaults to incoming dir.",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate extraction/normalization without writing snapshot file.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted snapshots.",
        )

    def _parse_snapshot_date(self, raw_value):
        """Convert the ``--snapshot-date`` option into a date, or ``None``.

        Returns ``None`` when the option was not supplied (empty/``None``).

        Raises:
            CommandError: if the value is not formatted as YYYY-MM-DD, or is
                well formatted but not a real calendar date.
        """
        if not raw_value:
            return None
        try:
            parsed = parse_date(raw_value)
        except ValueError as exc:
            # parse_date() returns None only for badly *formatted* input; a
            # well-formatted but impossible date (e.g. 2026-02-30) raises
            # ValueError, which would otherwise surface as a raw traceback.
            raise CommandError(
                f"--snapshot-date is not a valid calendar date: {raw_value}"
            ) from exc
        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        """Run the selected extractor and report the outcome.

        Raises:
            CommandError: if ``--snapshot-date`` is invalid, or if creating or
                running the extractor raises ``ExtractorError``.
        """
        snapshot_date = self._parse_snapshot_date(options["snapshot_date"])

        try:
            extractor = create_extractor(options["extractor_name"])
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                # --dry-run asks the extractor to skip writing the file.
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            raise CommandError(str(exc)) from exc

        # A falsy output_path on the result is reported with a placeholder.
        output_path = str(result.output_path) if result.output_path else "<dry-run>"
        self.stdout.write(
            self.style.SUCCESS(
                f"Extractor {result.extractor_name} completed: "
                f"source={result.source_name} date={result.snapshot_date} "
                f"records={result.records_count} output={output_path}"
            )
        )