feat(v2): add snapshot extractor framework and run command
This commit is contained in:
63
apps/ingestion/management/commands/run_extractor.py
Normal file
63
apps/ingestion/management/commands/run_extractor.py
Normal file
@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.utils.dateparse import parse_date
|
||||
|
||||
from apps.ingestion.extractors import ExtractorError, available_extractors, create_extractor
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Run one registered snapshot extractor from the command line.

    The extractor is looked up by name via ``create_extractor`` and executed
    with the options given on the command line. On success a one-line
    summary (extractor, source, date, record count, output path) is written
    to stdout; extractor failures are reported as ``CommandError``.
    """

    help = "Run a snapshot extractor and emit importable JSON snapshots."

    def add_arguments(self, parser):
        """Register the positional extractor name and the optional flags."""
        # Restrict the positional argument to the extractors that are
        # actually registered so argparse rejects unknown names up front.
        parser.add_argument("extractor_name", choices=available_extractors())
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json file path where snapshot should be written. Defaults to incoming dir.",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate extraction/normalization without writing snapshot file.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted snapshots.",
        )

    @staticmethod
    def _parse_snapshot_date(raw):
        """Convert the ``--snapshot-date`` value into a date, or ``None``.

        Args:
            raw: The raw option value; ``None`` or an empty string means the
                option was not supplied.

        Returns:
            The parsed date, or ``None`` when no override was requested.

        Raises:
            CommandError: If the value is not in YYYY-MM-DD form, or is
                well-formed but not a real calendar date.
        """
        if not raw:
            return None

        try:
            parsed = parse_date(raw)
        except ValueError as exc:
            # parse_date raises (rather than returning None) when the text
            # matches the date pattern but names an impossible date such as
            # 2024-02-30; surface that as a normal command error.
            raise CommandError(
                f"--snapshot-date must be YYYY-MM-DD. {raw!r} is not a valid calendar date."
            ) from exc

        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        """Execute the selected extractor and print a completion summary.

        Raises:
            CommandError: If ``--snapshot-date`` is invalid or the extractor
                raises ``ExtractorError``.
        """
        snapshot_date = self._parse_snapshot_date(options["snapshot_date"])

        try:
            extractor = create_extractor(options["extractor_name"])
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            raise CommandError(str(exc)) from exc

        # A falsy output path on the result is reported as a dry run.
        output_path = str(result.output_path) if result.output_path else "<dry-run>"
        self.stdout.write(
            self.style.SUCCESS(
                f"Extractor {result.extractor_name} completed: "
                f"source={result.source_name} date={result.snapshot_date} "
                f"records={result.records_count} output={output_path}"
            )
        )
|
||||
Reference in New Issue
Block a user