Add v2 snapshot schema validation and import_snapshots command
This commit is contained in:
@ -8,6 +8,8 @@ class ImportFileInline(admin.TabularInline):
|
||||
extra = 0
|
||||
readonly_fields = (
|
||||
"relative_path",
|
||||
"source_name",
|
||||
"snapshot_date",
|
||||
"status",
|
||||
"checksum",
|
||||
"file_size_bytes",
|
||||
@ -61,6 +63,8 @@ class ImportFileAdmin(admin.ModelAdmin):
|
||||
"id",
|
||||
"import_run",
|
||||
"relative_path",
|
||||
"source_name",
|
||||
"snapshot_date",
|
||||
"status",
|
||||
"rows_total",
|
||||
"rows_upserted",
|
||||
@ -68,10 +72,12 @@ class ImportFileAdmin(admin.ModelAdmin):
|
||||
"processed_at",
|
||||
)
|
||||
list_filter = ("status",)
|
||||
search_fields = ("relative_path", "checksum", "error_message")
|
||||
search_fields = ("relative_path", "source_name", "checksum", "error_message")
|
||||
readonly_fields = (
|
||||
"import_run",
|
||||
"relative_path",
|
||||
"source_name",
|
||||
"snapshot_date",
|
||||
"status",
|
||||
"checksum",
|
||||
"file_size_bytes",
|
||||
|
||||
0
apps/ingestion/management/__init__.py
Normal file
0
apps/ingestion/management/__init__.py
Normal file
0
apps/ingestion/management/commands/__init__.py
Normal file
0
apps/ingestion/management/commands/__init__.py
Normal file
23
apps/ingestion/management/commands/import_snapshots.py
Normal file
23
apps/ingestion/management/commands/import_snapshots.py
Normal file
@ -0,0 +1,23 @@
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from apps.ingestion.services.snapshot_import import SnapshotImporter
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command entry point for the static-snapshot import."""

    help = "Import static JSON snapshots from incoming directory into PostgreSQL."

    def handle(self, *args, **options):
        """Build a SnapshotImporter from project settings, run it, report results."""
        directories = {
            "incoming_dir": settings.STATIC_DATASET_INCOMING_DIR,
            "archive_dir": settings.STATIC_DATASET_ARCHIVE_DIR,
            "failed_dir": settings.STATIC_DATASET_FAILED_DIR,
        }
        importer = SnapshotImporter(**directories)
        run = importer.run()
        summary_message = (
            f"Import run {run.id} completed: status={run.status} "
            f"files={run.files_processed}/{run.files_total} "
            f"rows_upserted={run.rows_upserted} rows_failed={run.rows_failed}"
        )
        self.stdout.write(self.style.SUCCESS(summary_message))
|
||||
@ -0,0 +1,27 @@
|
||||
# Generated by Django 5.2.12 on 2026-03-13 12:59
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add provenance columns to ImportFile plus a composite lookup index.

    Adds nullable ``snapshot_date`` and blank-able ``source_name`` so existing
    rows remain valid, then indexes the pair for per-source/per-date queries.
    """

    dependencies = [
        ('ingestion', '0003_importrun_importfile_and_more'),
    ]

    operations = [
        migrations.AddField(
            model_name='importfile',
            name='snapshot_date',
            field=models.DateField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='importfile',
            name='source_name',
            field=models.CharField(blank=True, max_length=120),
        ),
        migrations.AddIndex(
            model_name='importfile',
            # Name is auto-generated by makemigrations; keep as-is so the
            # migration matches the model's Meta.indexes declaration.
            index=models.Index(fields=['source_name', 'snapshot_date'], name='ingestion_i_source__de6843_idx'),
        ),
    ]
|
||||
@ -57,6 +57,8 @@ class ImportFile(models.Model):
|
||||
related_name="files",
|
||||
)
|
||||
relative_path = models.CharField(max_length=260)
|
||||
source_name = models.CharField(max_length=120, blank=True)
|
||||
snapshot_date = models.DateField(blank=True, null=True)
|
||||
status = models.CharField(max_length=24, choices=FileStatus.choices, default=FileStatus.PENDING)
|
||||
checksum = models.CharField(max_length=128, blank=True)
|
||||
file_size_bytes = models.PositiveBigIntegerField(blank=True, null=True)
|
||||
@ -79,6 +81,7 @@ class ImportFile(models.Model):
|
||||
indexes = [
|
||||
models.Index(fields=["import_run", "status"]),
|
||||
models.Index(fields=["relative_path"]),
|
||||
models.Index(fields=["source_name", "snapshot_date"]),
|
||||
models.Index(fields=["processed_at"]),
|
||||
]
|
||||
|
||||
|
||||
310
apps/ingestion/services/snapshot_import.py
Normal file
310
apps/ingestion/services/snapshot_import.py
Normal file
@ -0,0 +1,310 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from django.db import transaction
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.utils import timezone
|
||||
from django.utils.dateparse import parse_date
|
||||
|
||||
from apps.competitions.models import Competition, Season
|
||||
from apps.ingestion.models import ImportFile, ImportRun
|
||||
from apps.ingestion.snapshots import SnapshotSchemaValidator, SnapshotValidationError
|
||||
from apps.players.models import Nationality, Player, Position, Role
|
||||
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
||||
from apps.teams.models import Team
|
||||
|
||||
|
||||
@dataclass
class ImportSummary:
    """Mutable running totals accumulated while an import run processes files."""

    files_total: int = 0      # files discovered in the incoming directory
    files_processed: int = 0  # files handled so far (success, skipped, or failed)
    rows_total: int = 0       # records seen across all validated files
    rows_upserted: int = 0    # records successfully written to the database
    rows_failed: int = 0      # per-row failures plus one per failed file
|
||||
|
||||
|
||||
def _safe_move(src: Path, destination_dir: Path) -> Path:
|
||||
destination_dir.mkdir(parents=True, exist_ok=True)
|
||||
candidate = destination_dir / src.name
|
||||
if candidate.exists():
|
||||
ts = datetime.utcnow().strftime("%Y%m%d%H%M%S")
|
||||
candidate = destination_dir / f"{src.stem}-{ts}{src.suffix}"
|
||||
shutil.move(str(src), str(candidate))
|
||||
return candidate
|
||||
|
||||
|
||||
def _file_checksum(path: Path) -> str:
|
||||
digest = hashlib.sha256()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
||||
digest.update(chunk)
|
||||
return digest.hexdigest()
|
||||
|
||||
|
||||
def _normalize_decimal(value: float | int | str) -> Decimal:
|
||||
return Decimal(str(value)).quantize(Decimal("0.01"))
|
||||
|
||||
|
||||
def _parse_season_dates(label: str) -> tuple[date, date]:
|
||||
if "-" in label:
|
||||
first = label.split("-", 1)[0]
|
||||
else:
|
||||
first = label
|
||||
year = int(first)
|
||||
return date(year, 9, 1), date(year + 1, 7, 31)
|
||||
|
||||
|
||||
def _resolve_nationality(value: str | None) -> Nationality | None:
    """Map a free-text nationality value to a Nationality row, or None.

    Two-letter values are treated as ISO alpha-2 codes and created on demand;
    anything longer is matched case-insensitively by name and may resolve to
    None when no row matches.
    """
    token = (value or "").strip()
    if not token:
        return None
    if len(token) != 2:
        return Nationality.objects.filter(name__iexact=token).first()
    iso_code = token.upper()
    nationality, _created = Nationality.objects.get_or_create(
        iso2_code=iso_code,
        defaults={"name": iso_code},
    )
    return nationality
|
||||
|
||||
|
||||
def _position_code(position_value: str) -> str:
|
||||
token = position_value.strip().upper().replace(" ", "_")
|
||||
return (token[:10] or "UNK")
|
||||
|
||||
|
||||
def _role_code(role_value: str) -> str:
    """Normalize free-text role into a snake_case code (max 32 chars)."""
    slug = slugify(role_value)
    code = slug.replace("-", "_")[:32]
    return code if code else "unknown"
|
||||
|
||||
|
||||
def _player_season_source_uid(record: dict[str, Any], source_name: str, snapshot_date: date) -> str:
|
||||
return (
|
||||
f"{source_name}:{snapshot_date.isoformat()}:"
|
||||
f"{record['competition_external_id']}:{record['season']}:"
|
||||
f"{record['team_external_id']}:{record['player_external_id']}"
|
||||
)
|
||||
|
||||
|
||||
def _upsert_record(record: dict[str, Any], *, source_name: str, snapshot_date: date) -> None:
    """Upsert one validated snapshot record into the domain tables.

    Writes, in FK-dependency order: Competition, Season, Team, the
    Position/Role lookups, Player, PlayerSeason and PlayerSeasonStats.
    Expects *record* to already be normalized by SnapshotSchemaValidator.
    Callers are expected to wrap this in a transaction (run() does).
    """
    # Slug falls back to a synthetic value when the name slugifies to "".
    competition_slug = slugify(record["competition_name"]) or f"competition-{record['competition_external_id']}"
    competition, _ = Competition.objects.update_or_create(
        source_uid=record["competition_external_id"],
        defaults={
            "name": record["competition_name"],
            "slug": competition_slug,
            # NOTE(review): every imported competition is assumed to be a
            # league — confirm snapshots never carry cups/tournaments.
            "competition_type": Competition.CompetitionType.LEAGUE,
            "is_active": True,
        },
    )

    # Season dates are derived from the label (Sep 1 .. Jul 31).
    start_date, end_date = _parse_season_dates(record["season"])
    season, _ = Season.objects.update_or_create(
        source_uid=f"season:{record['season']}",
        defaults={
            "label": record["season"],
            "start_date": start_date,
            "end_date": end_date,
            "is_current": False,
        },
    )

    team_slug = slugify(record["team_name"]) or f"team-{record['team_external_id']}"
    team, _ = Team.objects.update_or_create(
        source_uid=record["team_external_id"],
        defaults={
            "name": record["team_name"],
            "slug": team_slug,
            "short_name": "",
        },
    )

    # Lookup rows are get_or_create: an existing code keeps its display name.
    position, _ = Position.objects.get_or_create(
        code=_position_code(record["position"]),
        defaults={"name": record["position"]},
    )
    role = None
    if record.get("role"):
        role, _ = Role.objects.get_or_create(
            code=_role_code(record["role"]),
            defaults={"name": record["role"]},
        )

    player, _ = Player.objects.update_or_create(
        source_uid=record["player_external_id"],
        defaults={
            "first_name": record["first_name"],
            "last_name": record["last_name"],
            "full_name": record["full_name"],
            "birth_date": parse_date(record["birth_date"]),
            "nationality": _resolve_nationality(record.get("nationality")),
            "nominal_position": position,
            "inferred_role": role,
            "height_cm": record["height_cm"],
            "weight_kg": record["weight_kg"],
            "is_active": True,
        },
    )

    player_season, _ = PlayerSeason.objects.update_or_create(
        source_uid=_player_season_source_uid(record, source_name=source_name, snapshot_date=snapshot_date),
        defaults={
            "player": player,
            "season": season,
            "team": team,
            "competition": competition,
            "games_played": int(record["games_played"]),
            # Snapshot carries no starts info; defaulted to 0.
            "games_started": 0,
            # Total minutes reconstructed from the per-game average.
            "minutes_played": int(round(float(record["minutes_per_game"]) * int(record["games_played"]))),
        },
    )

    # Stats are stored as per-game averages quantized to 2 decimal places.
    PlayerSeasonStats.objects.update_or_create(
        player_season=player_season,
        defaults={
            "points": _normalize_decimal(record["points_per_game"]),
            "rebounds": _normalize_decimal(record["rebounds_per_game"]),
            "assists": _normalize_decimal(record["assists_per_game"]),
            "steals": _normalize_decimal(record["steals_per_game"]),
            "blocks": _normalize_decimal(record["blocks_per_game"]),
            "turnovers": _normalize_decimal(record["turnovers_per_game"]),
            "fg_pct": _normalize_decimal(record["fg_pct"]),
            "three_pct": _normalize_decimal(record["three_pt_pct"]),
            "ft_pct": _normalize_decimal(record["ft_pct"]),
        },
    )
|
||||
|
||||
|
||||
class SnapshotImporter:
    """Imports validated JSON snapshot files into the database.

    Files are read from ``incoming_dir``; successfully imported (or
    duplicate) files are moved to ``archive_dir`` and failed files to
    ``failed_dir``. Every run and every file is tracked via the
    ImportRun / ImportFile models.
    """

    def __init__(self, *, incoming_dir: str, archive_dir: str, failed_dir: str):
        """Store the three working directories as Path objects."""
        self.incoming_dir = Path(incoming_dir)
        self.archive_dir = Path(archive_dir)
        self.failed_dir = Path(failed_dir)

    def _list_input_files(self) -> list[Path]:
        """Return the ``.json`` files directly inside incoming_dir, sorted by name (non-recursive)."""
        if not self.incoming_dir.exists():
            return []
        return sorted(path for path in self.incoming_dir.iterdir() if path.is_file() and path.suffix.lower() == ".json")

    def run(self, *, triggered_by=None) -> ImportRun:
        """Process every pending snapshot file and return the finished ImportRun.

        Each file is imported inside its own transaction, so a failing file
        rolls back its rows without affecting other files in the same run.
        """
        run = ImportRun.objects.create(
            source="static_snapshot_json",
            status=ImportRun.RunStatus.RUNNING,
            triggered_by=triggered_by,
            started_at=timezone.now(),
            context={
                "incoming_dir": str(self.incoming_dir),
                "archive_dir": str(self.archive_dir),
                "failed_dir": str(self.failed_dir),
            },
        )

        summary = ImportSummary()
        files = self._list_input_files()
        summary.files_total = len(files)

        for path in files:
            checksum = _file_checksum(path)
            file_row = ImportFile.objects.create(
                import_run=run,
                relative_path=path.name,
                status=ImportFile.FileStatus.PROCESSING,
                checksum=checksum,
                file_size_bytes=path.stat().st_size,
            )

            # Duplicate file content previously imported successfully.
            already_imported = ImportFile.objects.filter(
                checksum=checksum,
                status=ImportFile.FileStatus.SUCCESS,
            ).exclude(pk=file_row.pk).exists()
            if already_imported:
                file_row.status = ImportFile.FileStatus.SKIPPED
                file_row.error_message = "Skipped duplicate checksum already imported successfully."
                file_row.processed_at = timezone.now()
                file_row.save(update_fields=["status", "error_message", "processed_at"])
                # Duplicates go to the archive (not failed): the content exists.
                _safe_move(path, self.archive_dir)
                summary.files_processed += 1
                continue

            try:
                payload = json.loads(path.read_text(encoding="utf-8"))
                validated = SnapshotSchemaValidator.validate(payload)

                file_row.source_name = validated.source_name
                file_row.snapshot_date = validated.snapshot_date
                file_row.rows_total = len(validated.records)

                # All-or-nothing per file: any record error rolls the file back.
                with transaction.atomic():
                    for record in validated.records:
                        _upsert_record(record, source_name=validated.source_name, snapshot_date=validated.snapshot_date)

                file_row.status = ImportFile.FileStatus.SUCCESS
                file_row.rows_upserted = len(validated.records)
                # records is validated non-empty, so records[0] is safe.
                file_row.payload_preview = {
                    "source_name": validated.source_name,
                    "snapshot_date": validated.snapshot_date.isoformat(),
                    "sample_record": validated.records[0],
                }
                _safe_move(path, self.archive_dir)
            except (json.JSONDecodeError, SnapshotValidationError, ValueError) as exc:
                # Expected parse/validation failures: store the message as-is.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = str(exc)
                _safe_move(path, self.failed_dir)
            except Exception as exc:  # noqa: BLE001
                # Catch-all so one bad file cannot abort the whole run.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = f"Unhandled import error: {exc}"
                _safe_move(path, self.failed_dir)

            file_row.processed_at = timezone.now()
            file_row.save(
                update_fields=[
                    "source_name",
                    "snapshot_date",
                    "status",
                    "rows_total",
                    "rows_upserted",
                    "rows_failed",
                    "error_message",
                    "payload_preview",
                    "processed_at",
                ]
            )

            summary.files_processed += 1
            # NOTE(review): on a failed file these counters rely on the model
            # field defaults (presumably 0) — confirm against the ImportFile model.
            summary.rows_total += file_row.rows_total
            summary.rows_upserted += file_row.rows_upserted
            # A failed file counts as one extra failure on top of any row failures.
            summary.rows_failed += file_row.rows_failed + (1 if file_row.status == ImportFile.FileStatus.FAILED else 0)

        # Any failure marks the whole run FAILED even if other files succeeded.
        run.status = ImportRun.RunStatus.SUCCESS if summary.rows_failed == 0 else ImportRun.RunStatus.FAILED
        run.files_total = summary.files_total
        run.files_processed = summary.files_processed
        run.rows_total = summary.rows_total
        run.rows_upserted = summary.rows_upserted
        run.rows_failed = summary.rows_failed
        run.finished_at = timezone.now()
        if summary.rows_failed:
            run.error_summary = f"{summary.rows_failed} file/row import error(s)."
        run.save(
            update_fields=[
                "status",
                "files_total",
                "files_processed",
                "rows_total",
                "rows_upserted",
                "rows_failed",
                "error_summary",
                "finished_at",
            ]
        )
        return run
|
||||
3
apps/ingestion/snapshots/__init__.py
Normal file
3
apps/ingestion/snapshots/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .schema import SnapshotSchemaValidator, SnapshotValidationError, SnapshotValidationResult
|
||||
|
||||
__all__ = ["SnapshotSchemaValidator", "SnapshotValidationError", "SnapshotValidationResult"]
|
||||
182
apps/ingestion/snapshots/schema.py
Normal file
182
apps/ingestion/snapshots/schema.py
Normal file
@ -0,0 +1,182 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Any
|
||||
|
||||
from django.utils.dateparse import parse_date
|
||||
|
||||
# Every snapshot record must provide all of these keys. How each is checked
# is defined by SnapshotSchemaValidator._validate_record.
REQUIRED_RECORD_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}

# Keys permitted at the top level of a snapshot payload; anything else is
# rejected as unknown.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name",
    "snapshot_date",
    "records",
    "source_metadata",
    "raw_payload",
}

# Record-level keys: everything required plus the optional extras.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | {
    "role",
    "source_metadata",
    "raw_payload",
}
|
||||
|
||||
|
||||
@dataclass
class SnapshotValidationResult:
    """Outcome of a successful snapshot validation."""

    # Trimmed, non-empty identifier of the upstream data source.
    source_name: str
    # Date the snapshot was taken (parsed from YYYY-MM-DD).
    snapshot_date: date
    # Normalized record dicts (strings stripped, numeric fields coerced).
    records: list[dict[str, Any]]
|
||||
|
||||
|
||||
class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload fails strict schema validation."""

    pass
|
||||
|
||||
|
||||
class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots.

    Rejects unknown fields at both the top level and the record level,
    requires every field in REQUIRED_RECORD_FIELDS, and normalizes values
    (strings stripped, ints/floats coerced, dates ISO-formatted). All
    problems raise SnapshotValidationError.
    """

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return *value* stripped, or raise if it is not a non-empty string."""
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Coerce *value* to a non-negative int, rejecting bools and negatives."""
        # bool is a subclass of int, so screen it out before int() coercion.
        if isinstance(value, bool):
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        try:
            parsed = int(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be a non-negative integer") from exc
        if parsed < 0:
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        return parsed

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Coerce *value* to float, rejecting bools."""
        # FIX: bools previously slipped through (float(True) == 1.0), which is
        # inconsistent with _require_non_negative_int and too lax for a strict
        # validator — JSON `true` must not pass as a per-game stat.
        if isinstance(value, bool):
            raise SnapshotValidationError(f"{field} must be numeric")
        try:
            parsed = float(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be numeric") from exc
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate and normalize one record; *index* appears in error messages."""
        unknown = set(record.keys()) - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )

        missing = REQUIRED_RECORD_FIELDS - set(record.keys())
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        normalized = dict(record)
        # Identity and display fields must be non-empty strings; store stripped.
        for field in (
            "competition_external_id",
            "competition_name",
            "season",
            "team_external_id",
            "team_name",
            "player_external_id",
            "full_name",
            "first_name",
            "last_name",
            "nationality",
            "position",
        ):
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")

        # "role" is optional; validate only when present and non-null.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")

        birth_date = parse_date(str(record.get("birth_date")))
        if not birth_date:
            raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
        normalized["birth_date"] = birth_date.isoformat()

        normalized["height_cm"] = cls._require_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
        normalized["weight_kg"] = cls._require_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
        normalized["games_played"] = cls._require_non_negative_int(record.get("games_played"), f"record[{index}].games_played")

        # Per-game averages and shooting percentages must be numeric.
        for field in (
            "minutes_per_game",
            "points_per_game",
            "rebounds_per_game",
            "assists_per_game",
            "steals_per_game",
            "blocks_per_game",
            "turnovers_per_game",
            "fg_pct",
            "three_pt_pct",
            "ft_pct",
        ):
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")

        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a whole snapshot payload and return the normalized result.

        Raises SnapshotValidationError on any structural or per-record problem.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")

        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")

        snapshot_date_raw = payload.get("snapshot_date")
        snapshot_date = parse_date(str(snapshot_date_raw))
        if not snapshot_date:
            raise SnapshotValidationError("snapshot_date must be YYYY-MM-DD")

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )
|
||||
Reference in New Issue
Block a user