Add v2 snapshot schema validation and import_snapshots command

This commit is contained in:
Alfredo Di Stasio
2026-03-13 14:00:39 +01:00
parent 6aa66807e9
commit eacff3d25e
14 changed files with 844 additions and 16 deletions

View File

@ -0,0 +1,3 @@
from .schema import SnapshotSchemaValidator, SnapshotValidationError, SnapshotValidationResult
__all__ = ["SnapshotSchemaValidator", "SnapshotValidationError", "SnapshotValidationResult"]

View File

@ -0,0 +1,182 @@
from __future__ import annotations

import math
from dataclasses import dataclass
from datetime import date
from typing import Any

from django.utils.dateparse import parse_date
# Required record keys, grouped by the prefix/subject their names share.
_COMPETITION_AND_TEAM_FIELDS = (
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
)
_PLAYER_FIELDS = (
    "player_external_id",
    "full_name",
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
)
_STAT_FIELDS = (
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
)
# Record keys that may be present but are never required.
_OPTIONAL_RECORD_FIELDS = ("role", "source_metadata", "raw_payload")

# Every record must contain all of these keys.
REQUIRED_RECORD_FIELDS = {
    *_COMPETITION_AND_TEAM_FIELDS,
    *_PLAYER_FIELDS,
    *_STAT_FIELDS,
}

# The only keys accepted on the snapshot root object.
ALLOWED_TOP_LEVEL_FIELDS = set(
    (
        "source_name",
        "snapshot_date",
        "records",
        "source_metadata",
        "raw_payload",
    )
)

# The only keys accepted on a record: the required ones plus the optional ones.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS.union(_OPTIONAL_RECORD_FIELDS)
@dataclass
class SnapshotValidationResult:
    """Container for the three values produced by a successful validation."""

    # Name of the source, as read from the payload's "source_name" key.
    source_name: str
    # Snapshot date as a date object.
    snapshot_date: date
    # One dict per entry of the payload's "records" array.
    records: list[dict[str, Any]]
class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload does not satisfy the snapshot schema."""


class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots.

    Every problem with the payload is reported as a
    ``SnapshotValidationError``; no other exception type is meant to escape
    from ``validate``.
    """

    # Record fields that must be non-empty strings.
    _RECORD_STRING_FIELDS = (
        "competition_external_id",
        "competition_name",
        "season",
        "team_external_id",
        "team_name",
        "player_external_id",
        "full_name",
        "first_name",
        "last_name",
        "nationality",
        "position",
    )
    # Record fields that must be non-negative integers.
    _RECORD_INT_FIELDS = ("height_cm", "weight_kg", "games_played")
    # Record fields that must be finite numbers.
    _RECORD_FLOAT_FIELDS = (
        "minutes_per_game",
        "points_per_game",
        "rebounds_per_game",
        "assists_per_game",
        "steals_per_game",
        "blocks_per_game",
        "turnovers_per_game",
        "fg_pct",
        "three_pt_pct",
        "ft_pct",
    )

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return ``value`` stripped of surrounding whitespace.

        Raises:
            SnapshotValidationError: if ``value`` is not a string or is
                empty/whitespace-only.
        """
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Return ``value`` converted to an ``int`` that is >= 0.

        Booleans are rejected even though ``bool`` is an ``int`` subclass.
        Floats are accepted only when they are integral (``3.0``), so a value
        such as ``-0.5`` cannot be truncated to ``0`` and slip past the
        sign check.

        Raises:
            SnapshotValidationError: if ``value`` cannot be converted, is not
                integral, or is negative.
        """
        message = f"{field} must be a non-negative integer"
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # int(float("inf")) raises OverflowError, int(float("nan")) ValueError.
            raise SnapshotValidationError(message) from exc
        if isinstance(value, float) and parsed != value:
            # int() truncates toward zero; refuse to silently drop the fraction.
            raise SnapshotValidationError(message)
        if parsed < 0:
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Return ``value`` converted to a finite ``float``.

        Booleans, NaN and +/-Infinity are rejected.

        Raises:
            SnapshotValidationError: if ``value`` is not convertible to a
                finite float.
        """
        message = f"{field} must be numeric"
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # float() raises OverflowError for ints too large to represent.
            raise SnapshotValidationError(message) from exc
        if not math.isfinite(parsed):
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _require_date(value: Any, field: str) -> date:
        """Parse ``str(value)`` with Django's ``parse_date`` and return the date.

        ``parse_date`` returns ``None`` for badly formatted input but raises a
        plain ``ValueError`` for well-formatted impossible dates; both cases
        are reported the same way here.

        Raises:
            SnapshotValidationError: if the value is not a valid date.
        """
        message = f"{field} must be YYYY-MM-DD"
        try:
            parsed = parse_date(str(value))
        except ValueError as exc:
            raise SnapshotValidationError(message) from exc
        if not parsed:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate one record and return a normalized shallow copy of it.

        Args:
            record: The record object taken from the payload's ``records``.
            index: Position of the record, used only in error messages.

        Returns:
            A copy of ``record`` with string fields stripped, ``birth_date``
            as an ISO-format string, integer fields as ``int`` and numeric
            fields as ``float``. Keys that are not validated here are copied
            unchanged.

        Raises:
            SnapshotValidationError: on unknown keys, missing required keys,
                or any field that fails its type check.
        """
        keys = set(record.keys())
        unknown = keys - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )
        missing = REQUIRED_RECORD_FIELDS - keys
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        normalized = dict(record)
        for field in cls._RECORD_STRING_FIELDS:
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")
        # "role" is optional: only checked when present and not None.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")

        birth_date = cls._require_date(record.get("birth_date"), f"record[{index}].birth_date")
        normalized["birth_date"] = birth_date.isoformat()

        for field in cls._RECORD_INT_FIELDS:
            normalized[field] = cls._require_non_negative_int(
                record.get(field), f"record[{index}].{field}"
            )
        for field in cls._RECORD_FLOAT_FIELDS:
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")
        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a whole snapshot payload.

        Args:
            payload: The decoded snapshot root object.

        Returns:
            A ``SnapshotValidationResult`` holding the stripped source name,
            the parsed snapshot date and the normalized records, in input
            order.

        Raises:
            SnapshotValidationError: if the root is not a dict, has unknown
                top-level keys, has an invalid ``source_name`` or
                ``snapshot_date``, if ``records`` is not a non-empty list, or
                if any record is invalid. Validation stops at the first
                problem found.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")
        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")
        snapshot_date = cls._require_date(payload.get("snapshot_date"), "snapshot_date")

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )