# Listing metadata from the code viewer this file was captured from: 183 lines, 5.8 KiB, Python.
from __future__ import annotations

import math
from dataclasses import dataclass
from datetime import date
from typing import Any

from django.utils.dateparse import parse_date


# Keys that every snapshot record must contain; a record missing any of them
# is rejected by the validator.
REQUIRED_RECORD_FIELDS = {
    # Competition, season and team keys.
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    # Player keys.
    "player_external_id",
    "full_name",
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
    # Season statistics keys.
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}


# The only keys a snapshot payload may carry at its root; any other key is
# reported as an unknown top-level field.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name",
    "snapshot_date",
    "records",
    "source_metadata",
    "raw_payload",
}


# Every key a record may carry: the required keys plus three optional ones.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | {
    "role",
    "source_metadata",
    "raw_payload",
}


@dataclass
class SnapshotValidationResult:
    """Outcome of a successful snapshot validation.

    Attributes:
        source_name: The payload's ``source_name`` with surrounding whitespace removed.
        snapshot_date: The payload's ``snapshot_date`` parsed into a ``date``.
        records: The normalized record dictionaries, in payload order.
    """

    source_name: str
    snapshot_date: date
    records: list[dict[str, Any]]


class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload or one of its records fails validation."""


class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots.

    ``validate`` is the entry point. It checks the top-level payload and then
    every record, stops at the first problem found, and reports that problem
    as a ``SnapshotValidationError`` whose message names the offending field.
    """

    # Record fields that must hold a non-empty string, in the order checked.
    _STRING_FIELDS = (
        "competition_external_id",
        "competition_name",
        "season",
        "team_external_id",
        "team_name",
        "player_external_id",
        "full_name",
        "first_name",
        "last_name",
        "nationality",
        "position",
    )

    # Record fields that must hold a non-negative integer, in the order checked.
    _INT_FIELDS = (
        "height_cm",
        "weight_kg",
        "games_played",
    )

    # Record fields that must hold a finite number, in the order checked.
    _FLOAT_FIELDS = (
        "minutes_per_game",
        "points_per_game",
        "rebounds_per_game",
        "assists_per_game",
        "steals_per_game",
        "blocks_per_game",
        "turnovers_per_game",
        "fg_pct",
        "three_pt_pct",
        "ft_pct",
    )

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return *value* with surrounding whitespace removed.

        Args:
            value: The raw value to check.
            field: Field label used in the error message.

        Raises:
            SnapshotValidationError: If *value* is not a string or is blank.
        """
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Return *value* converted to an ``int`` that is zero or greater.

        Integers, whole-number floats (``180.0``) and anything else ``int()``
        can convert (for example the string ``"12"``) are accepted.

        Args:
            value: The raw value to check.
            field: Field label used in the error message.

        Raises:
            SnapshotValidationError: If *value* is a boolean, a float with a
                fractional part, NaN or infinite, not convertible by
                ``int()``, or negative.
        """
        message = f"{field} must be a non-negative integer"
        # bool is a subclass of int; True/False must not slip through as 1/0.
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        # int() would silently truncate 180.7 to 180. NaN and the infinities
        # are not whole numbers either, so they are rejected by the same test.
        if isinstance(value, float) and not value.is_integer():
            raise SnapshotValidationError(message)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            raise SnapshotValidationError(message) from exc
        if parsed < 0:
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Return *value* converted to a finite ``float``.

        Args:
            value: The raw value to check.
            field: Field label used in the error message.

        Raises:
            SnapshotValidationError: If *value* is a boolean, cannot be
                converted by ``float()``, or converts to NaN or infinity.
        """
        message = f"{field} must be numeric"
        # Rejected for the same reason as in _require_non_negative_int:
        # float(True) == 1.0 would otherwise be accepted as a statistic.
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError: an integer too large for a float, e.g. 10**400.
            raise SnapshotValidationError(message) from exc
        if not math.isfinite(parsed):
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _require_date(value: Any, field: str) -> date:
        """Return *value* parsed as a calendar date.

        Args:
            value: The raw value; it is passed through ``str()`` before parsing.
            field: Field label used in the error message.

        Raises:
            SnapshotValidationError: If the text is not a well-formed date or
                names a day that does not exist (for example ``2024-02-30``).
        """
        message = f"{field} must be YYYY-MM-DD"
        try:
            parsed = parse_date(str(value))
        except ValueError as exc:
            # parse_date returns None for malformed text but raises ValueError
            # for well-formed, impossible dates; report both the same way.
            raise SnapshotValidationError(message) from exc
        if parsed is None:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate one record and return a normalized copy of it.

        The copy keeps every key of *record*. String fields are stripped,
        ``birth_date`` becomes an ISO ``YYYY-MM-DD`` string, and numeric
        fields are converted to ``int`` or ``float``.

        Args:
            record: The raw record dictionary.
            index: Position of the record in the payload, used in messages.

        Raises:
            SnapshotValidationError: On unknown keys, missing required keys,
                or any field value that fails its check.
        """
        present = set(record.keys())

        unknown = present - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )

        missing = REQUIRED_RECORD_FIELDS - present
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        normalized = dict(record)

        for field in cls._STRING_FIELDS:
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")

        # "role" is optional; it is only checked when present and not null.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")

        birth_date = cls._require_date(record.get("birth_date"), f"record[{index}].birth_date")
        normalized["birth_date"] = birth_date.isoformat()

        for field in cls._INT_FIELDS:
            normalized[field] = cls._require_non_negative_int(
                record.get(field), f"record[{index}].{field}"
            )

        for field in cls._FLOAT_FIELDS:
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")

        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a whole snapshot payload.

        Args:
            payload: The decoded snapshot; must be a dictionary.

        Returns:
            A ``SnapshotValidationResult`` holding the stripped source name,
            the parsed snapshot date and the normalized records.

        Raises:
            SnapshotValidationError: If the root is not a dictionary, carries
                unknown keys, has an invalid ``source_name`` or
                ``snapshot_date``, has no non-empty ``records`` list, or any
                record fails validation.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")

        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")
        snapshot_date = cls._require_date(payload.get("snapshot_date"), "snapshot_date")

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )