204 lines
6.6 KiB
Python
204 lines
6.6 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date
|
|
from typing import Any
|
|
|
|
from django.utils.dateparse import parse_date
|
|
|
|
# Keys that every record must contain; the validator rejects a record that
# lacks any of them.
REQUIRED_RECORD_FIELDS = {
    # Validated as non-empty strings.
    "competition_external_id", "competition_name", "season",
    "team_external_id", "team_name",
    "player_external_id", "full_name",
    # Validated as a non-negative integer.
    "games_played",
    # Validated as floats.
    "minutes_per_game", "points_per_game", "rebounds_per_game",
    "assists_per_game", "steals_per_game", "blocks_per_game",
    "turnovers_per_game",
    "fg_pct", "three_pt_pct", "ft_pct",
}

# Keys a record may omit; absent or empty values are normalized to ``None``.
OPTIONAL_RECORD_FIELDS = {
    "first_name", "last_name",
    "birth_date", "nationality",
    "height_cm", "weight_kg",
    "position",
}

# The only keys accepted on the snapshot root object.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name", "snapshot_date", "records",
    "source_metadata", "raw_payload",
}

# Every key accepted on a record: the required and optional sets plus a few
# extra keys that are allowed but not listed in either of them.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS.union(
    OPTIONAL_RECORD_FIELDS,
    {"role", "source_metadata", "raw_payload"},
)
|
|
|
|
|
|
@dataclass
class SnapshotValidationResult:
    """Container for the values produced by a successful snapshot validation."""

    # Source identifier taken from the payload's ``source_name`` key.
    source_name: str
    # Date parsed from the payload's ``snapshot_date`` key.
    snapshot_date: date
    # One normalized dictionary per input record, in input order.
    records: list[dict[str, Any]]
|
|
|
|
|
|
class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload or one of its records fails validation."""
|
|
|
|
|
|
class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots.

    ``validate`` is the entry point. The underscore-prefixed helpers each
    coerce a single value and raise ``SnapshotValidationError`` with a message
    naming the offending field when the value is unacceptable.
    """

    # Record keys that must hold a non-empty string.
    _REQUIRED_STRING_FIELDS = (
        "competition_external_id",
        "competition_name",
        "season",
        "team_external_id",
        "team_name",
        "player_external_id",
        "full_name",
    )

    # Record keys that hold a string or nothing at all.
    _OPTIONAL_STRING_FIELDS = ("first_name", "last_name", "nationality", "position")

    # Record keys that must hold a finite number.
    _FLOAT_FIELDS = (
        "minutes_per_game",
        "points_per_game",
        "rebounds_per_game",
        "assists_per_game",
        "steals_per_game",
        "blocks_per_game",
        "turnovers_per_game",
        "fg_pct",
        "three_pt_pct",
        "ft_pct",
    )

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return ``value`` stripped of surrounding whitespace.

        Raises:
            SnapshotValidationError: if ``value`` is not a string or is blank.
        """
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _optional_string(value: Any, field: str) -> str | None:
        """Return the stripped string, or ``None`` for missing/blank input.

        Raises:
            SnapshotValidationError: if ``value`` is present but not a string.
        """
        if value in (None, ""):
            return None
        if not isinstance(value, str):
            raise SnapshotValidationError(f"{field} must be a string when provided")
        stripped = value.strip()
        return stripped or None

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Coerce ``value`` to an ``int`` that is zero or greater.

        Integers, integral floats (``12.0``) and numeric strings are accepted.

        Raises:
            SnapshotValidationError: for booleans, non-integral or non-finite
                floats, values ``int()`` cannot convert, and negative numbers.
        """
        message = f"{field} must be a non-negative integer"
        # bool is a subclass of int; True/False are not meaningful counts.
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        # int() would silently truncate 3.7 to 3; is_integer() is also False
        # for NaN and +/-inf, so those are rejected here as well.
        if isinstance(value, float) and not value.is_integer():
            raise SnapshotValidationError(message)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            raise SnapshotValidationError(message) from exc
        if parsed < 0:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
        """Like ``_require_non_negative_int`` but maps ``None``/``""`` to ``None``."""
        if value in (None, ""):
            return None
        return cls._require_non_negative_int(value, field)

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Coerce ``value`` to a finite ``float``.

        Raises:
            SnapshotValidationError: for booleans, values ``float()`` cannot
                convert, and NaN or infinite results.
        """
        # Imported locally so this method does not rely on a module-level import.
        import math

        message = f"{field} must be numeric"
        # Mirror the integer helper: float(True) == 1.0 would hide bad input.
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError covers integers too large to represent as a float.
            raise SnapshotValidationError(message) from exc
        # float() happily parses "nan" and "inf"; neither is a usable statistic.
        if not math.isfinite(parsed):
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _parse_date(value: Any, field: str) -> date:
        """Parse ``value`` (stringified first) into a ``date``.

        Raises:
            SnapshotValidationError: if the text is not a date, or is shaped
                like one but names an impossible day.
        """
        message = f"{field} must be YYYY-MM-DD"
        try:
            parsed = parse_date(str(value))
        except ValueError as exc:
            # parse_date returns None for malformed text but raises ValueError
            # for well-formed, impossible dates such as 2024-02-30.
            raise SnapshotValidationError(message) from exc
        if not parsed:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate one record and return a normalized shallow copy of it.

        Args:
            record: The raw record mapping.
            index: Position of the record in the input array; used only to
                build error messages.

        Returns:
            A new dict holding every input key, with validated fields replaced
            by their coerced values. ``birth_date`` becomes an ISO-format
            string or ``None``.

        Raises:
            SnapshotValidationError: on unknown keys, missing required keys,
                or any field that fails its type check.
        """
        unknown = set(record.keys()) - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )

        missing = REQUIRED_RECORD_FIELDS - set(record.keys())
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        # Start from a copy so pass-through keys survive untouched.
        normalized = dict(record)

        for field in cls._REQUIRED_STRING_FIELDS:
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")

        for field in cls._OPTIONAL_STRING_FIELDS:
            normalized[field] = cls._optional_string(record.get(field), f"record[{index}].{field}")

        # "role" may be absent or None, but when given it must be a real string.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")

        birth_date_raw = record.get("birth_date")
        if birth_date_raw in (None, ""):
            normalized["birth_date"] = None
        else:
            birth_date = cls._parse_date(birth_date_raw, f"record[{index}].birth_date")
            normalized["birth_date"] = birth_date.isoformat()

        normalized["height_cm"] = cls._optional_non_negative_int(
            record.get("height_cm"), f"record[{index}].height_cm"
        )
        normalized["weight_kg"] = cls._optional_non_negative_int(
            record.get("weight_kg"), f"record[{index}].weight_kg"
        )
        normalized["games_played"] = cls._require_non_negative_int(
            record.get("games_played"), f"record[{index}].games_played"
        )

        for field in cls._FLOAT_FIELDS:
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")

        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a whole snapshot payload.

        Args:
            payload: The decoded snapshot root object.

        Returns:
            A ``SnapshotValidationResult`` with the stripped source name, the
            parsed snapshot date and the normalized records in input order.

        Raises:
            SnapshotValidationError: if the root is not a dict, carries unknown
                keys, has an invalid ``source_name`` or ``snapshot_date``, has
                no records, or any record fails validation.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")

        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")
        # A missing key stringifies to "None", which fails parsing as intended.
        snapshot_date = cls._parse_date(payload.get("snapshot_date"), "snapshot_date")

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )
|