"""Schema validation for HoopScout v2 player-season snapshot payloads."""

from __future__ import annotations

import math
from dataclasses import dataclass
from datetime import date
from typing import Any

from django.utils.dateparse import parse_date

# Keys that every record must contain.
REQUIRED_RECORD_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}

# Keys a record may omit; they are normalized to ``None`` when absent or empty.
OPTIONAL_RECORD_FIELDS = {
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
}

# The only keys accepted on the snapshot root object.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name",
    "snapshot_date",
    "records",
    "source_metadata",
    "raw_payload",
}

# Every key accepted on a record; anything else is rejected as unknown.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | OPTIONAL_RECORD_FIELDS | {
    "role",
    "source_metadata",
    "raw_payload",
}


@dataclass
class SnapshotValidationResult:
    """Normalized outcome of a successful snapshot validation."""

    # Stripped, non-empty ``source_name`` taken from the payload root.
    source_name: str
    # Parsed ``snapshot_date`` of the payload.
    snapshot_date: date
    # One normalized dict per input record, in input order.
    records: list[dict[str, Any]]


class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload violates the expected schema."""


class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots."""

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return ``value`` stripped of surrounding whitespace.

        Raises:
            SnapshotValidationError: if ``value`` is not a string or is blank.
        """
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _optional_string(value: Any, field: str) -> str | None:
        """Return the stripped string, or ``None`` for missing/blank input.

        Raises:
            SnapshotValidationError: if a non-empty value is not a string.
        """
        if value in (None, ""):
            return None
        if not isinstance(value, str):
            raise SnapshotValidationError(f"{field} must be a string when provided")
        stripped = value.strip()
        return stripped or None

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Coerce ``value`` to an ``int`` that is zero or greater.

        Integers, integral floats (``5.0``) and integer strings (``"5"``) are
        accepted. Booleans and fractional floats are rejected rather than
        silently coerced/truncated.

        Raises:
            SnapshotValidationError: if ``value`` cannot be interpreted as a
                non-negative integer.
        """
        message = f"{field} must be a non-negative integer"
        # bool is a subclass of int; True/False are never valid counts.
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        # int(3.7) would silently truncate to 3; NaN and +/-inf also fail
        # is_integer(), so they are rejected here as well.
        if isinstance(value, float) and not value.is_integer():
            raise SnapshotValidationError(message)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError covers infinite values of other numeric types.
            raise SnapshotValidationError(message) from exc
        if parsed < 0:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
        """Return ``None`` for missing/empty input, else a non-negative int.

        Raises:
            SnapshotValidationError: if a provided value is not a
                non-negative integer.
        """
        if value in (None, ""):
            return None
        return cls._require_non_negative_int(value, field)

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Coerce ``value`` to a finite ``float``.

        Booleans are rejected for consistency with the integer helper, and
        NaN/Infinity are rejected because strict JSON has no such values.

        Raises:
            SnapshotValidationError: if ``value`` is not a finite number.
        """
        message = f"{field} must be numeric"
        if isinstance(value, bool):
            raise SnapshotValidationError(message)
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError) as exc:
            # OverflowError: an int too large to be represented as a float.
            raise SnapshotValidationError(message) from exc
        if not math.isfinite(parsed):
            raise SnapshotValidationError(message)
        return parsed

    @staticmethod
    def _parse_iso_date(value: Any, field: str) -> date:
        """Parse ``value`` (via its ``str()`` form) as a calendar date.

        Raises:
            SnapshotValidationError: if the text is not a date, or is
                well-formatted but names an impossible date.
        """
        message = f"{field} must be YYYY-MM-DD"
        try:
            parsed = parse_date(str(value))
        except ValueError as exc:
            # parse_date raises ValueError (instead of returning None) for
            # well-formatted but invalid dates such as 2024-02-30.
            raise SnapshotValidationError(message) from exc
        if parsed is None:
            raise SnapshotValidationError(message)
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate one record and return a normalized copy of it.

        The input dict is not modified. Keys that are allowed but not
        validated here (``source_metadata``, ``raw_payload``) are copied
        through unchanged. ``birth_date`` is normalized to an ISO string.

        Args:
            record: The raw record object.
            index: Position of the record in the payload, used in messages.

        Raises:
            SnapshotValidationError: on unknown keys, missing required keys,
                or any field with an invalid value.
        """
        unknown = set(record) - ALLOWED_RECORD_FIELDS
        if unknown:
            # str() keeps the message buildable even for non-string keys.
            names = ", ".join(sorted(str(name) for name in unknown))
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {names}"
            )

        missing = REQUIRED_RECORD_FIELDS - set(record)
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        normalized = dict(record)

        for field in (
            "competition_external_id",
            "competition_name",
            "season",
            "team_external_id",
            "team_name",
            "player_external_id",
            "full_name",
        ):
            normalized[field] = cls._require_string(
                record.get(field), f"record[{index}].{field}"
            )

        for field in ("first_name", "last_name", "nationality", "position"):
            normalized[field] = cls._optional_string(
                record.get(field), f"record[{index}].{field}"
            )

        # "role" is only validated when present; it is never defaulted.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(
                record.get("role"), f"record[{index}].role"
            )

        birth_date_raw = record.get("birth_date")
        if birth_date_raw in (None, ""):
            normalized["birth_date"] = None
        else:
            birth_date = cls._parse_iso_date(
                birth_date_raw, f"record[{index}].birth_date"
            )
            normalized["birth_date"] = birth_date.isoformat()

        normalized["height_cm"] = cls._optional_non_negative_int(
            record.get("height_cm"), f"record[{index}].height_cm"
        )
        normalized["weight_kg"] = cls._optional_non_negative_int(
            record.get("weight_kg"), f"record[{index}].weight_kg"
        )
        normalized["games_played"] = cls._require_non_negative_int(
            record.get("games_played"), f"record[{index}].games_played"
        )

        for field in (
            "minutes_per_game",
            "points_per_game",
            "rebounds_per_game",
            "assists_per_game",
            "steals_per_game",
            "blocks_per_game",
            "turnovers_per_game",
            "fg_pct",
            "three_pt_pct",
            "ft_pct",
        ):
            normalized[field] = cls._require_float(
                record.get(field), f"record[{index}].{field}"
            )

        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a whole snapshot payload.

        Args:
            payload: The decoded snapshot root object.

        Returns:
            The stripped source name, the parsed snapshot date and the
            normalized records.

        Raises:
            SnapshotValidationError: if the root is not a dict, contains
                unknown keys, has an invalid ``source_name`` or
                ``snapshot_date``, has no records, or any record is invalid.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")

        unknown = set(payload) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            # str() keeps the message buildable even for non-string keys.
            names = ", ".join(sorted(str(name) for name in unknown))
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {names}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")
        snapshot_date = cls._parse_iso_date(
            payload.get("snapshot_date"), "snapshot_date"
        )

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )