from __future__ import annotations from typing import Any from django.conf import settings from .base import BaseSnapshotExtractor, ExtractorConfigError, ExtractorNormalizationError, ExtractorParseError from .http import ResponsibleHttpClient def _first_non_empty(record: dict[str, Any], *keys: str) -> Any: for key in keys: value = record.get(key) if value not in (None, ""): return value return None ESSENTIAL_FIELDS = { "competition_external_id", "competition_name", "season", "team_external_id", "team_name", "player_external_id", "full_name", "games_played", "minutes_per_game", "points_per_game", "rebounds_per_game", "assists_per_game", "steals_per_game", "blocks_per_game", "turnovers_per_game", "fg_pct", "three_pt_pct", "ft_pct", } class LBASnapshotExtractor(BaseSnapshotExtractor): """ LBA (Lega Basket Serie A) MVP extractor. Scope is intentionally conservative: - one configured public stats endpoint - one configured season label - normalized player-season rows only """ extractor_name = "lba" source_name = "lba" def __init__(self, *, http_client: ResponsibleHttpClient | None = None): self.url = settings.EXTRACTOR_LBA_STATS_URL.strip() self.season_label = settings.EXTRACTOR_LBA_SEASON_LABEL.strip() self.competition_external_id = settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID.strip() self.competition_name = settings.EXTRACTOR_LBA_COMPETITION_NAME.strip() self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD self.http_client = http_client or ResponsibleHttpClient( user_agent=settings.EXTRACTOR_USER_AGENT, timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS, retries=settings.EXTRACTOR_HTTP_RETRIES, retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS, request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS, ) if not self.url: raise ExtractorConfigError("EXTRACTOR_LBA_STATS_URL is required for lba extractor.") if not self.season_label: raise ExtractorConfigError("EXTRACTOR_LBA_SEASON_LABEL is required for lba extractor.") if not self.competition_external_id: raise ExtractorConfigError("EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID is required.") if not self.competition_name: raise ExtractorConfigError("EXTRACTOR_LBA_COMPETITION_NAME is required.") def fetch(self) -> Any: return self.http_client.get_json(self.url) def parse(self, payload: Any) -> list[dict[str, Any]]: if isinstance(payload, list): return payload if not isinstance(payload, dict): raise ExtractorParseError("LBA payload must be a JSON object or array.") for key in ("records", "data", "players", "items"): rows = payload.get(key) if isinstance(rows, list): return rows raise ExtractorParseError("LBA payload must contain one of: records, data, players, items.") def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]: player_obj = source_record.get("player") if isinstance(source_record.get("player"), dict) else {} team_obj = source_record.get("team") if isinstance(source_record.get("team"), dict) else {} full_name = _first_non_empty( source_record, "full_name", "player_name", "name", ) or _first_non_empty(player_obj, "full_name", "name") first_name = _first_non_empty(source_record, "first_name") or _first_non_empty(player_obj, "first_name") last_name = _first_non_empty(source_record, "last_name") or _first_non_empty(player_obj, "last_name") player_external_id = _first_non_empty( source_record, "player_external_id", "player_id", "athlete_id" ) or _first_non_empty(player_obj, "id", "player_id") team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty( team_obj, "id", "team_id" ) team_name = _first_non_empty(source_record, "team_name", "team") or _first_non_empty(team_obj, "name") normalized = { "competition_external_id": self.competition_external_id, "competition_name": self.competition_name, "season": self.season_label, "team_external_id": team_external_id, "team_name": team_name, "player_external_id": player_external_id, "full_name": full_name, "first_name": first_name, "last_name": last_name, "birth_date": _first_non_empty(source_record, "birth_date") or _first_non_empty( player_obj, "birth_date", "dob" ), "nationality": _first_non_empty(source_record, "nationality") or _first_non_empty(player_obj, "nationality", "country"), "height_cm": _first_non_empty(source_record, "height_cm") or _first_non_empty(player_obj, "height_cm"), "weight_kg": _first_non_empty(source_record, "weight_kg") or _first_non_empty(player_obj, "weight_kg"), "position": _first_non_empty(source_record, "position") or _first_non_empty(player_obj, "position"), "role": _first_non_empty(source_record, "role"), "games_played": _first_non_empty(source_record, "games_played", "gp"), "minutes_per_game": _first_non_empty(source_record, "minutes_per_game", "mpg"), "points_per_game": _first_non_empty(source_record, "points_per_game", "ppg"), "rebounds_per_game": _first_non_empty(source_record, "rebounds_per_game", "rpg"), "assists_per_game": _first_non_empty(source_record, "assists_per_game", "apg"), "steals_per_game": _first_non_empty(source_record, "steals_per_game", "spg"), "blocks_per_game": _first_non_empty(source_record, "blocks_per_game", "bpg"), "turnovers_per_game": _first_non_empty(source_record, "turnovers_per_game", "tov"), "fg_pct": _first_non_empty(source_record, "fg_pct", "fg_percentage"), "three_pt_pct": _first_non_empty( source_record, "three_pt_pct", "three_point_pct", "3p_pct", "three_pct" ), "ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"), } missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")] if missing: raise ExtractorNormalizationError(f"lba row missing required fields: {', '.join(sorted(missing))}") normalized["team_external_id"] = str(normalized["team_external_id"]).strip() normalized["player_external_id"] = str(normalized["player_external_id"]).strip() normalized["competition_external_id"] = str(normalized["competition_external_id"]).strip() normalized["season"] = str(normalized["season"]).strip() if self.include_raw_payload: normalized["raw_payload"] = source_record return normalized