From acfccbea0823276434edfb4f29daaca74f7cb29a Mon Sep 17 00:00:00 2001 From: Alfredo Di Stasio Date: Tue, 10 Mar 2026 12:11:03 +0100 Subject: [PATCH] feat(providers): add balldontlie http adapter with backend selection --- .env.example | 13 +- README.md | 16 +- apps/ingestion/admin.py | 24 +- .../adapters/balldontlie_provider.py | 147 ++++++++++ apps/providers/clients/__init__.py | 3 + apps/providers/clients/balldontlie.py | 128 +++++++++ apps/providers/registry.py | 15 +- .../services/balldontlie_mappings.py | 260 ++++++++++++++++++ config/settings/base.py | 17 +- requirements/base.txt | 1 + tests/test_ingestion_sync.py | 126 +++++++++ tests/test_provider_balldontlie.py | 183 ++++++++++++ 12 files changed, 917 insertions(+), 16 deletions(-) create mode 100644 apps/providers/adapters/balldontlie_provider.py create mode 100644 apps/providers/clients/__init__.py create mode 100644 apps/providers/clients/balldontlie.py create mode 100644 apps/providers/services/balldontlie_mappings.py create mode 100644 tests/test_provider_balldontlie.py diff --git a/.env.example b/.env.example index 4fb58bb..ec337a9 100644 --- a/.env.example +++ b/.env.example @@ -29,10 +29,21 @@ AUTO_COLLECTSTATIC=1 GUNICORN_WORKERS=3 # Providers / ingestion -PROVIDER_DEFAULT_NAMESPACE=mvp_demo +PROVIDER_BACKEND=demo +PROVIDER_NAMESPACE_DEMO=mvp_demo +PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie +PROVIDER_DEFAULT_NAMESPACE= PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json PROVIDER_REQUEST_RETRIES=3 PROVIDER_REQUEST_RETRY_SLEEP=1 +PROVIDER_HTTP_TIMEOUT_SECONDS=10 +PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/v1 +PROVIDER_BALLDONTLIE_API_KEY= +PROVIDER_BALLDONTLIE_SEASONS=2024 +PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5 +PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100 +PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10 +PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100 CELERY_TASK_TIME_LIMIT=1800 CELERY_TASK_SOFT_TIME_LIMIT=1500 API_THROTTLE_ANON=100/hour diff --git a/README.md b/README.md index 
e218ed7..fb36e2e 100644 --- a/README.md +++ b/README.md @@ -155,8 +155,8 @@ Default auth routes: - Open `/admin/` -> `IngestionRun` - Use admin actions: - - `Queue full MVP sync` - - `Queue incremental MVP sync` + - `Queue full sync (default provider)` + - `Queue incremental sync (default provider)` - `Retry selected ingestion runs` ### Trigger from shell (manual) @@ -167,7 +167,7 @@ docker compose exec web python manage.py shell ```python from apps.ingestion.tasks import trigger_full_sync -trigger_full_sync.delay(provider_namespace="mvp_demo") +trigger_full_sync.delay(provider_namespace="balldontlie") ``` ### Logs and diagnostics @@ -176,6 +176,16 @@ trigger_full_sync.delay(provider_namespace="mvp_demo") - Structured error records: `IngestionError` - Provider entity mappings + diagnostic payload snippets: `ExternalMapping` +## Provider Backend Selection + +Provider backend is selected via environment variables: + +- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`) +- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`) +- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly + +The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP). 
+ ## GitFlow Workflow GitFlow is required in this repository: diff --git a/apps/ingestion/admin.py b/apps/ingestion/admin.py index 4824e72..f863d44 100644 --- a/apps/ingestion/admin.py +++ b/apps/ingestion/admin.py @@ -1,6 +1,8 @@ from django.contrib import admin from django.contrib import messages +from apps.providers.registry import get_default_provider_namespace + from .models import IngestionError, IngestionRun from .tasks import trigger_full_sync, trigger_incremental_sync @@ -41,20 +43,22 @@ class IngestionRunAdmin(admin.ModelAdmin): "created_at", ) actions = ( - "enqueue_full_sync_mvp", - "enqueue_incremental_sync_mvp", + "enqueue_full_sync_default_provider", + "enqueue_incremental_sync_default_provider", "retry_selected_runs", ) - @admin.action(description="Queue full MVP sync") - def enqueue_full_sync_mvp(self, request, queryset): - trigger_full_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id) - self.message_user(request, "Queued full MVP sync task.", level=messages.SUCCESS) + @admin.action(description="Queue full sync (default provider)") + def enqueue_full_sync_default_provider(self, request, queryset): + provider_namespace = get_default_provider_namespace() + trigger_full_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id) + self.message_user(request, f"Queued full sync task for {provider_namespace}.", level=messages.SUCCESS) - @admin.action(description="Queue incremental MVP sync") - def enqueue_incremental_sync_mvp(self, request, queryset): - trigger_incremental_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id) - self.message_user(request, "Queued incremental MVP sync task.", level=messages.SUCCESS) + @admin.action(description="Queue incremental sync (default provider)") + def enqueue_incremental_sync_default_provider(self, request, queryset): + provider_namespace = get_default_provider_namespace() + trigger_incremental_sync.delay(provider_namespace=provider_namespace, 
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
    """Search provider players and return one window of mapped results.

    Args:
        query: Optional free-text filter, forwarded as the ``search`` request
            parameter when non-empty.
        limit: Maximum number of mapped players to return.
        offset: Number of leading mapped players to skip (negative values are
            treated as 0).

    Returns:
        Up to ``limit`` mapped player dicts starting at ``offset``; an empty
        list when ``limit`` is not positive.
    """
    if limit <= 0:
        return []
    offset = max(offset, 0)
    window_end = offset + limit

    # The window is sliced out of the fetched rows, so enough rows to reach
    # ``offset + limit`` must be requested. Fetching only ``limit`` rows on a
    # single page made every window past the first one come back empty.
    per_page = max(1, min(window_end, settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE))
    pages_needed = -(-window_end // per_page)  # ceiling division
    # Deep offsets stay inside the configured players page budget.
    page_limit = max(1, min(pages_needed, settings.PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT))

    rows = self.client.list_paginated(
        "players",
        params={"search": query} if query else None,
        per_page=per_page,
        page_limit=page_limit,
    )
    return map_players(rows)[offset:window_end]
def fetch_player_stats(self) -> list[dict]:
    """Return mapped per-season stat lines for every configured season.

    Box-score rows are pulled from the ``stats`` endpoint one season at a
    time, concatenated, and handed to ``map_player_stats``; only the first
    element of its result (the stat lines) is returned.
    """
    seasons = self.configured_seasons
    box_scores: list[dict] = [
        row
        for season in seasons
        for row in self.client.list_paginated(
            "stats",
            params={"seasons[]": season},
            per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
            page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
        )
    ]
    stat_lines, _careers = map_player_stats(box_scores, allowed_seasons=seasons)
    return stat_lines
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT, + ) + all_rows.extend(rows) + + player_stats, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons) + + logger.info( + "provider_sync_complete", + extra={ + "provider": self.namespace, + "competitions": len(competitions), + "teams": len(teams), + "seasons": len(seasons), + "players": len(players), + "player_stats": len(player_stats), + "player_careers": len(player_careers), + }, + ) + + return { + "players": players, + "competitions": competitions, + "teams": teams, + "seasons": seasons, + "player_stats": player_stats, + "player_careers": player_careers, + "cursor": None, + } + + def sync_incremental(self, *, cursor: str | None = None) -> dict: + payload = self.sync_all() + payload["cursor"] = cursor + return payload diff --git a/apps/providers/clients/__init__.py b/apps/providers/clients/__init__.py new file mode 100644 index 0000000..ab4ac3c --- /dev/null +++ b/apps/providers/clients/__init__.py @@ -0,0 +1,3 @@ +from .balldontlie import BalldontlieClient + +__all__ = ["BalldontlieClient"] diff --git a/apps/providers/clients/balldontlie.py b/apps/providers/clients/balldontlie.py new file mode 100644 index 0000000..2318c77 --- /dev/null +++ b/apps/providers/clients/balldontlie.py @@ -0,0 +1,128 @@ +import logging +import time +from typing import Any + +import requests +from django.conf import settings + +from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError + +logger = logging.getLogger(__name__) + + +class BalldontlieClient: + """HTTP client for balldontlie with timeout/retry/rate-limit handling.""" + + def __init__(self, session: requests.Session | None = None): + self.base_url = settings.PROVIDER_BALLDONTLIE_BASE_URL.rstrip("/") + self.api_key = settings.PROVIDER_BALLDONTLIE_API_KEY + self.timeout_seconds = settings.PROVIDER_HTTP_TIMEOUT_SECONDS + self.max_retries = settings.PROVIDER_REQUEST_RETRIES + self.retry_sleep_seconds = 
settings.PROVIDER_REQUEST_RETRY_SLEEP + self.session = session or requests.Session() + + def _headers(self) -> dict[str, str]: + headers = {"Accept": "application/json"} + if self.api_key: + headers["Authorization"] = self.api_key + return headers + + def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]: + url = f"{self.base_url}/{path.lstrip('/')}" + + for attempt in range(1, self.max_retries + 1): + try: + response = self.session.get( + url, + params=params, + headers=self._headers(), + timeout=self.timeout_seconds, + ) + except requests.Timeout as exc: + logger.warning( + "provider_http_timeout", + extra={"provider": "balldontlie", "url": url, "attempt": attempt}, + ) + if attempt >= self.max_retries: + raise ProviderTransientError(f"Timeout calling balldontlie: {url}") from exc + time.sleep(self.retry_sleep_seconds * attempt) + continue + except requests.RequestException as exc: + logger.warning( + "provider_http_error", + extra={"provider": "balldontlie", "url": url, "attempt": attempt}, + ) + if attempt >= self.max_retries: + raise ProviderTransientError(f"Network error calling balldontlie: {url}") from exc + time.sleep(self.retry_sleep_seconds * attempt) + continue + + status = response.status_code + if status == 429: + retry_after = int(response.headers.get("Retry-After", "30") or "30") + logger.warning( + "provider_rate_limited", + extra={ + "provider": "balldontlie", + "url": url, + "attempt": attempt, + "retry_after": retry_after, + }, + ) + if attempt >= self.max_retries: + raise ProviderRateLimitError( + "balldontlie rate limit reached", + retry_after_seconds=retry_after, + ) + time.sleep(max(retry_after, self.retry_sleep_seconds * attempt)) + continue + + if status >= 500: + logger.warning( + "provider_server_error", + extra={"provider": "balldontlie", "url": url, "attempt": attempt, "status": status}, + ) + if attempt >= self.max_retries: + raise ProviderTransientError(f"balldontlie server error: {status}") + 
time.sleep(self.retry_sleep_seconds * attempt) + continue + + if status >= 400: + body_preview = response.text[:240] + raise ProviderTransientError( + f"balldontlie client error status={status} path={path} body={body_preview}" + ) + + try: + return response.json() + except ValueError as exc: + raise ProviderTransientError(f"Invalid JSON from balldontlie for {path}") from exc + + raise ProviderTransientError(f"Failed to call balldontlie path={path}") + + def list_paginated( + self, + path: str, + *, + params: dict[str, Any] | None = None, + per_page: int = 100, + page_limit: int = 1, + ) -> list[dict[str, Any]]: + page = 1 + rows: list[dict[str, Any]] = [] + query = dict(params or {}) + + while page <= page_limit: + query.update({"page": page, "per_page": per_page}) + payload = self.get_json(path, params=query) + data = payload.get("data") or [] + if isinstance(data, list): + rows.extend(data) + + meta = payload.get("meta") or {} + next_page = meta.get("next_page") + if not next_page: + break + page = int(next_page) + + return rows diff --git a/apps/providers/registry.py b/apps/providers/registry.py index c76744b..9235c8b 100644 --- a/apps/providers/registry.py +++ b/apps/providers/registry.py @@ -1,16 +1,29 @@ from django.conf import settings +from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter from apps.providers.exceptions import ProviderNotFoundError PROVIDER_REGISTRY = { MvpDemoProviderAdapter.namespace: MvpDemoProviderAdapter, + BalldontlieProviderAdapter.namespace: BalldontlieProviderAdapter, } +def get_default_provider_namespace() -> str: + if settings.PROVIDER_DEFAULT_NAMESPACE: + return settings.PROVIDER_DEFAULT_NAMESPACE + + backend_map = { + "demo": settings.PROVIDER_NAMESPACE_DEMO, + "balldontlie": settings.PROVIDER_NAMESPACE_BALLDONTLIE, + } + return backend_map.get(settings.PROVIDER_BACKEND, settings.PROVIDER_NAMESPACE_DEMO) + + def 
get_provider(namespace: str | None = None): - provider_namespace = namespace or settings.PROVIDER_DEFAULT_NAMESPACE + provider_namespace = namespace or get_default_provider_namespace() provider_cls = PROVIDER_REGISTRY.get(provider_namespace) if not provider_cls: raise ProviderNotFoundError(f"Unknown provider namespace: {provider_namespace}") diff --git a/apps/providers/services/balldontlie_mappings.py b/apps/providers/services/balldontlie_mappings.py new file mode 100644 index 0000000..135867a --- /dev/null +++ b/apps/providers/services/balldontlie_mappings.py @@ -0,0 +1,260 @@ +from __future__ import annotations + +from collections import defaultdict +from datetime import date +from typing import Any + +from django.utils.text import slugify + + +def map_competitions() -> list[dict[str, Any]]: + return [ + { + "external_id": "competition-nba", + "name": "NBA", + "slug": "nba", + "competition_type": "league", + "gender": "men", + "level": 1, + "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}, + "is_active": True, + } + ] + + +def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + mapped: list[dict[str, Any]] = [] + for row in rows: + team_id = row.get("id") + if not team_id: + continue + full_name = row.get("full_name") or row.get("name") or f"Team {team_id}" + abbreviation = (row.get("abbreviation") or "").strip() + mapped.append( + { + "external_id": f"team-{team_id}", + "name": full_name, + "short_name": abbreviation, + "slug": slugify(full_name) or f"team-{team_id}", + "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}, + "is_national_team": False, + } + ) + return mapped + + +def _map_position(position: str | None) -> dict[str, str] | None: + if not position: + return None + normalized = position.upper().strip() + position_map = { + "G": ("PG", "Point Guard"), + "G-F": ("SG", "Shooting Guard"), + "F-G": ("SF", "Small Forward"), + "F": ("PF", "Power Forward"), + "F-C": ("PF", "Power Forward"), + 
"C-F": ("C", "Center"), + "C": ("C", "Center"), + } + code_name = position_map.get(normalized) + if not code_name: + return None + return {"code": code_name[0], "name": code_name[1]} + + +def _map_role(position: str | None) -> dict[str, str] | None: + if not position: + return None + normalized = position.upper().strip() + if "G" in normalized: + return {"code": "playmaker", "name": "Playmaker"} + if "F" in normalized: + return {"code": "wing", "name": "Wing"} + if "C" in normalized: + return {"code": "big", "name": "Big"} + return None + + +def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + mapped: list[dict[str, Any]] = [] + for row in rows: + player_id = row.get("id") + if not player_id: + continue + + first_name = row.get("first_name", "") + last_name = row.get("last_name", "") + full_name = f"{first_name} {last_name}".strip() or f"Player {player_id}" + position_value = row.get("position") + team = row.get("team") or {} + + mapped.append( + { + "external_id": f"player-{player_id}", + "first_name": first_name, + "last_name": last_name, + "full_name": full_name, + "birth_date": None, + "nationality": {"name": "Unknown", "iso2_code": "ZZ", "iso3_code": "ZZZ"}, + "nominal_position": _map_position(position_value), + "inferred_role": _map_role(position_value), + "height_cm": None, + "weight_kg": None, + "dominant_hand": "unknown", + "is_active": True, + "aliases": [], + "current_team_external_id": f"team-{team['id']}" if team.get("id") else None, + } + ) + return mapped + + +def map_seasons(seasons: list[int]) -> list[dict[str, Any]]: + mapped: list[dict[str, Any]] = [] + for season in seasons: + mapped.append( + { + "external_id": f"season-{season}", + "label": f"{season}-{season + 1}", + "start_date": date(season, 10, 1).isoformat(), + "end_date": date(season + 1, 6, 30).isoformat(), + "is_current": False, + } + ) + return mapped + + +def _to_float(value: Any) -> float: + if value in (None, ""): + return 0.0 + try: + return float(value) + except 
(TypeError, ValueError): + return 0.0 + + +def _parse_minutes(value: Any) -> int: + if value in (None, ""): + return 0 + if isinstance(value, (int, float)): + return int(value) + + text = str(value) + if ":" in text: + minutes, _ = text.split(":", 1) + return int(_to_float(minutes)) + return int(_to_float(text)) + + +def _pct(value: Any, *, count: int) -> float | None: + if count <= 0: + return None + pct = _to_float(value) / count + if pct <= 1: + pct *= 100 + return round(pct, 2) + + +def map_player_stats( + rows: list[dict[str, Any]], + *, + allowed_seasons: list[int], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + aggregates: dict[tuple[int, int, int], dict[str, Any]] = defaultdict( + lambda: { + "games": 0, + "minutes": 0, + "points": 0.0, + "rebounds": 0.0, + "assists": 0.0, + "steals": 0.0, + "blocks": 0.0, + "turnovers": 0.0, + "fg_pct_sum": 0.0, + "fg_pct_count": 0, + "three_pct_sum": 0.0, + "three_pct_count": 0, + "ft_pct_sum": 0.0, + "ft_pct_count": 0, + } + ) + + for row in rows: + game = row.get("game") or {} + season = game.get("season") + player = row.get("player") or {} + team = row.get("team") or {} + player_id = player.get("id") + team_id = team.get("id") + + if not (season and player_id and team_id): + continue + if allowed_seasons and season not in allowed_seasons: + continue + + key = (season, player_id, team_id) + agg = aggregates[key] + agg["games"] += 1 + agg["minutes"] += _parse_minutes(row.get("min")) + agg["points"] += _to_float(row.get("pts")) + agg["rebounds"] += _to_float(row.get("reb")) + agg["assists"] += _to_float(row.get("ast")) + agg["steals"] += _to_float(row.get("stl")) + agg["blocks"] += _to_float(row.get("blk")) + agg["turnovers"] += _to_float(row.get("turnover")) + + if row.get("fg_pct") is not None: + agg["fg_pct_sum"] += _to_float(row.get("fg_pct")) + agg["fg_pct_count"] += 1 + if row.get("fg3_pct") is not None: + agg["three_pct_sum"] += _to_float(row.get("fg3_pct")) + agg["three_pct_count"] += 1 + if 
row.get("ft_pct") is not None: + agg["ft_pct_sum"] += _to_float(row.get("ft_pct")) + agg["ft_pct_count"] += 1 + + player_stats: list[dict[str, Any]] = [] + player_careers: list[dict[str, Any]] = [] + + for (season, player_id, team_id), agg in aggregates.items(): + games = agg["games"] or 1 + player_stats.append( + { + "external_id": f"ps-{season}-{player_id}-{team_id}", + "player_external_id": f"player-{player_id}", + "team_external_id": f"team-{team_id}", + "competition_external_id": "competition-nba", + "season_external_id": f"season-{season}", + "games_played": agg["games"], + "games_started": 0, + "minutes_played": agg["minutes"], + "points": round(agg["points"] / games, 2), + "rebounds": round(agg["rebounds"] / games, 2), + "assists": round(agg["assists"] / games, 2), + "steals": round(agg["steals"] / games, 2), + "blocks": round(agg["blocks"] / games, 2), + "turnovers": round(agg["turnovers"] / games, 2), + "fg_pct": _pct(agg["fg_pct_sum"], count=agg["fg_pct_count"]), + "three_pct": _pct(agg["three_pct_sum"], count=agg["three_pct_count"]), + "ft_pct": _pct(agg["ft_pct_sum"], count=agg["ft_pct_count"]), + "usage_rate": None, + "true_shooting_pct": None, + "player_efficiency_rating": None, + } + ) + player_careers.append( + { + "external_id": f"career-{season}-{player_id}-{team_id}", + "player_external_id": f"player-{player_id}", + "team_external_id": f"team-{team_id}", + "competition_external_id": "competition-nba", + "season_external_id": f"season-{season}", + "role_code": "", + "shirt_number": None, + "start_date": date(season, 10, 1).isoformat(), + "end_date": date(season + 1, 6, 30).isoformat(), + "notes": "Imported from balldontlie aggregated box scores", + } + ) + + return player_stats, player_careers diff --git a/config/settings/base.py b/config/settings/base.py index 4cd767e..1f63b77 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -118,13 +118,28 @@ CELERY_TIMEZONE = TIME_ZONE CELERY_TASK_TIME_LIMIT = 
int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800")) CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500")) -PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "mvp_demo") +PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower() +PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo") +PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie") +PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip() PROVIDER_MVP_DATA_FILE = os.getenv( "PROVIDER_MVP_DATA_FILE", str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"), ) PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) +PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10")) +PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/v1") +PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "") +PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5")) +PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100")) +PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10")) +PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100")) +PROVIDER_BALLDONTLIE_SEASONS = [ + int(value.strip()) + for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",") + if value.strip().isdigit() +] REST_FRAMEWORK = { "DEFAULT_PERMISSION_CLASSES": [ diff --git a/requirements/base.txt b/requirements/base.txt index f008adc..10519be 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -5,3 +5,4 @@ gunicorn>=22.0,<23.0 celery[redis]>=5.4,<6.0 redis>=5.2,<6.0 python-dotenv>=1.0,<2.0 
+requests>=2.32,<3.0 diff --git a/tests/test_ingestion_sync.py b/tests/test_ingestion_sync.py index 1624cba..8864796 100644 --- a/tests/test_ingestion_sync.py +++ b/tests/test_ingestion_sync.py @@ -7,6 +7,7 @@ from apps.ingestion.models import IngestionError, IngestionRun from apps.ingestion.services.sync import run_sync_job from apps.players.models import Player from apps.providers.exceptions import ProviderRateLimitError +from apps.providers.models import ExternalMapping from apps.stats.models import PlayerSeason, PlayerSeasonStats from apps.teams.models import Team @@ -81,3 +82,128 @@ def test_run_sync_handles_rate_limit(settings): assert IngestionError.objects.filter(ingestion_run=run).exists() os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None) + + +@pytest.mark.django_db +def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch): + class StableProvider: + def sync_all(self): + return { + "competitions": [ + { + "external_id": "competition-nba", + "name": "NBA", + "slug": "nba", + "competition_type": "league", + "gender": "men", + "level": 1, + "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}, + "is_active": True, + } + ], + "teams": [ + { + "external_id": "team-14", + "name": "Los Angeles Lakers", + "short_name": "LAL", + "slug": "los-angeles-lakers", + "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}, + "is_national_team": False, + } + ], + "seasons": [ + { + "external_id": "season-2024", + "label": "2024-2025", + "start_date": "2024-10-01", + "end_date": "2025-06-30", + "is_current": False, + } + ], + "players": [ + { + "external_id": "player-237", + "first_name": "LeBron", + "last_name": "James", + "full_name": "LeBron James", + "birth_date": None, + "nationality": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}, + "nominal_position": {"code": "SF", "name": "Small Forward"}, + "inferred_role": {"code": "wing", "name": "Wing"}, + "height_cm": None, + "weight_kg": None, + 
"dominant_hand": "unknown", + "is_active": True, + "aliases": [], + } + ], + "player_stats": [ + { + "external_id": "ps-2024-237-14", + "player_external_id": "player-237", + "team_external_id": "team-14", + "competition_external_id": "competition-nba", + "season_external_id": "season-2024", + "games_played": 2, + "games_started": 0, + "minutes_played": 68, + "points": 25, + "rebounds": 9, + "assists": 8, + "steals": 1.5, + "blocks": 0.5, + "turnovers": 3.5, + "fg_pct": 55.0, + "three_pct": 45.0, + "ft_pct": 95.0, + "usage_rate": None, + "true_shooting_pct": None, + "player_efficiency_rating": None, + } + ], + "player_careers": [ + { + "external_id": "career-2024-237-14", + "player_external_id": "player-237", + "team_external_id": "team-14", + "competition_external_id": "competition-nba", + "season_external_id": "season-2024", + "role_code": "", + "shirt_number": None, + "start_date": "2024-10-01", + "end_date": "2025-06-30", + "notes": "Imported from balldontlie aggregated box scores", + } + ], + } + + def sync_incremental(self, *, cursor: str | None = None): + payload = self.sync_all() + payload["cursor"] = cursor + return payload + + monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider()) + + run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC) + counts_first = { + "competition": Competition.objects.count(), + "team": Team.objects.count(), + "season": Season.objects.count(), + "player": Player.objects.count(), + "player_season": PlayerSeason.objects.count(), + "player_stats": PlayerSeasonStats.objects.count(), + "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(), + } + + run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC) + counts_second = { + "competition": Competition.objects.count(), + "team": Team.objects.count(), + "season": Season.objects.count(), + "player": Player.objects.count(), + "player_season": 
PlayerSeason.objects.count(), + "player_stats": PlayerSeasonStats.objects.count(), + "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(), + } + + assert counts_first == counts_second diff --git a/tests/test_provider_balldontlie.py b/tests/test_provider_balldontlie.py new file mode 100644 index 0000000..be28cf5 --- /dev/null +++ b/tests/test_provider_balldontlie.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +import time +from typing import Any + +import pytest +import requests + +from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter +from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter +from apps.providers.clients.balldontlie import BalldontlieClient +from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError +from apps.providers.registry import get_default_provider_namespace, get_provider + + +class _FakeResponse: + def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""): + self.status_code = status_code + self._payload = payload or {} + self.headers = headers or {} + self.text = text + + def json(self): + return self._payload + + +class _FakeSession: + def __init__(self, responses: list[Any]): + self._responses = responses + + def get(self, *args, **kwargs): + item = self._responses.pop(0) + if isinstance(item, Exception): + raise item + return item + + +class _FakeBalldontlieClient: + def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]: + if path == "teams": + return { + "data": [ + { + "id": 14, + "full_name": "Los Angeles Lakers", + "abbreviation": "LAL", + } + ] + } + return {"data": []} + + def list_paginated( + self, + path: str, + *, + params: dict[str, Any] | None = None, + per_page: int = 100, + page_limit: int = 1, + ) -> list[dict[str, Any]]: + if path == "players": + return [ + { + "id": 237, + "first_name": "LeBron", + 
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
    """sync_all() turns the fake client's payloads into ingestion-ready records."""
    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient())

    payload = adapter.sync_all()

    assert payload["competitions"][0]["external_id"] == "competition-nba"
    assert payload["teams"][0]["external_id"] == "team-14"
    assert payload["players"][0]["external_id"] == "player-237"
    # The player must be linked to the team it was returned with.
    assert payload["players"][0]["current_team_external_id"] == "team-14"
    assert payload["seasons"][0]["external_id"] == "season-2024"

    stats = payload["player_stats"][0]
    assert stats["games_played"] == 2
    # "35:12" + "33:00" -> whole minutes are summed, seconds dropped.
    assert stats["minutes_played"] == 68
    assert stats["points"] == 25.0
    assert stats["fg_pct"] == 55.0

    # Careers are derived from the same aggregated box scores as the stats.
    assert payload["player_careers"][0]["external_id"] == "career-2024-237-14"
    assert payload["cursor"] is None
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
    """Two configured attempts that both time out end in ProviderTransientError."""
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    queued = [requests.Timeout("slow"), requests.Timeout("slow")]
    session = _FakeSession(responses=queued)
    client = BalldontlieClient(session=session)

    with pytest.raises(ProviderTransientError):
        client.get_json("players")

    # Both queued timeouts must have been consumed; otherwise the client gave
    # up without retrying and the test would still pass on the exception alone.
    assert queued == []