feat(providers): add balldontlie http adapter with backend selection

This commit is contained in:
Alfredo Di Stasio
2026-03-10 12:11:03 +01:00
parent f9329df64f
commit acfccbea08
12 changed files with 917 additions and 16 deletions

View File

@ -29,10 +29,21 @@ AUTO_COLLECTSTATIC=1
GUNICORN_WORKERS=3 GUNICORN_WORKERS=3
# Providers / ingestion # Providers / ingestion
PROVIDER_DEFAULT_NAMESPACE=mvp_demo PROVIDER_BACKEND=demo
PROVIDER_NAMESPACE_DEMO=mvp_demo
PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie
PROVIDER_DEFAULT_NAMESPACE=
PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
PROVIDER_REQUEST_RETRIES=3 PROVIDER_REQUEST_RETRIES=3
PROVIDER_REQUEST_RETRY_SLEEP=1 PROVIDER_REQUEST_RETRY_SLEEP=1
PROVIDER_HTTP_TIMEOUT_SECONDS=10
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/v1
PROVIDER_BALLDONTLIE_API_KEY=
PROVIDER_BALLDONTLIE_SEASONS=2024
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
CELERY_TASK_TIME_LIMIT=1800 CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500 CELERY_TASK_SOFT_TIME_LIMIT=1500
API_THROTTLE_ANON=100/hour API_THROTTLE_ANON=100/hour

View File

@ -155,8 +155,8 @@ Default auth routes:
- Open `/admin/` -> `IngestionRun` - Open `/admin/` -> `IngestionRun`
- Use admin actions: - Use admin actions:
- `Queue full MVP sync` - `Queue full sync (default provider)`
- `Queue incremental MVP sync` - `Queue incremental sync (default provider)`
- `Retry selected ingestion runs` - `Retry selected ingestion runs`
### Trigger from shell (manual) ### Trigger from shell (manual)
@ -167,7 +167,7 @@ docker compose exec web python manage.py shell
```python ```python
from apps.ingestion.tasks import trigger_full_sync from apps.ingestion.tasks import trigger_full_sync
trigger_full_sync.delay(provider_namespace="mvp_demo") trigger_full_sync.delay(provider_namespace="balldontlie")
``` ```
### Logs and diagnostics ### Logs and diagnostics
@ -176,6 +176,16 @@ trigger_full_sync.delay(provider_namespace="mvp_demo")
- Structured error records: `IngestionError` - Structured error records: `IngestionError`
- Provider entity mappings + diagnostic payload snippets: `ExternalMapping` - Provider entity mappings + diagnostic payload snippets: `ExternalMapping`
## Provider Backend Selection
Provider backend is selected via environment variables:
- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`)
- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`)
- `PROVIDER_DEFAULT_NAMESPACE` can explicitly override the backend-to-namespace mapping - `PROVIDER_DEFAULT_NAMESPACE` can explicitly override the backend-to-namespace mapping
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
## GitFlow Workflow ## GitFlow Workflow
GitFlow is required in this repository: GitFlow is required in this repository:

View File

@ -1,6 +1,8 @@
from django.contrib import admin from django.contrib import admin
from django.contrib import messages from django.contrib import messages
from apps.providers.registry import get_default_provider_namespace
from .models import IngestionError, IngestionRun from .models import IngestionError, IngestionRun
from .tasks import trigger_full_sync, trigger_incremental_sync from .tasks import trigger_full_sync, trigger_incremental_sync
@ -41,20 +43,22 @@ class IngestionRunAdmin(admin.ModelAdmin):
"created_at", "created_at",
) )
actions = ( actions = (
"enqueue_full_sync_mvp", "enqueue_full_sync_default_provider",
"enqueue_incremental_sync_mvp", "enqueue_incremental_sync_default_provider",
"retry_selected_runs", "retry_selected_runs",
) )
@admin.action(description="Queue full MVP sync") @admin.action(description="Queue full sync (default provider)")
def enqueue_full_sync_mvp(self, request, queryset): def enqueue_full_sync_default_provider(self, request, queryset):
trigger_full_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id) provider_namespace = get_default_provider_namespace()
self.message_user(request, "Queued full MVP sync task.", level=messages.SUCCESS) trigger_full_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
self.message_user(request, f"Queued full sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Queue incremental MVP sync") @admin.action(description="Queue incremental sync (default provider)")
def enqueue_incremental_sync_mvp(self, request, queryset): def enqueue_incremental_sync_default_provider(self, request, queryset):
trigger_incremental_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id) provider_namespace = get_default_provider_namespace()
self.message_user(request, "Queued incremental MVP sync task.", level=messages.SUCCESS) trigger_incremental_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
self.message_user(request, f"Queued incremental sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Retry selected ingestion runs") @admin.action(description="Retry selected ingestion runs")
def retry_selected_runs(self, request, queryset): def retry_selected_runs(self, request, queryset):

View File

@ -0,0 +1,147 @@
import logging
from django.conf import settings
from apps.providers.clients import BalldontlieClient
from apps.providers.interfaces import BaseProviderAdapter
from apps.providers.services.balldontlie_mappings import (
map_competitions,
map_player_stats,
map_players,
map_seasons,
map_teams,
)
logger = logging.getLogger(__name__)
class BalldontlieProviderAdapter(BaseProviderAdapter):
    """HTTP MVP adapter for balldontlie (NBA-centric data source).

    Fetches teams, players and per-game stats from the balldontlie API via
    ``BalldontlieClient`` and maps them into the provider-neutral dict shapes
    consumed by the ingestion pipeline. Competitions and seasons are
    synthesised locally because the API does not expose them.
    """

    namespace = "balldontlie"

    def __init__(self, client: BalldontlieClient | None = None):
        # Client is injectable so tests can substitute a canned-data double.
        self.client = client or BalldontlieClient()

    @property
    def configured_seasons(self) -> list[int]:
        # Season start years to ingest; set via PROVIDER_BALLDONTLIE_SEASONS.
        return settings.PROVIDER_BALLDONTLIE_SEASONS

    def _fetch_stat_rows(self) -> list[dict]:
        """Fetch raw per-game stat rows for every configured season.

        Single home for the stats pagination loop that was previously
        duplicated verbatim in ``fetch_player_stats``,
        ``fetch_player_careers`` and ``sync_all``.
        """
        all_rows: list[dict] = []
        for season in self.configured_seasons:
            rows = self.client.list_paginated(
                "stats",
                params={"seasons[]": season},
                per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
                page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
            )
            all_rows.extend(rows)
        return all_rows

    def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
        """Search players by name, returning at most *limit* mapped players.

        Only the first API page is consulted (page_limit=1), so offsets
        beyond one page yield an empty list.
        """
        params = {"search": query} if query else None
        rows = self.client.list_paginated(
            "players",
            params=params,
            per_page=min(limit, settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE),
            page_limit=1,
        )
        mapped = map_players(rows)
        return mapped[offset : offset + limit]

    def fetch_player(self, *, external_player_id: str) -> dict | None:
        """Fetch one player by our external id format ``player-<api id>``.

        Returns None when the id is not in our namespace format or the API
        payload carries no usable ``data`` object.
        """
        if not external_player_id.startswith("player-"):
            return None
        player_id = external_player_id.replace("player-", "", 1)
        payload = self.client.get_json(f"players/{player_id}")
        data = payload.get("data")
        if not isinstance(data, dict):
            return None
        mapped = map_players([data])
        return mapped[0] if mapped else None

    def fetch_players(self) -> list[dict]:
        """Fetch and map players, bounded by the configured page limits."""
        rows = self.client.list_paginated(
            "players",
            per_page=settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE,
            page_limit=settings.PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT,
        )
        return map_players(rows)

    def fetch_competitions(self) -> list[dict]:
        """Return the static NBA competition record (no API call needed)."""
        return map_competitions()

    def fetch_teams(self) -> list[dict]:
        """Fetch and map all teams; tolerates a malformed "data" field."""
        payload = self.client.get_json("teams")
        rows = payload.get("data") or []
        return map_teams(rows if isinstance(rows, list) else [])

    def fetch_seasons(self) -> list[dict]:
        """Synthesise season records from the configured season years."""
        return map_seasons(self.configured_seasons)

    def fetch_player_stats(self) -> list[dict]:
        """Aggregate per-game box scores into season-level stats records."""
        rows = self._fetch_stat_rows()
        player_stats, _ = map_player_stats(rows, allowed_seasons=self.configured_seasons)
        return player_stats

    def fetch_player_careers(self) -> list[dict]:
        """Derive career (player-team-season) records from box scores."""
        rows = self._fetch_stat_rows()
        _, player_careers = map_player_stats(rows, allowed_seasons=self.configured_seasons)
        return player_careers

    def sync_all(self) -> dict:
        """Run a full fetch of every entity type and return one payload.

        Stats rows are fetched once and mapped into both ``player_stats``
        and ``player_careers``. ``cursor`` is always None because the API
        exposes no change feed.
        """
        logger.info(
            "provider_sync_start",
            extra={"provider": self.namespace, "seasons": self.configured_seasons},
        )
        competitions = self.fetch_competitions()
        teams = self.fetch_teams()
        seasons = self.fetch_seasons()
        players = self.fetch_players()
        all_rows = self._fetch_stat_rows()
        player_stats, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
        logger.info(
            "provider_sync_complete",
            extra={
                "provider": self.namespace,
                "competitions": len(competitions),
                "teams": len(teams),
                "seasons": len(seasons),
                "players": len(players),
                "player_stats": len(player_stats),
                "player_careers": len(player_careers),
            },
        )
        return {
            "players": players,
            "competitions": competitions,
            "teams": teams,
            "seasons": seasons,
            "player_stats": player_stats,
            "player_careers": player_careers,
            "cursor": None,
        }

    def sync_incremental(self, *, cursor: str | None = None) -> dict:
        """Incremental sync degrades to a full resync; the caller's cursor is echoed back."""
        payload = self.sync_all()
        payload["cursor"] = cursor
        return payload

View File

@ -0,0 +1,3 @@
# Public surface of the clients package: re-export the balldontlie HTTP
# client so callers can `from apps.providers.clients import BalldontlieClient`.
from .balldontlie import BalldontlieClient

__all__ = ["BalldontlieClient"]

View File

@ -0,0 +1,128 @@
import logging
import time
from typing import Any
import requests
from django.conf import settings
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
logger = logging.getLogger(__name__)
class BalldontlieClient:
    """HTTP client for balldontlie with timeout/retry/rate-limit handling.

    Wraps a requests session with per-request timeouts
    (PROVIDER_HTTP_TIMEOUT_SECONDS), linear-backoff retries for timeouts,
    network errors and 5xx responses, and Retry-After-aware backoff for
    429s. Exhausted retries surface as ProviderTransientError;
    a persistent 429 surfaces as ProviderRateLimitError.
    """

    def __init__(self, session: requests.Session | None = None):
        self.base_url = settings.PROVIDER_BALLDONTLIE_BASE_URL.rstrip("/")
        self.api_key = settings.PROVIDER_BALLDONTLIE_API_KEY
        self.timeout_seconds = settings.PROVIDER_HTTP_TIMEOUT_SECONDS
        self.max_retries = settings.PROVIDER_REQUEST_RETRIES
        self.retry_sleep_seconds = settings.PROVIDER_REQUEST_RETRY_SLEEP
        # Session is injectable so tests can script responses.
        self.session = session or requests.Session()

    def _headers(self) -> dict[str, str]:
        # The API key goes into Authorization as-is (no "Bearer" prefix),
        # matching how this adapter was configured; omitted when unset.
        headers = {"Accept": "application/json"}
        if self.api_key:
            headers["Authorization"] = self.api_key
        return headers

    @staticmethod
    def _retry_after_seconds(response) -> int:
        """Parse the Retry-After header as delta-seconds, defaulting to 30.

        Fix: Retry-After may also be an HTTP-date (RFC 9110); calling
        ``int()`` on that form raised ValueError from inside the retry
        loop. Any unparseable value now falls back to the 30s default.
        """
        raw = response.headers.get("Retry-After", "30") or "30"
        try:
            return int(raw)
        except (TypeError, ValueError):
            return 30

    def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """GET *path* relative to the base URL and return the JSON body.

        Retries (with linear backoff) on timeouts, network errors, 429 and
        5xx. Raises ProviderTransientError for exhausted transient
        failures, any non-429 4xx, or an invalid JSON body; raises
        ProviderRateLimitError when 429 persists past ``max_retries``.
        """
        url = f"{self.base_url}/{path.lstrip('/')}"
        for attempt in range(1, self.max_retries + 1):
            try:
                response = self.session.get(
                    url,
                    params=params,
                    headers=self._headers(),
                    timeout=self.timeout_seconds,
                )
            except requests.Timeout as exc:
                logger.warning(
                    "provider_http_timeout",
                    extra={"provider": "balldontlie", "url": url, "attempt": attempt},
                )
                if attempt >= self.max_retries:
                    raise ProviderTransientError(f"Timeout calling balldontlie: {url}") from exc
                time.sleep(self.retry_sleep_seconds * attempt)
                continue
            except requests.RequestException as exc:
                logger.warning(
                    "provider_http_error",
                    extra={"provider": "balldontlie", "url": url, "attempt": attempt},
                )
                if attempt >= self.max_retries:
                    raise ProviderTransientError(f"Network error calling balldontlie: {url}") from exc
                time.sleep(self.retry_sleep_seconds * attempt)
                continue
            status = response.status_code
            if status == 429:
                retry_after = self._retry_after_seconds(response)
                logger.warning(
                    "provider_rate_limited",
                    extra={
                        "provider": "balldontlie",
                        "url": url,
                        "attempt": attempt,
                        "retry_after": retry_after,
                    },
                )
                if attempt >= self.max_retries:
                    raise ProviderRateLimitError(
                        "balldontlie rate limit reached",
                        retry_after_seconds=retry_after,
                    )
                # Honour the server's requested wait, never sleeping less
                # than our own backoff schedule.
                time.sleep(max(retry_after, self.retry_sleep_seconds * attempt))
                continue
            if status >= 500:
                logger.warning(
                    "provider_server_error",
                    extra={"provider": "balldontlie", "url": url, "attempt": attempt, "status": status},
                )
                if attempt >= self.max_retries:
                    raise ProviderTransientError(f"balldontlie server error: {status}")
                time.sleep(self.retry_sleep_seconds * attempt)
                continue
            if status >= 400:
                # Non-retriable client error; keep a body snippet for debugging.
                body_preview = response.text[:240]
                raise ProviderTransientError(
                    f"balldontlie client error status={status} path={path} body={body_preview}"
                )
            try:
                return response.json()
            except ValueError as exc:
                raise ProviderTransientError(f"Invalid JSON from balldontlie for {path}") from exc
        # Unreachable in practice (every loop path returns, raises or
        # continues), kept as a defensive backstop.
        raise ProviderTransientError(f"Failed to call balldontlie path={path}")

    def list_paginated(
        self,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        per_page: int = 100,
        page_limit: int = 1,
    ) -> list[dict[str, Any]]:
        """Collect up to *page_limit* pages of ``data`` rows from *path*.

        Follows the API's ``meta.next_page`` cursor and stops early when it
        is absent. *params* is copied, never mutated.
        """
        page = 1
        rows: list[dict[str, Any]] = []
        query = dict(params or {})
        while page <= page_limit:
            query.update({"page": page, "per_page": per_page})
            payload = self.get_json(path, params=query)
            data = payload.get("data") or []
            if isinstance(data, list):
                rows.extend(data)
            meta = payload.get("meta") or {}
            next_page = meta.get("next_page")
            if not next_page:
                break
            page = int(next_page)
        return rows

View File

@ -1,16 +1,29 @@
from django.conf import settings from django.conf import settings
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.exceptions import ProviderNotFoundError from apps.providers.exceptions import ProviderNotFoundError
PROVIDER_REGISTRY = { PROVIDER_REGISTRY = {
MvpDemoProviderAdapter.namespace: MvpDemoProviderAdapter, MvpDemoProviderAdapter.namespace: MvpDemoProviderAdapter,
BalldontlieProviderAdapter.namespace: BalldontlieProviderAdapter,
} }
def get_default_provider_namespace() -> str:
    """Resolve the provider namespace selected by environment configuration.

    An explicit ``PROVIDER_DEFAULT_NAMESPACE`` always wins. Otherwise the
    ``PROVIDER_BACKEND`` switch is translated to its configured namespace;
    unknown backends fall back to the demo namespace.
    """
    explicit = settings.PROVIDER_DEFAULT_NAMESPACE
    if explicit:
        return explicit
    if settings.PROVIDER_BACKEND == "balldontlie":
        return settings.PROVIDER_NAMESPACE_BALLDONTLIE
    # "demo" and any unrecognised backend both resolve to the demo namespace.
    return settings.PROVIDER_NAMESPACE_DEMO
def get_provider(namespace: str | None = None): def get_provider(namespace: str | None = None):
provider_namespace = namespace or settings.PROVIDER_DEFAULT_NAMESPACE provider_namespace = namespace or get_default_provider_namespace()
provider_cls = PROVIDER_REGISTRY.get(provider_namespace) provider_cls = PROVIDER_REGISTRY.get(provider_namespace)
if not provider_cls: if not provider_cls:
raise ProviderNotFoundError(f"Unknown provider namespace: {provider_namespace}") raise ProviderNotFoundError(f"Unknown provider namespace: {provider_namespace}")

View File

@ -0,0 +1,260 @@
from __future__ import annotations
from collections import defaultdict
from datetime import date
from typing import Any
from django.utils.text import slugify
def map_competitions() -> list[dict[str, Any]]:
    """Return the static competition catalogue for this provider.

    balldontlie only covers the NBA, so the catalogue is a single
    hard-coded entry.
    """
    nba = {
        "external_id": "competition-nba",
        "name": "NBA",
        "slug": "nba",
        "competition_type": "league",
        "gender": "men",
        "level": 1,
        "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
        "is_active": True,
    }
    return [nba]
def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert raw balldontlie team payloads into provider team dicts.

    Rows without an ``id`` are dropped. Every team is treated as a US club
    team, since balldontlie is NBA-only.
    """
    teams: list[dict[str, Any]] = []
    for raw in rows:
        identifier = raw.get("id")
        if not identifier:
            continue
        display_name = raw.get("full_name") or raw.get("name") or f"Team {identifier}"
        short = (raw.get("abbreviation") or "").strip()
        mapped_team = {
            "external_id": f"team-{identifier}",
            "name": display_name,
            "short_name": short,
            # slugify can return "" for non-ASCII-only names; fall back to id.
            "slug": slugify(display_name) or f"team-{identifier}",
            "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
            "is_national_team": False,
        }
        teams.append(mapped_team)
    return teams
def _map_position(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
position_map = {
"G": ("PG", "Point Guard"),
"G-F": ("SG", "Shooting Guard"),
"F-G": ("SF", "Small Forward"),
"F": ("PF", "Power Forward"),
"F-C": ("PF", "Power Forward"),
"C-F": ("C", "Center"),
"C": ("C", "Center"),
}
code_name = position_map.get(normalized)
if not code_name:
return None
return {"code": code_name[0], "name": code_name[1]}
def _map_role(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
if "G" in normalized:
return {"code": "playmaker", "name": "Playmaker"}
if "F" in normalized:
return {"code": "wing", "name": "Wing"}
if "C" in normalized:
return {"code": "big", "name": "Big"}
return None
def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert raw balldontlie player payloads into provider player dicts.

    Rows lacking an ``id`` are skipped. Fields balldontlie does not expose
    (birth date, nationality, height, weight, handedness) are filled with
    neutral placeholder values.
    """
    players: list[dict[str, Any]] = []
    for raw in rows:
        identifier = raw.get("id")
        if not identifier:
            continue
        given = raw.get("first_name", "")
        family = raw.get("last_name", "")
        display = f"{given} {family}".strip() or f"Player {identifier}"
        raw_position = raw.get("position")
        team_info = raw.get("team") or {}
        team_external = f"team-{team_info['id']}" if team_info.get("id") else None
        players.append(
            {
                "external_id": f"player-{identifier}",
                "first_name": given,
                "last_name": family,
                "full_name": display,
                "birth_date": None,
                "nationality": {"name": "Unknown", "iso2_code": "ZZ", "iso3_code": "ZZZ"},
                "nominal_position": _map_position(raw_position),
                "inferred_role": _map_role(raw_position),
                "height_cm": None,
                "weight_kg": None,
                "dominant_hand": "unknown",
                "is_active": True,
                "aliases": [],
                "current_team_external_id": team_external,
            }
        )
    return players
def map_seasons(seasons: list[int]) -> list[dict[str, Any]]:
    """Build provider season dicts for each configured NBA start year.

    A season labelled "<year>-<year+1>" is modelled as running from Oct 1
    of the start year to Jun 30 of the following year.
    """
    return [
        {
            "external_id": f"season-{start_year}",
            "label": f"{start_year}-{start_year + 1}",
            "start_date": date(start_year, 10, 1).isoformat(),
            "end_date": date(start_year + 1, 6, 30).isoformat(),
            "is_current": False,
        }
        for start_year in seasons
    ]
def _to_float(value: Any) -> float:
if value in (None, ""):
return 0.0
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _parse_minutes(value: Any) -> int:
    """Parse a minutes value that may be numeric or a "MM:SS" string.

    Seconds are discarded; ``None``/"" and unparseable input yield 0.
    """
    if value is None or value == "":
        return 0
    if isinstance(value, (int, float)):
        return int(value)
    text = str(value)
    minutes_part = text.split(":", 1)[0] if ":" in text else text
    return int(_to_float(minutes_part))
def _pct(value: Any, *, count: int) -> float | None:
    """Average accumulated percentage values and normalise to the 0-100 scale.

    *value* is a sum of per-game percentages and *count* how many were
    summed. Averages expressed as fractions (<= 1) are scaled by 100.
    Returns ``None`` when no samples were collected.
    """
    if count <= 0:
        return None
    average = _to_float(value) / count
    if average <= 1:
        average *= 100
    return round(average, 2)
def map_player_stats(
    rows: list[dict[str, Any]],
    *,
    allowed_seasons: list[int],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Aggregate per-game balldontlie box scores into season-level records.

    Rows are grouped by (season, player id, team id). Counting stats are
    summed then averaged per game played; shooting percentages are averaged
    over only the games that reported them. Returns a
    ``(player_stats, player_careers)`` pair of provider dict lists.

    Rows missing any part of the grouping key, or outside *allowed_seasons*
    (when that list is non-empty), are skipped.
    """
    # Lazily-initialised accumulator per (season, player_id, team_id) key.
    aggregates: dict[tuple[int, int, int], dict[str, Any]] = defaultdict(
        lambda: {
            "games": 0,
            "minutes": 0,
            "points": 0.0,
            "rebounds": 0.0,
            "assists": 0.0,
            "steals": 0.0,
            "blocks": 0.0,
            "turnovers": 0.0,
            "fg_pct_sum": 0.0,
            "fg_pct_count": 0,
            "three_pct_sum": 0.0,
            "three_pct_count": 0,
            "ft_pct_sum": 0.0,
            "ft_pct_count": 0,
        }
    )
    for row in rows:
        game = row.get("game") or {}
        season = game.get("season")
        player = row.get("player") or {}
        team = row.get("team") or {}
        player_id = player.get("id")
        team_id = team.get("id")
        # Skip rows missing any part of the grouping key.
        if not (season and player_id and team_id):
            continue
        # An empty allowed_seasons list disables season filtering.
        if allowed_seasons and season not in allowed_seasons:
            continue
        key = (season, player_id, team_id)
        agg = aggregates[key]
        agg["games"] += 1
        # "min" is assumed to be "MM:SS" or a bare number — TODO confirm
        # against live API payloads; seconds are discarded either way.
        agg["minutes"] += _parse_minutes(row.get("min"))
        agg["points"] += _to_float(row.get("pts"))
        agg["rebounds"] += _to_float(row.get("reb"))
        agg["assists"] += _to_float(row.get("ast"))
        agg["steals"] += _to_float(row.get("stl"))
        agg["blocks"] += _to_float(row.get("blk"))
        agg["turnovers"] += _to_float(row.get("turnover"))
        # Percentages only count towards the average for games where the
        # field was present at all.
        if row.get("fg_pct") is not None:
            agg["fg_pct_sum"] += _to_float(row.get("fg_pct"))
            agg["fg_pct_count"] += 1
        if row.get("fg3_pct") is not None:
            agg["three_pct_sum"] += _to_float(row.get("fg3_pct"))
            agg["three_pct_count"] += 1
        if row.get("ft_pct") is not None:
            agg["ft_pct_sum"] += _to_float(row.get("ft_pct"))
            agg["ft_pct_count"] += 1
    player_stats: list[dict[str, Any]] = []
    player_careers: list[dict[str, Any]] = []
    for (season, player_id, team_id), agg in aggregates.items():
        # Defensive division-by-zero guard; games is always >= 1 here since
        # an aggregate only exists after at least one row incremented it.
        games = agg["games"] or 1
        player_stats.append(
            {
                "external_id": f"ps-{season}-{player_id}-{team_id}",
                "player_external_id": f"player-{player_id}",
                "team_external_id": f"team-{team_id}",
                "competition_external_id": "competition-nba",
                "season_external_id": f"season-{season}",
                "games_played": agg["games"],
                "games_started": 0,
                "minutes_played": agg["minutes"],
                # Per-game averages, rounded to 2 decimal places.
                "points": round(agg["points"] / games, 2),
                "rebounds": round(agg["rebounds"] / games, 2),
                "assists": round(agg["assists"] / games, 2),
                "steals": round(agg["steals"] / games, 2),
                "blocks": round(agg["blocks"] / games, 2),
                "turnovers": round(agg["turnovers"] / games, 2),
                "fg_pct": _pct(agg["fg_pct_sum"], count=agg["fg_pct_count"]),
                "three_pct": _pct(agg["three_pct_sum"], count=agg["three_pct_count"]),
                "ft_pct": _pct(agg["ft_pct_sum"], count=agg["ft_pct_count"]),
                # Advanced metrics are not derivable from these box scores.
                "usage_rate": None,
                "true_shooting_pct": None,
                "player_efficiency_rating": None,
            }
        )
        player_careers.append(
            {
                "external_id": f"career-{season}-{player_id}-{team_id}",
                "player_external_id": f"player-{player_id}",
                "team_external_id": f"team-{team_id}",
                "competition_external_id": "competition-nba",
                "season_external_id": f"season-{season}",
                "role_code": "",
                "shirt_number": None,
                # NBA season window: Oct 1 of start year to Jun 30 next year.
                "start_date": date(season, 10, 1).isoformat(),
                "end_date": date(season + 1, 6, 30).isoformat(),
                "notes": "Imported from balldontlie aggregated box scores",
            }
        )
    return player_stats, player_careers

View File

@ -118,13 +118,28 @@ CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800")) CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500")) CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "mvp_demo") PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
PROVIDER_MVP_DATA_FILE = os.getenv( PROVIDER_MVP_DATA_FILE = os.getenv(
"PROVIDER_MVP_DATA_FILE", "PROVIDER_MVP_DATA_FILE",
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"), str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
) )
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/v1")
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_SEASONS = [
int(value.strip())
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
if value.strip().isdigit()
]
REST_FRAMEWORK = { REST_FRAMEWORK = {
"DEFAULT_PERMISSION_CLASSES": [ "DEFAULT_PERMISSION_CLASSES": [

View File

@ -5,3 +5,4 @@ gunicorn>=22.0,<23.0
celery[redis]>=5.4,<6.0 celery[redis]>=5.4,<6.0
redis>=5.2,<6.0 redis>=5.2,<6.0
python-dotenv>=1.0,<2.0 python-dotenv>=1.0,<2.0
requests>=2.32,<3.0

View File

@ -7,6 +7,7 @@ from apps.ingestion.models import IngestionError, IngestionRun
from apps.ingestion.services.sync import run_sync_job from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Player from apps.players.models import Player
from apps.providers.exceptions import ProviderRateLimitError from apps.providers.exceptions import ProviderRateLimitError
from apps.providers.models import ExternalMapping
from apps.stats.models import PlayerSeason, PlayerSeasonStats from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team from apps.teams.models import Team
@ -81,3 +82,128 @@ def test_run_sync_handles_rate_limit(settings):
assert IngestionError.objects.filter(ingestion_run=run).exists() assert IngestionError.objects.filter(ingestion_run=run).exists()
os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None) os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
    """Running a full sync twice with an identical payload must not create duplicates."""

    class StableProvider:
        # Deterministic provider double: every call returns the same payload,
        # so a second sync run must upsert rather than insert new rows.
        def sync_all(self):
            return {
                "competitions": [
                    {
                        "external_id": "competition-nba",
                        "name": "NBA",
                        "slug": "nba",
                        "competition_type": "league",
                        "gender": "men",
                        "level": 1,
                        "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
                        "is_active": True,
                    }
                ],
                "teams": [
                    {
                        "external_id": "team-14",
                        "name": "Los Angeles Lakers",
                        "short_name": "LAL",
                        "slug": "los-angeles-lakers",
                        "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
                        "is_national_team": False,
                    }
                ],
                "seasons": [
                    {
                        "external_id": "season-2024",
                        "label": "2024-2025",
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "is_current": False,
                    }
                ],
                "players": [
                    {
                        "external_id": "player-237",
                        "first_name": "LeBron",
                        "last_name": "James",
                        "full_name": "LeBron James",
                        "birth_date": None,
                        "nationality": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
                        "nominal_position": {"code": "SF", "name": "Small Forward"},
                        "inferred_role": {"code": "wing", "name": "Wing"},
                        "height_cm": None,
                        "weight_kg": None,
                        "dominant_hand": "unknown",
                        "is_active": True,
                        "aliases": [],
                    }
                ],
                "player_stats": [
                    {
                        "external_id": "ps-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "games_played": 2,
                        "games_started": 0,
                        "minutes_played": 68,
                        "points": 25,
                        "rebounds": 9,
                        "assists": 8,
                        "steals": 1.5,
                        "blocks": 0.5,
                        "turnovers": 3.5,
                        "fg_pct": 55.0,
                        "three_pct": 45.0,
                        "ft_pct": 95.0,
                        "usage_rate": None,
                        "true_shooting_pct": None,
                        "player_efficiency_rating": None,
                    }
                ],
                "player_careers": [
                    {
                        "external_id": "career-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "role_code": "",
                        "shirt_number": None,
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "notes": "Imported from balldontlie aggregated box scores",
                    }
                ],
            }

        def sync_incremental(self, *, cursor: str | None = None):
            payload = self.sync_all()
            payload["cursor"] = cursor
            return payload

    # Route the sync service to our double regardless of requested namespace.
    monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())
    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_first = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }
    # Second run with a byte-identical payload.
    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_second = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }
    # Idempotency: no table may have grown on the second run.
    assert counts_first == counts_second

View File

@ -0,0 +1,183 @@
from __future__ import annotations
import time
from typing import Any
import pytest
import requests
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.clients.balldontlie import BalldontlieClient
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.registry import get_default_provider_namespace, get_provider
class _FakeResponse:
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
self.status_code = status_code
self._payload = payload or {}
self.headers = headers or {}
self.text = text
def json(self):
return self._payload
class _FakeSession:
def __init__(self, responses: list[Any]):
self._responses = responses
def get(self, *args, **kwargs):
item = self._responses.pop(0)
if isinstance(item, Exception):
raise item
return item
class _FakeBalldontlieClient:
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
if path == "teams":
return {
"data": [
{
"id": 14,
"full_name": "Los Angeles Lakers",
"abbreviation": "LAL",
}
]
}
return {"data": []}
def list_paginated(
self,
path: str,
*,
params: dict[str, Any] | None = None,
per_page: int = 100,
page_limit: int = 1,
) -> list[dict[str, Any]]:
if path == "players":
return [
{
"id": 237,
"first_name": "LeBron",
"last_name": "James",
"position": "F",
"team": {"id": 14},
}
]
if path == "stats":
return [
{
"pts": 20,
"reb": 8,
"ast": 7,
"stl": 1,
"blk": 1,
"turnover": 3,
"fg_pct": 0.5,
"fg3_pct": 0.4,
"ft_pct": 0.9,
"min": "35:12",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
{
"pts": 30,
"reb": 10,
"ast": 9,
"stl": 2,
"blk": 0,
"turnover": 4,
"fg_pct": 0.6,
"fg3_pct": 0.5,
"ft_pct": 1.0,
"min": "33:00",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
]
return []
@pytest.mark.django_db
def test_provider_registry_backend_selection(settings):
    """PROVIDER_BACKEND maps to the right namespace/adapter; explicit namespace wins."""
    # No explicit namespace: resolution falls through to PROVIDER_BACKEND.
    settings.PROVIDER_DEFAULT_NAMESPACE = ""
    settings.PROVIDER_BACKEND = "demo"
    assert get_default_provider_namespace() == "mvp_demo"
    assert isinstance(get_provider(), MvpDemoProviderAdapter)
    settings.PROVIDER_BACKEND = "balldontlie"
    assert get_default_provider_namespace() == "balldontlie"
    assert isinstance(get_provider(), BalldontlieProviderAdapter)
    # An explicit PROVIDER_DEFAULT_NAMESPACE overrides the backend switch.
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    assert get_default_provider_namespace() == "mvp_demo"
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
    """sync_all maps the fake client's payloads into provider-shaped records."""
    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient())
    payload = adapter.sync_all()
    assert payload["competitions"][0]["external_id"] == "competition-nba"
    assert payload["teams"][0]["external_id"] == "team-14"
    assert payload["players"][0]["external_id"] == "player-237"
    assert payload["seasons"][0]["external_id"] == "season-2024"
    # Two box-score rows (20 and 30 pts) aggregate to 2 games at 25.0 ppg.
    assert payload["player_stats"][0]["games_played"] == 2
    assert payload["player_stats"][0]["points"] == 25.0
    # fg_pct fractions (0.5, 0.6) average to 0.55 and are scaled to 55.0.
    assert payload["player_stats"][0]["fg_pct"] == 55.0
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
    """A 429 with Retry-After is retried and the subsequent 200 body is returned."""
    # Neutralise the backoff sleep so the test stays instant.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
    session = _FakeSession(
        responses=[
            _FakeResponse(status_code=429, headers={"Retry-After": "0"}),
            _FakeResponse(status_code=200, payload={"data": []}),
        ]
    )
    client = BalldontlieClient(session=session)
    payload = client.get_json("players")
    assert payload == {"data": []}
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
    """Timeouts on every attempt exhaust retries and raise ProviderTransientError."""
    # Neutralise the backoff sleep so the test stays instant.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
    # Two scripted timeouts match the two configured attempts.
    session = _FakeSession(responses=[requests.Timeout("slow"), requests.Timeout("slow")])
    client = BalldontlieClient(session=session)
    with pytest.raises(ProviderTransientError):
        client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
    """Persistent 429 responses surface as ProviderRateLimitError once retries run out."""
    # Neutralise the backoff sleep so the test stays instant.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
    # A 429 on every configured attempt.
    session = _FakeSession(
        responses=[
            _FakeResponse(status_code=429, headers={"Retry-After": "1"}),
            _FakeResponse(status_code=429, headers={"Retry-After": "1"}),
        ]
    )
    client = BalldontlieClient(session=session)
    with pytest.raises(ProviderRateLimitError):
        client.get_json("players")