feat(providers): add balldontlie http adapter with backend selection

This commit is contained in:
Alfredo Di Stasio
2026-03-10 12:11:03 +01:00
parent f9329df64f
commit acfccbea08
12 changed files with 917 additions and 16 deletions

View File

@ -29,10 +29,21 @@ AUTO_COLLECTSTATIC=1
GUNICORN_WORKERS=3
# Providers / ingestion
PROVIDER_DEFAULT_NAMESPACE=mvp_demo
PROVIDER_BACKEND=demo
PROVIDER_NAMESPACE_DEMO=mvp_demo
PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie
PROVIDER_DEFAULT_NAMESPACE=
PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
PROVIDER_REQUEST_RETRIES=3
PROVIDER_REQUEST_RETRY_SLEEP=1
PROVIDER_HTTP_TIMEOUT_SECONDS=10
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/v1
PROVIDER_BALLDONTLIE_API_KEY=
PROVIDER_BALLDONTLIE_SEASONS=2024
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500
API_THROTTLE_ANON=100/hour

View File

@ -155,8 +155,8 @@ Default auth routes:
- Open `/admin/` -> `IngestionRun`
- Use admin actions:
- `Queue full MVP sync`
- `Queue incremental MVP sync`
- `Queue full sync (default provider)`
- `Queue incremental sync (default provider)`
- `Retry selected ingestion runs`
### Trigger from shell (manual)
@ -167,7 +167,7 @@ docker compose exec web python manage.py shell
```python
from apps.ingestion.tasks import trigger_full_sync
trigger_full_sync.delay(provider_namespace="mvp_demo")
trigger_full_sync.delay(provider_namespace="balldontlie")
```
### Logs and diagnostics
@ -176,6 +176,16 @@ trigger_full_sync.delay(provider_namespace="mvp_demo")
- Structured error records: `IngestionError`
- Provider entity mappings + diagnostic payload snippets: `ExternalMapping`
## Provider Backend Selection
Provider backend is selected via environment variables:
- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`)
- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`)
- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
## GitFlow Workflow
GitFlow is required in this repository:

View File

@ -1,6 +1,8 @@
from django.contrib import admin
from django.contrib import messages
from apps.providers.registry import get_default_provider_namespace
from .models import IngestionError, IngestionRun
from .tasks import trigger_full_sync, trigger_incremental_sync
@ -41,20 +43,22 @@ class IngestionRunAdmin(admin.ModelAdmin):
"created_at",
)
actions = (
"enqueue_full_sync_mvp",
"enqueue_incremental_sync_mvp",
"enqueue_full_sync_default_provider",
"enqueue_incremental_sync_default_provider",
"retry_selected_runs",
)
@admin.action(description="Queue full MVP sync")
def enqueue_full_sync_mvp(self, request, queryset):
trigger_full_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id)
self.message_user(request, "Queued full MVP sync task.", level=messages.SUCCESS)
@admin.action(description="Queue full sync (default provider)")
def enqueue_full_sync_default_provider(self, request, queryset):
provider_namespace = get_default_provider_namespace()
trigger_full_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
self.message_user(request, f"Queued full sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Queue incremental MVP sync")
def enqueue_incremental_sync_mvp(self, request, queryset):
trigger_incremental_sync.delay(provider_namespace="mvp_demo", triggered_by_id=request.user.id)
self.message_user(request, "Queued incremental MVP sync task.", level=messages.SUCCESS)
@admin.action(description="Queue incremental sync (default provider)")
def enqueue_incremental_sync_default_provider(self, request, queryset):
provider_namespace = get_default_provider_namespace()
trigger_incremental_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
self.message_user(request, f"Queued incremental sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Retry selected ingestion runs")
def retry_selected_runs(self, request, queryset):

View File

@ -0,0 +1,147 @@
import logging
from django.conf import settings
from apps.providers.clients import BalldontlieClient
from apps.providers.interfaces import BaseProviderAdapter
from apps.providers.services.balldontlie_mappings import (
map_competitions,
map_player_stats,
map_players,
map_seasons,
map_teams,
)
logger = logging.getLogger(__name__)
class BalldontlieProviderAdapter(BaseProviderAdapter):
"""HTTP MVP adapter for balldontlie (NBA-centric data source)."""
namespace = "balldontlie"
def __init__(self, client: BalldontlieClient | None = None):
self.client = client or BalldontlieClient()
@property
def configured_seasons(self) -> list[int]:
return settings.PROVIDER_BALLDONTLIE_SEASONS
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
params = {"search": query} if query else None
rows = self.client.list_paginated(
"players",
params=params,
per_page=min(limit, settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE),
page_limit=1,
)
mapped = map_players(rows)
return mapped[offset : offset + limit]
def fetch_player(self, *, external_player_id: str) -> dict | None:
if not external_player_id.startswith("player-"):
return None
player_id = external_player_id.replace("player-", "", 1)
payload = self.client.get_json(f"players/{player_id}")
data = payload.get("data")
if not isinstance(data, dict):
return None
mapped = map_players([data])
return mapped[0] if mapped else None
def fetch_players(self) -> list[dict]:
rows = self.client.list_paginated(
"players",
per_page=settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT,
)
return map_players(rows)
def fetch_competitions(self) -> list[dict]:
return map_competitions()
def fetch_teams(self) -> list[dict]:
payload = self.client.get_json("teams")
rows = payload.get("data") or []
return map_teams(rows if isinstance(rows, list) else [])
def fetch_seasons(self) -> list[dict]:
return map_seasons(self.configured_seasons)
def fetch_player_stats(self) -> list[dict]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
player_stats, _ = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_stats
def fetch_player_careers(self) -> list[dict]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
_, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_careers
def sync_all(self) -> dict:
logger.info(
"provider_sync_start",
extra={"provider": self.namespace, "seasons": self.configured_seasons},
)
competitions = self.fetch_competitions()
teams = self.fetch_teams()
seasons = self.fetch_seasons()
players = self.fetch_players()
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
player_stats, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
logger.info(
"provider_sync_complete",
extra={
"provider": self.namespace,
"competitions": len(competitions),
"teams": len(teams),
"seasons": len(seasons),
"players": len(players),
"player_stats": len(player_stats),
"player_careers": len(player_careers),
},
)
return {
"players": players,
"competitions": competitions,
"teams": teams,
"seasons": seasons,
"player_stats": player_stats,
"player_careers": player_careers,
"cursor": None,
}
def sync_incremental(self, *, cursor: str | None = None) -> dict:
payload = self.sync_all()
payload["cursor"] = cursor
return payload

View File

@ -0,0 +1,3 @@
from .balldontlie import BalldontlieClient
__all__ = ["BalldontlieClient"]

View File

@ -0,0 +1,128 @@
import logging
import time
from typing import Any
import requests
from django.conf import settings
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
logger = logging.getLogger(__name__)
class BalldontlieClient:
"""HTTP client for balldontlie with timeout/retry/rate-limit handling."""
def __init__(self, session: requests.Session | None = None):
self.base_url = settings.PROVIDER_BALLDONTLIE_BASE_URL.rstrip("/")
self.api_key = settings.PROVIDER_BALLDONTLIE_API_KEY
self.timeout_seconds = settings.PROVIDER_HTTP_TIMEOUT_SECONDS
self.max_retries = settings.PROVIDER_REQUEST_RETRIES
self.retry_sleep_seconds = settings.PROVIDER_REQUEST_RETRY_SLEEP
self.session = session or requests.Session()
def _headers(self) -> dict[str, str]:
headers = {"Accept": "application/json"}
if self.api_key:
headers["Authorization"] = self.api_key
return headers
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
url = f"{self.base_url}/{path.lstrip('/')}"
for attempt in range(1, self.max_retries + 1):
try:
response = self.session.get(
url,
params=params,
headers=self._headers(),
timeout=self.timeout_seconds,
)
except requests.Timeout as exc:
logger.warning(
"provider_http_timeout",
extra={"provider": "balldontlie", "url": url, "attempt": attempt},
)
if attempt >= self.max_retries:
raise ProviderTransientError(f"Timeout calling balldontlie: {url}") from exc
time.sleep(self.retry_sleep_seconds * attempt)
continue
except requests.RequestException as exc:
logger.warning(
"provider_http_error",
extra={"provider": "balldontlie", "url": url, "attempt": attempt},
)
if attempt >= self.max_retries:
raise ProviderTransientError(f"Network error calling balldontlie: {url}") from exc
time.sleep(self.retry_sleep_seconds * attempt)
continue
status = response.status_code
if status == 429:
retry_after = int(response.headers.get("Retry-After", "30") or "30")
logger.warning(
"provider_rate_limited",
extra={
"provider": "balldontlie",
"url": url,
"attempt": attempt,
"retry_after": retry_after,
},
)
if attempt >= self.max_retries:
raise ProviderRateLimitError(
"balldontlie rate limit reached",
retry_after_seconds=retry_after,
)
time.sleep(max(retry_after, self.retry_sleep_seconds * attempt))
continue
if status >= 500:
logger.warning(
"provider_server_error",
extra={"provider": "balldontlie", "url": url, "attempt": attempt, "status": status},
)
if attempt >= self.max_retries:
raise ProviderTransientError(f"balldontlie server error: {status}")
time.sleep(self.retry_sleep_seconds * attempt)
continue
if status >= 400:
body_preview = response.text[:240]
raise ProviderTransientError(
f"balldontlie client error status={status} path={path} body={body_preview}"
)
try:
return response.json()
except ValueError as exc:
raise ProviderTransientError(f"Invalid JSON from balldontlie for {path}") from exc
raise ProviderTransientError(f"Failed to call balldontlie path={path}")
def list_paginated(
self,
path: str,
*,
params: dict[str, Any] | None = None,
per_page: int = 100,
page_limit: int = 1,
) -> list[dict[str, Any]]:
page = 1
rows: list[dict[str, Any]] = []
query = dict(params or {})
while page <= page_limit:
query.update({"page": page, "per_page": per_page})
payload = self.get_json(path, params=query)
data = payload.get("data") or []
if isinstance(data, list):
rows.extend(data)
meta = payload.get("meta") or {}
next_page = meta.get("next_page")
if not next_page:
break
page = int(next_page)
return rows

View File

@ -1,16 +1,29 @@
from django.conf import settings
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.exceptions import ProviderNotFoundError
PROVIDER_REGISTRY = {
MvpDemoProviderAdapter.namespace: MvpDemoProviderAdapter,
BalldontlieProviderAdapter.namespace: BalldontlieProviderAdapter,
}
def get_default_provider_namespace() -> str:
if settings.PROVIDER_DEFAULT_NAMESPACE:
return settings.PROVIDER_DEFAULT_NAMESPACE
backend_map = {
"demo": settings.PROVIDER_NAMESPACE_DEMO,
"balldontlie": settings.PROVIDER_NAMESPACE_BALLDONTLIE,
}
return backend_map.get(settings.PROVIDER_BACKEND, settings.PROVIDER_NAMESPACE_DEMO)
def get_provider(namespace: str | None = None):
provider_namespace = namespace or settings.PROVIDER_DEFAULT_NAMESPACE
provider_namespace = namespace or get_default_provider_namespace()
provider_cls = PROVIDER_REGISTRY.get(provider_namespace)
if not provider_cls:
raise ProviderNotFoundError(f"Unknown provider namespace: {provider_namespace}")

View File

@ -0,0 +1,260 @@
from __future__ import annotations
from collections import defaultdict
from datetime import date
from typing import Any
from django.utils.text import slugify
def map_competitions() -> list[dict[str, Any]]:
return [
{
"external_id": "competition-nba",
"name": "NBA",
"slug": "nba",
"competition_type": "league",
"gender": "men",
"level": 1,
"country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"is_active": True,
}
]
def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
for row in rows:
team_id = row.get("id")
if not team_id:
continue
full_name = row.get("full_name") or row.get("name") or f"Team {team_id}"
abbreviation = (row.get("abbreviation") or "").strip()
mapped.append(
{
"external_id": f"team-{team_id}",
"name": full_name,
"short_name": abbreviation,
"slug": slugify(full_name) or f"team-{team_id}",
"country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"is_national_team": False,
}
)
return mapped
def _map_position(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
position_map = {
"G": ("PG", "Point Guard"),
"G-F": ("SG", "Shooting Guard"),
"F-G": ("SF", "Small Forward"),
"F": ("PF", "Power Forward"),
"F-C": ("PF", "Power Forward"),
"C-F": ("C", "Center"),
"C": ("C", "Center"),
}
code_name = position_map.get(normalized)
if not code_name:
return None
return {"code": code_name[0], "name": code_name[1]}
def _map_role(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
if "G" in normalized:
return {"code": "playmaker", "name": "Playmaker"}
if "F" in normalized:
return {"code": "wing", "name": "Wing"}
if "C" in normalized:
return {"code": "big", "name": "Big"}
return None
def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
for row in rows:
player_id = row.get("id")
if not player_id:
continue
first_name = row.get("first_name", "")
last_name = row.get("last_name", "")
full_name = f"{first_name} {last_name}".strip() or f"Player {player_id}"
position_value = row.get("position")
team = row.get("team") or {}
mapped.append(
{
"external_id": f"player-{player_id}",
"first_name": first_name,
"last_name": last_name,
"full_name": full_name,
"birth_date": None,
"nationality": {"name": "Unknown", "iso2_code": "ZZ", "iso3_code": "ZZZ"},
"nominal_position": _map_position(position_value),
"inferred_role": _map_role(position_value),
"height_cm": None,
"weight_kg": None,
"dominant_hand": "unknown",
"is_active": True,
"aliases": [],
"current_team_external_id": f"team-{team['id']}" if team.get("id") else None,
}
)
return mapped
def map_seasons(seasons: list[int]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
for season in seasons:
mapped.append(
{
"external_id": f"season-{season}",
"label": f"{season}-{season + 1}",
"start_date": date(season, 10, 1).isoformat(),
"end_date": date(season + 1, 6, 30).isoformat(),
"is_current": False,
}
)
return mapped
def _to_float(value: Any) -> float:
if value in (None, ""):
return 0.0
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _parse_minutes(value: Any) -> int:
if value in (None, ""):
return 0
if isinstance(value, (int, float)):
return int(value)
text = str(value)
if ":" in text:
minutes, _ = text.split(":", 1)
return int(_to_float(minutes))
return int(_to_float(text))
def _pct(value: Any, *, count: int) -> float | None:
if count <= 0:
return None
pct = _to_float(value) / count
if pct <= 1:
pct *= 100
return round(pct, 2)
def map_player_stats(
rows: list[dict[str, Any]],
*,
allowed_seasons: list[int],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
aggregates: dict[tuple[int, int, int], dict[str, Any]] = defaultdict(
lambda: {
"games": 0,
"minutes": 0,
"points": 0.0,
"rebounds": 0.0,
"assists": 0.0,
"steals": 0.0,
"blocks": 0.0,
"turnovers": 0.0,
"fg_pct_sum": 0.0,
"fg_pct_count": 0,
"three_pct_sum": 0.0,
"three_pct_count": 0,
"ft_pct_sum": 0.0,
"ft_pct_count": 0,
}
)
for row in rows:
game = row.get("game") or {}
season = game.get("season")
player = row.get("player") or {}
team = row.get("team") or {}
player_id = player.get("id")
team_id = team.get("id")
if not (season and player_id and team_id):
continue
if allowed_seasons and season not in allowed_seasons:
continue
key = (season, player_id, team_id)
agg = aggregates[key]
agg["games"] += 1
agg["minutes"] += _parse_minutes(row.get("min"))
agg["points"] += _to_float(row.get("pts"))
agg["rebounds"] += _to_float(row.get("reb"))
agg["assists"] += _to_float(row.get("ast"))
agg["steals"] += _to_float(row.get("stl"))
agg["blocks"] += _to_float(row.get("blk"))
agg["turnovers"] += _to_float(row.get("turnover"))
if row.get("fg_pct") is not None:
agg["fg_pct_sum"] += _to_float(row.get("fg_pct"))
agg["fg_pct_count"] += 1
if row.get("fg3_pct") is not None:
agg["three_pct_sum"] += _to_float(row.get("fg3_pct"))
agg["three_pct_count"] += 1
if row.get("ft_pct") is not None:
agg["ft_pct_sum"] += _to_float(row.get("ft_pct"))
agg["ft_pct_count"] += 1
player_stats: list[dict[str, Any]] = []
player_careers: list[dict[str, Any]] = []
for (season, player_id, team_id), agg in aggregates.items():
games = agg["games"] or 1
player_stats.append(
{
"external_id": f"ps-{season}-{player_id}-{team_id}",
"player_external_id": f"player-{player_id}",
"team_external_id": f"team-{team_id}",
"competition_external_id": "competition-nba",
"season_external_id": f"season-{season}",
"games_played": agg["games"],
"games_started": 0,
"minutes_played": agg["minutes"],
"points": round(agg["points"] / games, 2),
"rebounds": round(agg["rebounds"] / games, 2),
"assists": round(agg["assists"] / games, 2),
"steals": round(agg["steals"] / games, 2),
"blocks": round(agg["blocks"] / games, 2),
"turnovers": round(agg["turnovers"] / games, 2),
"fg_pct": _pct(agg["fg_pct_sum"], count=agg["fg_pct_count"]),
"three_pct": _pct(agg["three_pct_sum"], count=agg["three_pct_count"]),
"ft_pct": _pct(agg["ft_pct_sum"], count=agg["ft_pct_count"]),
"usage_rate": None,
"true_shooting_pct": None,
"player_efficiency_rating": None,
}
)
player_careers.append(
{
"external_id": f"career-{season}-{player_id}-{team_id}",
"player_external_id": f"player-{player_id}",
"team_external_id": f"team-{team_id}",
"competition_external_id": "competition-nba",
"season_external_id": f"season-{season}",
"role_code": "",
"shirt_number": None,
"start_date": date(season, 10, 1).isoformat(),
"end_date": date(season + 1, 6, 30).isoformat(),
"notes": "Imported from balldontlie aggregated box scores",
}
)
return player_stats, player_careers

View File

@ -118,13 +118,28 @@ CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "mvp_demo")
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
PROVIDER_MVP_DATA_FILE = os.getenv(
"PROVIDER_MVP_DATA_FILE",
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
)
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/v1")
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_SEASONS = [
int(value.strip())
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
if value.strip().isdigit()
]
REST_FRAMEWORK = {
"DEFAULT_PERMISSION_CLASSES": [

View File

@ -5,3 +5,4 @@ gunicorn>=22.0,<23.0
celery[redis]>=5.4,<6.0
redis>=5.2,<6.0
python-dotenv>=1.0,<2.0
requests>=2.32,<3.0

View File

@ -7,6 +7,7 @@ from apps.ingestion.models import IngestionError, IngestionRun
from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Player
from apps.providers.exceptions import ProviderRateLimitError
from apps.providers.models import ExternalMapping
from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team
@ -81,3 +82,128 @@ def test_run_sync_handles_rate_limit(settings):
assert IngestionError.objects.filter(ingestion_run=run).exists()
os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
class StableProvider:
def sync_all(self):
return {
"competitions": [
{
"external_id": "competition-nba",
"name": "NBA",
"slug": "nba",
"competition_type": "league",
"gender": "men",
"level": 1,
"country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"is_active": True,
}
],
"teams": [
{
"external_id": "team-14",
"name": "Los Angeles Lakers",
"short_name": "LAL",
"slug": "los-angeles-lakers",
"country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"is_national_team": False,
}
],
"seasons": [
{
"external_id": "season-2024",
"label": "2024-2025",
"start_date": "2024-10-01",
"end_date": "2025-06-30",
"is_current": False,
}
],
"players": [
{
"external_id": "player-237",
"first_name": "LeBron",
"last_name": "James",
"full_name": "LeBron James",
"birth_date": None,
"nationality": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"nominal_position": {"code": "SF", "name": "Small Forward"},
"inferred_role": {"code": "wing", "name": "Wing"},
"height_cm": None,
"weight_kg": None,
"dominant_hand": "unknown",
"is_active": True,
"aliases": [],
}
],
"player_stats": [
{
"external_id": "ps-2024-237-14",
"player_external_id": "player-237",
"team_external_id": "team-14",
"competition_external_id": "competition-nba",
"season_external_id": "season-2024",
"games_played": 2,
"games_started": 0,
"minutes_played": 68,
"points": 25,
"rebounds": 9,
"assists": 8,
"steals": 1.5,
"blocks": 0.5,
"turnovers": 3.5,
"fg_pct": 55.0,
"three_pct": 45.0,
"ft_pct": 95.0,
"usage_rate": None,
"true_shooting_pct": None,
"player_efficiency_rating": None,
}
],
"player_careers": [
{
"external_id": "career-2024-237-14",
"player_external_id": "player-237",
"team_external_id": "team-14",
"competition_external_id": "competition-nba",
"season_external_id": "season-2024",
"role_code": "",
"shirt_number": None,
"start_date": "2024-10-01",
"end_date": "2025-06-30",
"notes": "Imported from balldontlie aggregated box scores",
}
],
}
def sync_incremental(self, *, cursor: str | None = None):
payload = self.sync_all()
payload["cursor"] = cursor
return payload
monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())
run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
counts_first = {
"competition": Competition.objects.count(),
"team": Team.objects.count(),
"season": Season.objects.count(),
"player": Player.objects.count(),
"player_season": PlayerSeason.objects.count(),
"player_stats": PlayerSeasonStats.objects.count(),
"mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
}
run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
counts_second = {
"competition": Competition.objects.count(),
"team": Team.objects.count(),
"season": Season.objects.count(),
"player": Player.objects.count(),
"player_season": PlayerSeason.objects.count(),
"player_stats": PlayerSeasonStats.objects.count(),
"mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
}
assert counts_first == counts_second

View File

@ -0,0 +1,183 @@
from __future__ import annotations
import time
from typing import Any
import pytest
import requests
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.clients.balldontlie import BalldontlieClient
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.registry import get_default_provider_namespace, get_provider
class _FakeResponse:
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
self.status_code = status_code
self._payload = payload or {}
self.headers = headers or {}
self.text = text
def json(self):
return self._payload
class _FakeSession:
def __init__(self, responses: list[Any]):
self._responses = responses
def get(self, *args, **kwargs):
item = self._responses.pop(0)
if isinstance(item, Exception):
raise item
return item
class _FakeBalldontlieClient:
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
if path == "teams":
return {
"data": [
{
"id": 14,
"full_name": "Los Angeles Lakers",
"abbreviation": "LAL",
}
]
}
return {"data": []}
def list_paginated(
self,
path: str,
*,
params: dict[str, Any] | None = None,
per_page: int = 100,
page_limit: int = 1,
) -> list[dict[str, Any]]:
if path == "players":
return [
{
"id": 237,
"first_name": "LeBron",
"last_name": "James",
"position": "F",
"team": {"id": 14},
}
]
if path == "stats":
return [
{
"pts": 20,
"reb": 8,
"ast": 7,
"stl": 1,
"blk": 1,
"turnover": 3,
"fg_pct": 0.5,
"fg3_pct": 0.4,
"ft_pct": 0.9,
"min": "35:12",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
{
"pts": 30,
"reb": 10,
"ast": 9,
"stl": 2,
"blk": 0,
"turnover": 4,
"fg_pct": 0.6,
"fg3_pct": 0.5,
"ft_pct": 1.0,
"min": "33:00",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
]
return []
@pytest.mark.django_db
def test_provider_registry_backend_selection(settings):
settings.PROVIDER_DEFAULT_NAMESPACE = ""
settings.PROVIDER_BACKEND = "demo"
assert get_default_provider_namespace() == "mvp_demo"
assert isinstance(get_provider(), MvpDemoProviderAdapter)
settings.PROVIDER_BACKEND = "balldontlie"
assert get_default_provider_namespace() == "balldontlie"
assert isinstance(get_provider(), BalldontlieProviderAdapter)
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
assert get_default_provider_namespace() == "mvp_demo"
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient())
payload = adapter.sync_all()
assert payload["competitions"][0]["external_id"] == "competition-nba"
assert payload["teams"][0]["external_id"] == "team-14"
assert payload["players"][0]["external_id"] == "player-237"
assert payload["seasons"][0]["external_id"] == "season-2024"
assert payload["player_stats"][0]["games_played"] == 2
assert payload["player_stats"][0]["points"] == 25.0
assert payload["player_stats"][0]["fg_pct"] == 55.0
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(
responses=[
_FakeResponse(status_code=429, headers={"Retry-After": "0"}),
_FakeResponse(status_code=200, payload={"data": []}),
]
)
client = BalldontlieClient(session=session)
payload = client.get_json("players")
assert payload == {"data": []}
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(responses=[requests.Timeout("slow"), requests.Timeout("slow")])
client = BalldontlieClient(session=session)
with pytest.raises(ProviderTransientError):
client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(
responses=[
_FakeResponse(status_code=429, headers={"Retry-After": "1"}),
_FakeResponse(status_code=429, headers={"Retry-After": "1"}),
]
)
client = BalldontlieClient(session=session)
with pytest.raises(ProviderRateLimitError):
client.get_json("players")