feat(providers): add balldontlie http adapter with backend selection

This commit is contained in:
Alfredo Di Stasio
2026-03-10 12:11:03 +01:00
parent f9329df64f
commit acfccbea08
12 changed files with 917 additions and 16 deletions

View File

@ -0,0 +1,147 @@
import logging
from django.conf import settings
from apps.providers.clients import BalldontlieClient
from apps.providers.interfaces import BaseProviderAdapter
from apps.providers.services.balldontlie_mappings import (
map_competitions,
map_player_stats,
map_players,
map_seasons,
map_teams,
)
logger = logging.getLogger(__name__)
class BalldontlieProviderAdapter(BaseProviderAdapter):
    """HTTP MVP adapter for balldontlie (NBA-centric data source).

    Raw rows are downloaded through a ``BalldontlieClient`` and converted to
    plain dicts by the ``balldontlie_mappings`` helpers.
    """

    namespace = "balldontlie"

    def __init__(self, client: BalldontlieClient | None = None):
        """Use *client* when given, otherwise build a default client."""
        self.client = client or BalldontlieClient()

    @property
    def configured_seasons(self) -> list[int]:
        """Seasons to import, read from ``PROVIDER_BALLDONTLIE_SEASONS``."""
        return settings.PROVIDER_BALLDONTLIE_SEASONS

    def _fetch_stat_rows(self) -> list[dict]:
        """Download the raw ``stats`` rows for every configured season.

        Shared by ``fetch_player_stats``, ``fetch_player_careers`` and
        ``sync_all`` so the request parameters live in exactly one place.
        """
        all_rows: list[dict] = []
        for season in self.configured_seasons:
            all_rows.extend(
                self.client.list_paginated(
                    "stats",
                    params={"seasons[]": season},
                    per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
                    page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
                )
            )
        return all_rows

    def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
        """Search players by name and return the ``[offset, offset + limit)`` window.

        The window is cut out of the mapped result list, so enough rows must
        be downloaded to reach ``offset + limit``. Fetching a single page of
        ``limit`` rows (the previous behaviour) produced an empty or truncated
        result for every ``offset > 0``.
        """
        if limit <= 0:
            return []
        offset = max(offset, 0)
        wanted = offset + limit
        per_page = max(1, min(wanted, settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE))
        # Ceiling division without importing math; capped by the same page
        # ceiling the full player crawl uses so a huge offset cannot trigger
        # an unbounded number of requests.
        pages_needed = -(-wanted // per_page)
        page_limit = max(1, min(pages_needed, settings.PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT))

        params = {"search": query} if query else None
        rows = self.client.list_paginated(
            "players",
            params=params,
            per_page=per_page,
            page_limit=page_limit,
        )
        mapped = map_players(rows)
        return mapped[offset : offset + limit]

    def fetch_player(self, *, external_player_id: str) -> dict | None:
        """Fetch one player by its ``player-<id>`` external id.

        Returns ``None`` when the id does not carry the ``player-`` prefix,
        has nothing after the prefix, or the response holds no player object.
        """
        if not external_player_id.startswith("player-"):
            return None
        player_id = external_player_id.removeprefix("player-")
        if not player_id:
            # "players/" would address the list endpoint, not a single player.
            return None
        payload = self.client.get_json(f"players/{player_id}")
        data = payload.get("data")
        if not isinstance(data, dict):
            return None
        mapped = map_players([data])
        return mapped[0] if mapped else None

    def fetch_players(self) -> list[dict]:
        """Fetch and map players, bounded by the configured page limit."""
        rows = self.client.list_paginated(
            "players",
            per_page=settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE,
            page_limit=settings.PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT,
        )
        return map_players(rows)

    def fetch_competitions(self) -> list[dict]:
        """Return the competition list from the mapping module (no HTTP call)."""
        return map_competitions()

    def fetch_teams(self) -> list[dict]:
        """Fetch the ``teams`` endpoint and map its ``data`` list."""
        payload = self.client.get_json("teams")
        rows = payload.get("data") or []
        return map_teams(rows if isinstance(rows, list) else [])

    def fetch_seasons(self) -> list[dict]:
        """Build season records from the configured season years (no HTTP call)."""
        return map_seasons(self.configured_seasons)

    def fetch_player_stats(self) -> list[dict]:
        """Download stats rows and return the per-season player stat records."""
        player_stats, _ = map_player_stats(
            self._fetch_stat_rows(),
            allowed_seasons=self.configured_seasons,
        )
        return player_stats

    def fetch_player_careers(self) -> list[dict]:
        """Download stats rows and return the derived career records."""
        _, player_careers = map_player_stats(
            self._fetch_stat_rows(),
            allowed_seasons=self.configured_seasons,
        )
        return player_careers

    def sync_all(self) -> dict:
        """Fetch every entity type and return them in one payload.

        The stats rows are downloaded once and mapped into both the stat and
        career records. The returned ``cursor`` is always ``None``.
        """
        logger.info(
            "provider_sync_start",
            extra={"provider": self.namespace, "seasons": self.configured_seasons},
        )
        competitions = self.fetch_competitions()
        teams = self.fetch_teams()
        seasons = self.fetch_seasons()
        players = self.fetch_players()
        player_stats, player_careers = map_player_stats(
            self._fetch_stat_rows(),
            allowed_seasons=self.configured_seasons,
        )

        logger.info(
            "provider_sync_complete",
            extra={
                "provider": self.namespace,
                "competitions": len(competitions),
                "teams": len(teams),
                "seasons": len(seasons),
                "players": len(players),
                "player_stats": len(player_stats),
                "player_careers": len(player_careers),
            },
        )
        return {
            "players": players,
            "competitions": competitions,
            "teams": teams,
            "seasons": seasons,
            "player_stats": player_stats,
            "player_careers": player_careers,
            "cursor": None,
        }

    def sync_incremental(self, *, cursor: str | None = None) -> dict:
        """Run a full sync and echo *cursor* back in the payload.

        The cursor is not used to narrow what is fetched; it is only copied
        into the result so callers get back the value they passed in.
        """
        payload = self.sync_all()
        payload["cursor"] = cursor
        return payload

View File

@ -0,0 +1,3 @@
# Re-export the balldontlie HTTP client as this package's public name.
from .balldontlie import BalldontlieClient
__all__ = ["BalldontlieClient"]

View File

@ -0,0 +1,128 @@
import logging
import time
from typing import Any
import requests
from django.conf import settings
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
logger = logging.getLogger(__name__)
class BalldontlieClient:
    """HTTP client for balldontlie with timeout/retry/rate-limit handling."""

    def __init__(self, session: requests.Session | None = None):
        """Read connection settings and reuse *session* when one is given."""
        self.base_url = settings.PROVIDER_BALLDONTLIE_BASE_URL.rstrip("/")
        self.api_key = settings.PROVIDER_BALLDONTLIE_API_KEY
        self.timeout_seconds = settings.PROVIDER_HTTP_TIMEOUT_SECONDS
        self.max_retries = settings.PROVIDER_REQUEST_RETRIES
        self.retry_sleep_seconds = settings.PROVIDER_REQUEST_RETRY_SLEEP
        self.session = session or requests.Session()

    def _headers(self) -> dict[str, str]:
        """Build request headers; the API key is sent only when configured."""
        headers = {"Accept": "application/json"}
        if self.api_key:
            headers["Authorization"] = self.api_key
        return headers

    @staticmethod
    def _parse_retry_after(raw: Any, *, default: int = 30) -> int:
        """Return the ``Retry-After`` delay in whole seconds.

        Falls back to *default* when the header is missing, empty, or not an
        integer. The header may legally carry an HTTP-date instead of a
        number; that form also falls back to *default* rather than crashing.
        """
        if raw is None:
            return default
        try:
            seconds = int(str(raw).strip())
        except ValueError:
            return default
        return max(seconds, 0)

    @staticmethod
    def _positive_int(value: Any) -> int | None:
        """Return *value* as a positive int, or ``None`` if it is not one."""
        try:
            number = int(value)
        except (TypeError, ValueError):
            return None
        return number if number > 0 else None

    def _backoff(self, attempt: int) -> None:
        """Sleep for a delay that grows linearly with the attempt number."""
        time.sleep(self.retry_sleep_seconds * attempt)

    def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
        """GET *path* and return the decoded JSON object.

        Timeouts, network errors, HTTP 429 and HTTP 5xx are retried with a
        growing delay until the attempt budget is used up. At least one
        request is always made, even if ``max_retries`` is configured as 0.

        Raises:
            ProviderRateLimitError: still rate limited on the last attempt.
            ProviderTransientError: timeout/network/5xx on the last attempt,
                any other 4xx response (not retried), or a body that is not
                a JSON object.
        """
        url = f"{self.base_url}/{path.lstrip('/')}"
        attempts = max(1, self.max_retries)

        for attempt in range(1, attempts + 1):
            is_last = attempt >= attempts
            try:
                response = self.session.get(
                    url,
                    params=params,
                    headers=self._headers(),
                    timeout=self.timeout_seconds,
                )
            except requests.RequestException as exc:
                # requests.Timeout is a RequestException subclass; it keeps
                # its own log event and error message.
                timed_out = isinstance(exc, requests.Timeout)
                logger.warning(
                    "provider_http_timeout" if timed_out else "provider_http_error",
                    extra={"provider": "balldontlie", "url": url, "attempt": attempt},
                )
                if is_last:
                    reason = "Timeout" if timed_out else "Network error"
                    raise ProviderTransientError(f"{reason} calling balldontlie: {url}") from exc
                self._backoff(attempt)
                continue

            status = response.status_code

            if status == 429:
                retry_after = self._parse_retry_after(response.headers.get("Retry-After"))
                logger.warning(
                    "provider_rate_limited",
                    extra={
                        "provider": "balldontlie",
                        "url": url,
                        "attempt": attempt,
                        "retry_after": retry_after,
                    },
                )
                if is_last:
                    raise ProviderRateLimitError(
                        "balldontlie rate limit reached",
                        retry_after_seconds=retry_after,
                    )
                # Honour the server's hint, but never wait less than our own backoff.
                time.sleep(max(retry_after, self.retry_sleep_seconds * attempt))
                continue

            if status >= 500:
                logger.warning(
                    "provider_server_error",
                    extra={"provider": "balldontlie", "url": url, "attempt": attempt, "status": status},
                )
                if is_last:
                    raise ProviderTransientError(f"balldontlie server error: {status}")
                self._backoff(attempt)
                continue

            if status >= 400:
                # Remaining 4xx responses are not retried. The exception type
                # is kept as-is so existing callers keep catching it.
                body_preview = response.text[:240]
                raise ProviderTransientError(
                    f"balldontlie client error status={status} path={path} body={body_preview}"
                )

            try:
                payload = response.json()
            except ValueError as exc:
                raise ProviderTransientError(f"Invalid JSON from balldontlie for {path}") from exc
            if not isinstance(payload, dict):
                # Callers use payload.get(...); fail with a provider error
                # instead of an AttributeError further up the stack.
                raise ProviderTransientError(f"Unexpected JSON payload from balldontlie for {path}")
            return payload

        # Defensive fallback: every loop iteration above returns, raises or retries.
        raise ProviderTransientError(f"Failed to call balldontlie path={path}")

    def list_paginated(
        self,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        per_page: int = 100,
        page_limit: int = 1,
    ) -> list[dict[str, Any]]:
        """Collect the ``data`` rows of up to *page_limit* pages of *path*.

        Pagination follows ``meta.next_page`` and stops when it is absent,
        is not a positive integer, or does not advance. The loop is bounded
        by the number of requests made, so a misbehaving ``next_page`` cannot
        make it spin forever.

        NOTE(review): some balldontlie API versions appear to paginate with
        ``meta.next_cursor`` and a ``cursor`` query parameter instead of page
        numbers. That form is followed too when ``next_page`` is absent;
        confirm against the API version behind PROVIDER_BALLDONTLIE_BASE_URL.
        """
        rows: list[dict[str, Any]] = []
        query = dict(params or {})  # copy: the caller's dict is never mutated
        page = 1
        query.update({"page": page, "per_page": per_page})
        fetched = 0

        while fetched < page_limit:
            payload = self.get_json(path, params=query)
            fetched += 1

            data = payload.get("data") or []
            if isinstance(data, list):
                rows.extend(data)

            meta = payload.get("meta")
            if not isinstance(meta, dict):
                break

            raw_next_page = meta.get("next_page")
            if raw_next_page:
                next_page = self._positive_int(raw_next_page)
                if next_page is None or next_page <= page:
                    logger.warning(
                        "provider_pagination_stalled",
                        extra={"provider": "balldontlie", "path": path, "page": page},
                    )
                    break
                page = next_page
                query["page"] = page
                continue

            next_cursor = meta.get("next_cursor")
            if next_cursor in (None, "") or next_cursor == query.get("cursor"):
                break
            query.pop("page", None)
            query["cursor"] = next_cursor

        return rows

View File

@ -1,16 +1,29 @@
from django.conf import settings
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.exceptions import ProviderNotFoundError
# Maps each adapter's ``namespace`` string to its adapter class; get_provider
# looks the requested namespace up in this table.
PROVIDER_REGISTRY = {
MvpDemoProviderAdapter.namespace: MvpDemoProviderAdapter,
BalldontlieProviderAdapter.namespace: BalldontlieProviderAdapter,
}
def get_default_provider_namespace() -> str:
    """Resolve the provider namespace to use when the caller names none.

    An explicit ``PROVIDER_DEFAULT_NAMESPACE`` wins. Otherwise the namespace
    is chosen from ``PROVIDER_BACKEND``; an unrecognised backend falls back
    to the demo namespace.
    """
    explicit = settings.PROVIDER_DEFAULT_NAMESPACE
    if explicit:
        return explicit

    demo_namespace = settings.PROVIDER_NAMESPACE_DEMO
    namespaces_by_backend = {
        "demo": demo_namespace,
        "balldontlie": settings.PROVIDER_NAMESPACE_BALLDONTLIE,
    }
    return namespaces_by_backend.get(settings.PROVIDER_BACKEND, demo_namespace)
def get_provider(namespace: str | None = None):
provider_namespace = namespace or settings.PROVIDER_DEFAULT_NAMESPACE
provider_namespace = namespace or get_default_provider_namespace()
provider_cls = PROVIDER_REGISTRY.get(provider_namespace)
if not provider_cls:
raise ProviderNotFoundError(f"Unknown provider namespace: {provider_namespace}")

View File

@ -0,0 +1,260 @@
from __future__ import annotations
from collections import defaultdict
from datetime import date
from typing import Any
from django.utils.text import slugify
def map_competitions() -> list[dict[str, Any]]:
    """Return the competition catalogue for this provider.

    The result is static: a single hard-coded NBA league record that does
    not depend on any API response. A fresh list is built on every call.
    """
    united_states = {"name": "United States", "iso2_code": "US", "iso3_code": "USA"}
    nba = {
        "external_id": "competition-nba",
        "name": "NBA",
        "slug": "nba",
        "competition_type": "league",
        "gender": "men",
        "level": 1,
        "country": united_states,
        "is_active": True,
    }
    return [nba]
def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert raw team rows into team records.

    Rows without a truthy ``id`` are dropped. The display name falls back
    from ``full_name`` to ``name`` to ``"Team <id>"``, and the slug falls
    back to the external id when the name slugifies to an empty string.
    Every team is tagged with the United States as its country.
    """
    teams: list[dict[str, Any]] = []
    for entry in rows:
        identifier = entry.get("id")
        if identifier:
            display_name = entry.get("full_name") or entry.get("name") or f"Team {identifier}"
            short_name = (entry.get("abbreviation") or "").strip()
            teams.append(
                {
                    "external_id": f"team-{identifier}",
                    "name": display_name,
                    "short_name": short_name,
                    "slug": slugify(display_name) or f"team-{identifier}",
                    "country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
                    "is_national_team": False,
                }
            )
    return teams
def _map_position(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
position_map = {
"G": ("PG", "Point Guard"),
"G-F": ("SG", "Shooting Guard"),
"F-G": ("SF", "Small Forward"),
"F": ("PF", "Power Forward"),
"F-C": ("PF", "Power Forward"),
"C-F": ("C", "Center"),
"C": ("C", "Center"),
}
code_name = position_map.get(normalized)
if not code_name:
return None
return {"code": code_name[0], "name": code_name[1]}
def _map_role(position: str | None) -> dict[str, str] | None:
if not position:
return None
normalized = position.upper().strip()
if "G" in normalized:
return {"code": "playmaker", "name": "Playmaker"}
if "F" in normalized:
return {"code": "wing", "name": "Wing"}
if "C" in normalized:
return {"code": "big", "name": "Big"}
return None
def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Convert raw player rows into player records.

    Rows without a truthy ``id`` are dropped. Fields that are not read from
    the row (birth date, nationality, height, weight, dominant hand, aliases)
    are filled with fixed placeholder values, and every player is marked
    active.

    Name parts that are present but null are treated as empty strings, so
    the full name never contains the literal text "None". A ``team`` value
    that is not a dict is treated as "no team".
    """
    mapped: list[dict[str, Any]] = []
    for row in rows:
        player_id = row.get("id")
        if not player_id:
            continue

        # ``row.get(key, "")`` would still return None for an explicit null.
        first_name = row.get("first_name") or ""
        last_name = row.get("last_name") or ""
        full_name = f"{first_name} {last_name}".strip() or f"Player {player_id}"

        position_value = row.get("position")

        team = row.get("team")
        team_id = team.get("id") if isinstance(team, dict) else None

        mapped.append(
            {
                "external_id": f"player-{player_id}",
                "first_name": first_name,
                "last_name": last_name,
                "full_name": full_name,
                "birth_date": None,
                "nationality": {"name": "Unknown", "iso2_code": "ZZ", "iso3_code": "ZZZ"},
                "nominal_position": _map_position(position_value),
                "inferred_role": _map_role(position_value),
                "height_cm": None,
                "weight_kg": None,
                "dominant_hand": "unknown",
                "is_active": True,
                "aliases": [],
                "current_team_external_id": f"team-{team_id}" if team_id else None,
            }
        )
    return mapped
def map_seasons(seasons: list[int]) -> list[dict[str, Any]]:
    """Build one season record per starting year.

    Each season is labelled ``"<year>-<year + 1>"`` and spans 1 October of
    the starting year to 30 June of the following year. ``is_current`` is
    always ``False``.
    """

    def _season_record(start_year: int) -> dict[str, Any]:
        end_year = start_year + 1
        return {
            "external_id": f"season-{start_year}",
            "label": f"{start_year}-{end_year}",
            "start_date": date(start_year, 10, 1).isoformat(),
            "end_date": date(end_year, 6, 30).isoformat(),
            "is_current": False,
        }

    return [_season_record(year) for year in seasons]
def _to_float(value: Any) -> float:
if value in (None, ""):
return 0.0
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _parse_minutes(value: Any) -> int:
if value in (None, ""):
return 0
if isinstance(value, (int, float)):
return int(value)
text = str(value)
if ":" in text:
minutes, _ = text.split(":", 1)
return int(_to_float(minutes))
return int(_to_float(text))
def _pct(value: Any, *, count: int) -> float | None:
if count <= 0:
return None
pct = _to_float(value) / count
if pct <= 1:
pct *= 100
return round(pct, 2)
def map_player_stats(
    rows: list[dict[str, Any]],
    *,
    allowed_seasons: list[int],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Aggregate per-game stat rows into season stat and career records.

    Rows are grouped by ``(season, player id, team id)``. A row is skipped
    when any of those three values is missing or falsy, or when
    *allowed_seasons* is non-empty and does not contain the row's season.

    For every group one stat record (per-game averages of the counting
    stats, total minutes, averaged shooting percentages) and one career
    record (same season window as the season records: 1 October to 30 June)
    are produced, in first-seen group order.

    Returns:
        ``(player_stats, player_careers)``.
    """

    def _empty_totals() -> dict[str, Any]:
        return {
            "games": 0,
            "minutes": 0,
            "points": 0.0,
            "rebounds": 0.0,
            "assists": 0.0,
            "steals": 0.0,
            "blocks": 0.0,
            "turnovers": 0.0,
            "fg_pct_sum": 0.0,
            "fg_pct_count": 0,
            "three_pct_sum": 0.0,
            "three_pct_count": 0,
            "ft_pct_sum": 0.0,
            "ft_pct_count": 0,
        }

    # (total name, row field) for the stats that are simply summed.
    counting_fields = (
        ("points", "pts"),
        ("rebounds", "reb"),
        ("assists", "ast"),
        ("steals", "stl"),
        ("blocks", "blk"),
        ("turnovers", "turnover"),
    )
    # (total prefix, row field) for percentages; null values are not counted.
    percentage_fields = (
        ("fg_pct", "fg_pct"),
        ("three_pct", "fg3_pct"),
        ("ft_pct", "ft_pct"),
    )

    totals_by_key: dict[tuple[int, int, int], dict[str, Any]] = defaultdict(_empty_totals)

    for row in rows:
        season = (row.get("game") or {}).get("season")
        player_id = (row.get("player") or {}).get("id")
        team_id = (row.get("team") or {}).get("id")
        if not season or not player_id or not team_id:
            continue
        if allowed_seasons and season not in allowed_seasons:
            continue

        totals = totals_by_key[(season, player_id, team_id)]
        totals["games"] += 1
        totals["minutes"] += _parse_minutes(row.get("min"))
        for total_name, row_field in counting_fields:
            totals[total_name] += _to_float(row.get(row_field))
        for prefix, row_field in percentage_fields:
            raw_pct = row.get(row_field)
            if raw_pct is not None:
                totals[f"{prefix}_sum"] += _to_float(raw_pct)
                totals[f"{prefix}_count"] += 1

    player_stats: list[dict[str, Any]] = []
    player_careers: list[dict[str, Any]] = []

    for (season, player_id, team_id), totals in totals_by_key.items():
        divisor = totals["games"] or 1
        # Reference fields shared, in this order, by both record kinds.
        references = {
            "player_external_id": f"player-{player_id}",
            "team_external_id": f"team-{team_id}",
            "competition_external_id": "competition-nba",
            "season_external_id": f"season-{season}",
        }
        per_game = {
            total_name: round(totals[total_name] / divisor, 2)
            for total_name, _ in counting_fields
        }
        shooting = {
            prefix: _pct(totals[f"{prefix}_sum"], count=totals[f"{prefix}_count"])
            for prefix, _ in percentage_fields
        }

        player_stats.append(
            {
                "external_id": f"ps-{season}-{player_id}-{team_id}",
                **references,
                "games_played": totals["games"],
                "games_started": 0,
                "minutes_played": totals["minutes"],
                **per_game,
                **shooting,
                "usage_rate": None,
                "true_shooting_pct": None,
                "player_efficiency_rating": None,
            }
        )
        player_careers.append(
            {
                "external_id": f"career-{season}-{player_id}-{team_id}",
                **references,
                "role_code": "",
                "shirt_number": None,
                "start_date": date(season, 10, 1).isoformat(),
                "end_date": date(season + 1, 6, 30).isoformat(),
                "notes": "Imported from balldontlie aggregated box scores",
            }
        )

    return player_stats, player_careers