Improve balldontlie query flow and dev container write stability

This commit is contained in:
Alfredo Di Stasio
2026-03-12 11:13:05 +01:00
parent e0e75cfb0c
commit c9dd10a438
10 changed files with 196 additions and 46 deletions

View File

@ -32,6 +32,9 @@ AUTO_APPLY_MIGRATIONS=1
AUTO_COLLECTSTATIC=1
AUTO_BUILD_TAILWIND=1
GUNICORN_WORKERS=3
# Development container UID/GID for bind-mounted source write permissions.
LOCAL_UID=1000
LOCAL_GID=1000
# Production-minded security toggles
DJANGO_SECURE_SSL_REDIRECT=1
@ -56,14 +59,18 @@ PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
PROVIDER_REQUEST_RETRIES=3
PROVIDER_REQUEST_RETRY_SLEEP=1
PROVIDER_HTTP_TIMEOUT_SECONDS=10
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/v1
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/nba/v1
PROVIDER_BALLDONTLIE_API_KEY=
# NBA-centric MVP provider seasons to ingest (comma-separated years).
PROVIDER_BALLDONTLIE_SEASONS=2024
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_GAMES_PER_PAGE=100
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
# When 0, a 401 on stats endpoint degrades to players/teams-only sync.
PROVIDER_BALLDONTLIE_STATS_STRICT=0
CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500
INGESTION_SCHEDULE_ENABLED=0

View File

@ -73,6 +73,7 @@ docker compose up --build
```
This starts the development-oriented topology (source bind mounts enabled).
In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs).
3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually:
@ -118,6 +119,7 @@ Notes:
- In release-style mode, `web`, `celery_worker`, and `celery_beat` run from the built image filesystem.
- `tailwind` is marked as `dev` profile in release override and is not started unless `--profile dev` is used.
- `nginx`, `postgres`, and `redis` service naming remains unchanged.
- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`.
## Setup and Run Notes
@ -192,6 +194,12 @@ Build Tailwind once:
docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build'
```
If you see `Permission denied` writing `static/vendor` or `static/css` in development, fix local file ownership once:
```bash
sudo chown -R "$(id -u):$(id -g)" static
```
Run Tailwind in watch mode during development:
```bash
@ -317,6 +325,8 @@ Provider backend is selected via environment variables:
- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
The adapter uses balldontlie getting-started query style (`/nba/v1`, cursor pagination, stats by `game_ids[]`).
Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when stats are unauthorized.
Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md).

View File

@ -1,4 +1,5 @@
import logging
from itertools import islice
from django.conf import settings
@ -13,6 +14,7 @@ from apps.providers.contracts import (
TeamPayload,
)
from apps.providers.interfaces import BaseProviderAdapter
from apps.providers.exceptions import ProviderUnauthorizedError
from apps.providers.services.balldontlie_mappings import (
map_competitions,
map_player_stats,
@ -36,6 +38,66 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
def configured_seasons(self) -> list[int]:
return settings.PROVIDER_BALLDONTLIE_SEASONS
@staticmethod
def _chunked(values: list[int], size: int):
iterator = iter(values)
while True:
chunk = list(islice(iterator, size))
if not chunk:
return
yield chunk
def _fetch_game_ids(self) -> list[int]:
game_ids: set[int] = set()
for season in self.configured_seasons:
rows = self.client.list_paginated(
"games",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_GAMES_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT,
)
for row in rows:
game_id = row.get("id")
if isinstance(game_id, int):
game_ids.add(game_id)
return sorted(game_ids)
def _fetch_stats_rows(self) -> list[dict]:
game_ids = self._fetch_game_ids()
if not game_ids:
logger.info(
"provider_stats_skipped_no_games",
extra={"provider": self.namespace, "seasons": self.configured_seasons},
)
return []
all_rows: list[dict] = []
try:
# Use game_ids[] query as documented in balldontlie getting-started flow.
for game_id_chunk in self._chunked(game_ids, 25):
rows = self.client.list_paginated(
"stats",
params={"game_ids[]": game_id_chunk},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
except ProviderUnauthorizedError as exc:
if settings.PROVIDER_BALLDONTLIE_STATS_STRICT:
raise
logger.warning(
"provider_stats_unauthorized_degraded",
extra={
"provider": self.namespace,
"path": exc.path,
"status_code": exc.status_code,
"detail": exc.detail,
},
)
return []
return all_rows
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[PlayerPayload]:
params = {"search": query} if query else None
rows = self.client.list_paginated(
@ -78,30 +140,12 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
return map_seasons(self.configured_seasons)
def fetch_player_stats(self) -> list[PlayerStatsPayload]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
all_rows = self._fetch_stats_rows()
player_stats, _ = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_stats
def fetch_player_careers(self) -> list[PlayerCareerPayload]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
all_rows = self._fetch_stats_rows()
_, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_careers
@ -115,16 +159,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
seasons = self.fetch_seasons()
players = self.fetch_players()
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
"stats",
params={"seasons[]": season},
per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE,
page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT,
)
all_rows.extend(rows)
all_rows = self._fetch_stats_rows()
player_stats, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
logger.info(

View File

@ -5,7 +5,7 @@ from typing import Any
import requests
from django.conf import settings
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
logger = logging.getLogger(__name__)
@ -89,9 +89,14 @@ class BalldontlieClient:
if status >= 400:
body_preview = response.text[:240]
raise ProviderTransientError(
f"balldontlie client error status={status} path={path} body={body_preview}"
if status == 401:
raise ProviderUnauthorizedError(
provider="balldontlie",
path=path,
status_code=status,
detail=body_preview,
)
raise ProviderTransientError(f"balldontlie client error status={status} path={path} body={body_preview}")
try:
return response.json()
@ -109,20 +114,36 @@ class BalldontlieClient:
page_limit: int = 1,
) -> list[dict[str, Any]]:
page = 1
cursor = None
rows: list[dict[str, Any]] = []
query = dict(params or {})
while page <= page_limit:
query.update({"page": page, "per_page": per_page})
payload = self.get_json(path, params=query)
request_query = dict(query)
request_query["per_page"] = per_page
if cursor is not None:
request_query["cursor"] = cursor
else:
# Keep backwards compatibility for endpoints still supporting page-based pagination.
request_query["page"] = page
payload = self.get_json(path, params=request_query)
data = payload.get("data") or []
if isinstance(data, list):
rows.extend(data)
meta = payload.get("meta") or {}
next_cursor = meta.get("next_cursor")
if next_cursor:
cursor = next_cursor
page += 1
continue
next_page = meta.get("next_page")
if not next_page:
break
if next_page:
page = int(next_page)
continue
break
return rows

View File

@ -6,6 +6,17 @@ class ProviderTransientError(ProviderError):
"""Temporary provider failure that can be retried."""
class ProviderUnauthorizedError(ProviderError):
"""Raised when provider credentials are valid format but not authorized for an endpoint."""
def __init__(self, *, provider: str, path: str, status_code: int, detail: str = ""):
super().__init__(f"{provider} unauthorized status={status_code} path={path} detail={detail}")
self.provider = provider
self.path = path
self.status_code = status_code
self.detail = detail
class ProviderRateLimitError(ProviderTransientError):
"""Raised when provider rate limit is hit."""

View File

@ -171,12 +171,15 @@ PROVIDER_MVP_DATA_FILE = os.getenv(
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/v1")
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/nba/v1")
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT", "5"))
PROVIDER_BALLDONTLIE_GAMES_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_GAMES_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
PROVIDER_BALLDONTLIE_SEASONS = [
int(value.strip())
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")

View File

@ -1,5 +1,6 @@
services:
web:
user: "10001:10001"
volumes:
- static_data:/app/staticfiles
- media_data:/app/media
@ -9,6 +10,7 @@ services:
DJANGO_DEBUG: "0"
celery_worker:
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
environment:
@ -16,6 +18,7 @@ services:
DJANGO_DEBUG: "0"
celery_beat:
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
environment:
@ -25,4 +28,3 @@ services:
tailwind:
profiles:
- dev

View File

@ -34,6 +34,7 @@ services:
redis:
condition: service_healthy
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- node_modules_data:/app/node_modules
@ -57,6 +58,7 @@ services:
env_file:
- .env
command: npm run dev
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- node_modules_data:/app/node_modules
@ -74,6 +76,7 @@ services:
redis:
condition: service_healthy
command: celery -A config worker -l info
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- runtime_data:/app/runtime
@ -97,6 +100,7 @@ services:
redis:
condition: service_healthy
command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- runtime_data:/app/runtime

View File

@ -6,7 +6,7 @@
"scripts": {
"build:vendor": "mkdir -p ./static/vendor && cp ./node_modules/htmx.org/dist/htmx.min.js ./static/vendor/htmx.min.js",
"build": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --minify",
"dev": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --watch"
"dev": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --watch=always"
},
"dependencies": {
"htmx.org": "^1.9.12"

View File

@ -9,7 +9,7 @@ import requests
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.clients.balldontlie import BalldontlieClient
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
from apps.providers.registry import get_default_provider_namespace, get_provider
from apps.providers.services.balldontlie_mappings import map_seasons
@ -28,8 +28,10 @@ class _FakeResponse:
class _FakeSession:
def __init__(self, responses: list[Any]):
self._responses = responses
self.calls: list[dict[str, Any]] = []
def get(self, *args, **kwargs):
self.calls.append(kwargs)
item = self._responses.pop(0)
if isinstance(item, Exception):
raise item
@ -69,6 +71,9 @@ class _FakeBalldontlieClient:
}
]
if path == "stats":
requested_ids = (params or {}).get("game_ids[]") or []
if requested_ids and 9902 not in requested_ids:
return []
return [
{
"pts": 20,
@ -83,7 +88,7 @@ class _FakeBalldontlieClient:
"min": "35:12",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
"game": {"id": 9901, "season": 2024},
},
{
"pts": 30,
@ -98,9 +103,14 @@ class _FakeBalldontlieClient:
"min": "33:00",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
"game": {"id": 9902, "season": 2024},
},
]
if path == "games":
return [
{"id": 9901, "season": 2024},
{"id": 9902, "season": 2024},
]
return []
@ -165,6 +175,30 @@ def test_balldontlie_map_seasons_marks_latest_as_current():
assert [row["external_id"] for row in seasons] == ["season-2022", "season-2023", "season-2024"]
@pytest.mark.django_db
def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings):
class _UnauthorizedStatsClient(_FakeBalldontlieClient):
def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1):
if path == "stats":
raise ProviderUnauthorizedError(
provider="balldontlie",
path="stats",
status_code=401,
detail="Unauthorized",
)
return super().list_paginated(path, params=params, per_page=per_page, page_limit=page_limit)
settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False
adapter = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient())
payload = adapter.sync_all()
assert payload["players"]
assert payload["teams"]
assert payload["player_stats"] == []
assert payload["player_careers"] == []
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
@ -212,3 +246,26 @@ def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, set
with pytest.raises(ProviderRateLimitError):
client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_cursor_pagination(settings):
session = _FakeSession(
responses=[
_FakeResponse(
status_code=200,
payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}},
),
_FakeResponse(
status_code=200,
payload={"data": [{"id": 2}], "meta": {"next_cursor": None}},
),
]
)
client = BalldontlieClient(session=session)
rows = client.list_paginated("players", per_page=1, page_limit=5)
assert rows == [{"id": 1}, {"id": 2}]
assert session.calls[0]["params"]["page"] == 1
assert "cursor" not in session.calls[0]["params"]
assert session.calls[1]["params"]["cursor"] == 101