From c9dd10a43853d4b520eaac03b8b4ac9be438cd83 Mon Sep 17 00:00:00 2001 From: Alfredo Di Stasio Date: Thu, 12 Mar 2026 11:13:05 +0100 Subject: [PATCH] Improve balldontlie query flow and dev container write stability --- .env.example | 9 +- README.md | 10 ++ .../adapters/balldontlie_provider.py | 95 +++++++++++++------ apps/providers/clients/balldontlie.py | 39 ++++++-- apps/providers/exceptions.py | 11 +++ config/settings/base.py | 5 +- docker-compose.release.yml | 4 +- docker-compose.yml | 4 + package.json | 2 +- tests/test_provider_balldontlie.py | 63 +++++++++++- 10 files changed, 196 insertions(+), 46 deletions(-) diff --git a/.env.example b/.env.example index 8192d3f..7fd0a5f 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,9 @@ AUTO_APPLY_MIGRATIONS=1 AUTO_COLLECTSTATIC=1 AUTO_BUILD_TAILWIND=1 GUNICORN_WORKERS=3 +# Development container UID/GID for bind-mounted source write permissions. +LOCAL_UID=1000 +LOCAL_GID=1000 # Production-minded security toggles DJANGO_SECURE_SSL_REDIRECT=1 @@ -56,14 +59,18 @@ PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json PROVIDER_REQUEST_RETRIES=3 PROVIDER_REQUEST_RETRY_SLEEP=1 PROVIDER_HTTP_TIMEOUT_SECONDS=10 -PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/v1 +PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io/nba/v1 PROVIDER_BALLDONTLIE_API_KEY= # NBA-centric MVP provider seasons to ingest (comma-separated years). PROVIDER_BALLDONTLIE_SEASONS=2024 PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5 PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100 +PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT=5 +PROVIDER_BALLDONTLIE_GAMES_PER_PAGE=100 PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10 PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100 +# When 0, a 401 on stats endpoint degrades to players/teams-only sync. 
+PROVIDER_BALLDONTLIE_STATS_STRICT=0 CELERY_TASK_TIME_LIMIT=1800 CELERY_TASK_SOFT_TIME_LIMIT=1500 INGESTION_SCHEDULE_ENABLED=0 diff --git a/README.md b/README.md index 4fd2a4e..974f1f0 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ docker compose up --build ``` This starts the development-oriented topology (source bind mounts enabled). +In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs). 3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually: @@ -118,6 +119,7 @@ Notes: - In release-style mode, `web`, `celery_worker`, and `celery_beat` run from the built image filesystem. - `tailwind` is marked as `dev` profile in release override and is not started unless `--profile dev` is used. - `nginx`, `postgres`, and `redis` service naming remains unchanged. +- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`. ## Setup and Run Notes @@ -192,6 +194,12 @@ Build Tailwind once: docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build' ``` +If you see `Permission denied` when writing to `static/vendor` or `static/css` in development, fix local file ownership once: + +```bash +sudo chown -R "$(id -u):$(id -g)" static +``` + Run Tailwind in watch mode during development: ```bash @@ -317,6 +325,8 @@ Provider backend is selected via environment variables: - `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP). +The adapter uses the balldontlie getting-started query style (`/nba/v1`, cursor pagination, stats by `game_ids[]`). +Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when the stats endpoint responds with 401 Unauthorized. 
Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md). diff --git a/apps/providers/adapters/balldontlie_provider.py b/apps/providers/adapters/balldontlie_provider.py index 1857b18..8dc3c2b 100644 --- a/apps/providers/adapters/balldontlie_provider.py +++ b/apps/providers/adapters/balldontlie_provider.py @@ -1,4 +1,5 @@ import logging +from itertools import islice from django.conf import settings @@ -13,6 +14,7 @@ from apps.providers.contracts import ( TeamPayload, ) from apps.providers.interfaces import BaseProviderAdapter +from apps.providers.exceptions import ProviderUnauthorizedError from apps.providers.services.balldontlie_mappings import ( map_competitions, map_player_stats, @@ -36,6 +38,66 @@ class BalldontlieProviderAdapter(BaseProviderAdapter): def configured_seasons(self) -> list[int]: return settings.PROVIDER_BALLDONTLIE_SEASONS + @staticmethod + def _chunked(values: list[int], size: int): + iterator = iter(values) + while True: + chunk = list(islice(iterator, size)) + if not chunk: + return + yield chunk + + def _fetch_game_ids(self) -> list[int]: + game_ids: set[int] = set() + for season in self.configured_seasons: + rows = self.client.list_paginated( + "games", + params={"seasons[]": season}, + per_page=settings.PROVIDER_BALLDONTLIE_GAMES_PER_PAGE, + page_limit=settings.PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT, + ) + for row in rows: + game_id = row.get("id") + if isinstance(game_id, int): + game_ids.add(game_id) + return sorted(game_ids) + + def _fetch_stats_rows(self) -> list[dict]: + game_ids = self._fetch_game_ids() + if not game_ids: + logger.info( + "provider_stats_skipped_no_games", + extra={"provider": self.namespace, "seasons": self.configured_seasons}, + ) + return [] + + all_rows: list[dict] = [] + try: + # Use game_ids[] query as documented in balldontlie getting-started flow. 
+ for game_id_chunk in self._chunked(game_ids, 25): + rows = self.client.list_paginated( + "stats", + params={"game_ids[]": game_id_chunk}, + per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE, + page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT, + ) + all_rows.extend(rows) + except ProviderUnauthorizedError as exc: + if settings.PROVIDER_BALLDONTLIE_STATS_STRICT: + raise + logger.warning( + "provider_stats_unauthorized_degraded", + extra={ + "provider": self.namespace, + "path": exc.path, + "status_code": exc.status_code, + "detail": exc.detail, + }, + ) + return [] + + return all_rows + def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[PlayerPayload]: params = {"search": query} if query else None rows = self.client.list_paginated( @@ -78,30 +140,12 @@ class BalldontlieProviderAdapter(BaseProviderAdapter): return map_seasons(self.configured_seasons) def fetch_player_stats(self) -> list[PlayerStatsPayload]: - all_rows: list[dict] = [] - for season in self.configured_seasons: - rows = self.client.list_paginated( - "stats", - params={"seasons[]": season}, - per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE, - page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT, - ) - all_rows.extend(rows) - + all_rows = self._fetch_stats_rows() player_stats, _ = map_player_stats(all_rows, allowed_seasons=self.configured_seasons) return player_stats def fetch_player_careers(self) -> list[PlayerCareerPayload]: - all_rows: list[dict] = [] - for season in self.configured_seasons: - rows = self.client.list_paginated( - "stats", - params={"seasons[]": season}, - per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE, - page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT, - ) - all_rows.extend(rows) - + all_rows = self._fetch_stats_rows() _, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons) return player_careers @@ -115,16 +159,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter): seasons 
= self.fetch_seasons() players = self.fetch_players() - all_rows: list[dict] = [] - for season in self.configured_seasons: - rows = self.client.list_paginated( - "stats", - params={"seasons[]": season}, - per_page=settings.PROVIDER_BALLDONTLIE_STATS_PER_PAGE, - page_limit=settings.PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT, - ) - all_rows.extend(rows) - + all_rows = self._fetch_stats_rows() player_stats, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons) logger.info( diff --git a/apps/providers/clients/balldontlie.py b/apps/providers/clients/balldontlie.py index 2318c77..bfc02c6 100644 --- a/apps/providers/clients/balldontlie.py +++ b/apps/providers/clients/balldontlie.py @@ -5,7 +5,7 @@ from typing import Any import requests from django.conf import settings -from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError +from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError logger = logging.getLogger(__name__) @@ -89,9 +89,14 @@ class BalldontlieClient: if status >= 400: body_preview = response.text[:240] - raise ProviderTransientError( - f"balldontlie client error status={status} path={path} body={body_preview}" - ) + if status == 401: + raise ProviderUnauthorizedError( + provider="balldontlie", + path=path, + status_code=status, + detail=body_preview, + ) + raise ProviderTransientError(f"balldontlie client error status={status} path={path} body={body_preview}") try: return response.json() @@ -109,20 +114,36 @@ class BalldontlieClient: page_limit: int = 1, ) -> list[dict[str, Any]]: page = 1 + cursor = None rows: list[dict[str, Any]] = [] query = dict(params or {}) while page <= page_limit: - query.update({"page": page, "per_page": per_page}) - payload = self.get_json(path, params=query) + request_query = dict(query) + request_query["per_page"] = per_page + if cursor is not None: + request_query["cursor"] = cursor + else: + # Keep backwards compatibility 
for endpoints still supporting page-based pagination. + request_query["page"] = page + + payload = self.get_json(path, params=request_query) data = payload.get("data") or [] if isinstance(data, list): rows.extend(data) meta = payload.get("meta") or {} + next_cursor = meta.get("next_cursor") + if next_cursor: + cursor = next_cursor + page += 1 + continue + next_page = meta.get("next_page") - if not next_page: - break - page = int(next_page) + if next_page: + page = int(next_page) + continue + + break return rows diff --git a/apps/providers/exceptions.py b/apps/providers/exceptions.py index b76a5fd..e08908e 100644 --- a/apps/providers/exceptions.py +++ b/apps/providers/exceptions.py @@ -6,6 +6,17 @@ class ProviderTransientError(ProviderError): """Temporary provider failure that can be retried.""" +class ProviderUnauthorizedError(ProviderError): + """Raised when provider credentials are valid format but not authorized for an endpoint.""" + + def __init__(self, *, provider: str, path: str, status_code: int, detail: str = ""): + super().__init__(f"{provider} unauthorized status={status_code} path={path} detail={detail}") + self.provider = provider + self.path = path + self.status_code = status_code + self.detail = detail + + class ProviderRateLimitError(ProviderTransientError): """Raised when provider rate limit is hit.""" diff --git a/config/settings/base.py b/config/settings/base.py index 036f299..a0f9e90 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -171,12 +171,15 @@ PROVIDER_MVP_DATA_FILE = os.getenv( PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10")) -PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io/v1") +PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", 
"https://api.balldontlie.io/nba/v1") PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "") PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5")) PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100")) PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10")) PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100")) +PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_GAMES_PAGE_LIMIT", "5")) +PROVIDER_BALLDONTLIE_GAMES_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_GAMES_PER_PAGE", "100")) +PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False) PROVIDER_BALLDONTLIE_SEASONS = [ int(value.strip()) for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",") diff --git a/docker-compose.release.yml b/docker-compose.release.yml index 1223faa..b3e573a 100644 --- a/docker-compose.release.yml +++ b/docker-compose.release.yml @@ -1,5 +1,6 @@ services: web: + user: "10001:10001" volumes: - static_data:/app/staticfiles - media_data:/app/media @@ -9,6 +10,7 @@ services: DJANGO_DEBUG: "0" celery_worker: + user: "10001:10001" volumes: - runtime_data:/app/runtime environment: @@ -16,6 +18,7 @@ services: DJANGO_DEBUG: "0" celery_beat: + user: "10001:10001" volumes: - runtime_data:/app/runtime environment: @@ -25,4 +28,3 @@ services: tailwind: profiles: - dev - diff --git a/docker-compose.yml b/docker-compose.yml index 6e29e90..2c0e5b6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,6 +34,7 @@ services: redis: condition: service_healthy command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile - + user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" volumes: - .:/app - node_modules_data:/app/node_modules @@ -57,6 +58,7 @@ 
services: env_file: - .env command: npm run dev + user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" volumes: - .:/app - node_modules_data:/app/node_modules @@ -74,6 +76,7 @@ services: redis: condition: service_healthy command: celery -A config worker -l info + user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" volumes: - .:/app - runtime_data:/app/runtime @@ -97,6 +100,7 @@ services: redis: condition: service_healthy command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule + user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" volumes: - .:/app - runtime_data:/app/runtime diff --git a/package.json b/package.json index 952ca71..d9b1978 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "scripts": { "build:vendor": "mkdir -p ./static/vendor && cp ./node_modules/htmx.org/dist/htmx.min.js ./static/vendor/htmx.min.js", "build": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --minify", - "dev": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --watch" + "dev": "npm run build:vendor && tailwindcss -c tailwind.config.js -i ./static/src/tailwind.css -o ./static/css/main.css --watch=always" }, "dependencies": { "htmx.org": "^1.9.12" diff --git a/tests/test_provider_balldontlie.py b/tests/test_provider_balldontlie.py index 100b85f..bd40281 100644 --- a/tests/test_provider_balldontlie.py +++ b/tests/test_provider_balldontlie.py @@ -9,7 +9,7 @@ import requests from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter from apps.providers.clients.balldontlie import BalldontlieClient -from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError +from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError from apps.providers.registry import 
get_default_provider_namespace, get_provider from apps.providers.services.balldontlie_mappings import map_seasons @@ -28,8 +28,10 @@ class _FakeResponse: class _FakeSession: def __init__(self, responses: list[Any]): self._responses = responses + self.calls: list[dict[str, Any]] = [] def get(self, *args, **kwargs): + self.calls.append(kwargs) item = self._responses.pop(0) if isinstance(item, Exception): raise item @@ -69,6 +71,9 @@ class _FakeBalldontlieClient: } ] if path == "stats": + requested_ids = (params or {}).get("game_ids[]") or [] + if requested_ids and 9902 not in requested_ids: + return [] return [ { "pts": 20, @@ -83,7 +88,7 @@ class _FakeBalldontlieClient: "min": "35:12", "player": {"id": 237}, "team": {"id": 14}, - "game": {"season": 2024}, + "game": {"id": 9901, "season": 2024}, }, { "pts": 30, @@ -98,9 +103,14 @@ class _FakeBalldontlieClient: "min": "33:00", "player": {"id": 237}, "team": {"id": 14}, - "game": {"season": 2024}, + "game": {"id": 9902, "season": 2024}, }, ] + if path == "games": + return [ + {"id": 9901, "season": 2024}, + {"id": 9902, "season": 2024}, + ] return [] @@ -165,6 +175,30 @@ def test_balldontlie_map_seasons_marks_latest_as_current(): assert [row["external_id"] for row in seasons] == ["season-2022", "season-2023", "season-2024"] +@pytest.mark.django_db +def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings): + class _UnauthorizedStatsClient(_FakeBalldontlieClient): + def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1): + if path == "stats": + raise ProviderUnauthorizedError( + provider="balldontlie", + path="stats", + status_code=401, + detail="Unauthorized", + ) + return super().list_paginated(path, params=params, per_page=per_page, page_limit=page_limit) + + settings.PROVIDER_BALLDONTLIE_SEASONS = [2024] + settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False + adapter = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient()) + + payload = adapter.sync_all() + assert 
payload["players"] + assert payload["teams"] + assert payload["player_stats"] == [] + assert payload["player_careers"] == [] + + @pytest.mark.django_db def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings): monkeypatch.setattr(time, "sleep", lambda _: None) @@ -212,3 +246,26 @@ def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, set with pytest.raises(ProviderRateLimitError): client.get_json("players") + + +@pytest.mark.django_db +def test_balldontlie_client_cursor_pagination(settings): + session = _FakeSession( + responses=[ + _FakeResponse( + status_code=200, + payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}}, + ), + _FakeResponse( + status_code=200, + payload={"data": [{"id": 2}], "meta": {"next_cursor": None}}, + ), + ] + ) + client = BalldontlieClient(session=session) + rows = client.list_paginated("players", per_page=1, page_limit=5) + + assert rows == [{"id": 1}, {"id": 2}] + assert session.calls[0]["params"]["page"] == 1 + assert "cursor" not in session.calls[0]["params"] + assert session.calls[1]["params"]["cursor"] == 101