Tighten provider normalization contract and fallback semantics

This commit is contained in:
Alfredo Di Stasio
2026-03-10 16:47:39 +01:00
parent 2252821daf
commit e0e75cfb0c
11 changed files with 340 additions and 59 deletions

View File

@ -3,6 +3,15 @@ import logging
from django.conf import settings
from apps.providers.clients import BalldontlieClient
from apps.providers.contracts import (
CompetitionPayload,
NormalizedSyncPayload,
PlayerCareerPayload,
PlayerPayload,
PlayerStatsPayload,
SeasonPayload,
TeamPayload,
)
from apps.providers.interfaces import BaseProviderAdapter
from apps.providers.services.balldontlie_mappings import (
map_competitions,
@ -27,7 +36,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
def configured_seasons(self) -> list[int]:
return settings.PROVIDER_BALLDONTLIE_SEASONS
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[PlayerPayload]:
params = {"search": query} if query else None
rows = self.client.list_paginated(
"players",
@ -38,7 +47,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
mapped = map_players(rows)
return mapped[offset : offset + limit]
def fetch_player(self, *, external_player_id: str) -> dict | None:
def fetch_player(self, *, external_player_id: str) -> PlayerPayload | None:
if not external_player_id.startswith("player-"):
return None
player_id = external_player_id.replace("player-", "", 1)
@ -49,7 +58,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
mapped = map_players([data])
return mapped[0] if mapped else None
def fetch_players(self) -> list[dict]:
def fetch_players(self) -> list[PlayerPayload]:
rows = self.client.list_paginated(
"players",
per_page=settings.PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE,
@ -57,18 +66,18 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
)
return map_players(rows)
def fetch_competitions(self) -> list[dict]:
def fetch_competitions(self) -> list[CompetitionPayload]:
    """Return the competition payloads produced by ``map_competitions``."""
    competitions = map_competitions()
    return competitions
def fetch_teams(self) -> list[dict]:
def fetch_teams(self) -> list[TeamPayload]:
    """Fetch the ``teams`` resource and map its ``data`` rows to team payloads.

    A missing, falsy, or non-list ``data`` value is mapped as an empty row list.
    """
    response = self.client.get_json("teams")
    data = response.get("data")
    # Anything that is not a non-empty list is treated as "no rows".
    if not data or not isinstance(data, list):
        data = []
    return map_teams(data)
def fetch_seasons(self) -> list[dict]:
def fetch_seasons(self) -> list[SeasonPayload]:
    """Return season payloads built from the adapter's configured seasons."""
    configured = self.configured_seasons
    return map_seasons(configured)
def fetch_player_stats(self) -> list[dict]:
def fetch_player_stats(self) -> list[PlayerStatsPayload]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
@ -82,7 +91,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
player_stats, _ = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_stats
def fetch_player_careers(self) -> list[dict]:
def fetch_player_careers(self) -> list[PlayerCareerPayload]:
all_rows: list[dict] = []
for season in self.configured_seasons:
rows = self.client.list_paginated(
@ -96,7 +105,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
_, player_careers = map_player_stats(all_rows, allowed_seasons=self.configured_seasons)
return player_careers
def sync_all(self) -> dict:
def sync_all(self) -> NormalizedSyncPayload:
logger.info(
"provider_sync_start",
extra={"provider": self.namespace, "seasons": self.configured_seasons},
@ -141,7 +150,7 @@ class BalldontlieProviderAdapter(BaseProviderAdapter):
"cursor": None,
}
def sync_incremental(self, *, cursor: str | None = None) -> dict:
def sync_incremental(self, *, cursor: str | None = None) -> NormalizedSyncPayload:
    """Return a full sync snapshot whose ``cursor`` entry is the supplied value.

    The work is delegated to ``sync_all``; the incoming ``cursor`` is only
    written back into the result and is not used to narrow what is fetched.
    """
    snapshot = self.sync_all()
    snapshot["cursor"] = cursor
    return snapshot

View File

@ -6,6 +6,15 @@ from pathlib import Path
from django.conf import settings
from apps.providers.contracts import (
CompetitionPayload,
NormalizedSyncPayload,
PlayerCareerPayload,
PlayerPayload,
PlayerStatsPayload,
SeasonPayload,
TeamPayload,
)
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.interfaces import BaseProviderAdapter
@ -50,38 +59,38 @@ class MvpDemoProviderAdapter(BaseProviderAdapter):
value = payload.get(key, [])
return value if isinstance(value, list) else []
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[PlayerPayload]:
    """Return one page of players, optionally filtered by a name substring.

    When ``query`` is non-empty, only players whose ``full_name`` contains it
    (both sides lower-cased) are kept; a player without a ``full_name`` key is
    compared as an empty string. The page is ``[offset : offset + limit]`` of
    the remaining list.
    """
    candidates = self.fetch_players()
    window = slice(offset, offset + limit)
    if not query:
        return candidates[window]

    needle = query.lower()
    matches = [entry for entry in candidates if needle in entry.get("full_name", "").lower()]
    return matches[window]
def fetch_player(self, *, external_player_id: str) -> dict | None:
def fetch_player(self, *, external_player_id: str) -> PlayerPayload | None:
    """Return the first player whose ``external_id`` equals ``external_player_id``.

    Returns ``None`` when no entry from ``fetch_players`` matches.
    """
    matching = (
        candidate
        for candidate in self.fetch_players()
        if candidate.get("external_id") == external_player_id
    )
    return next(matching, None)
def fetch_players(self) -> list[dict]:
return self._payload_list("players")
def fetch_players(self) -> list[PlayerPayload]:
    """Return the ``players`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    players = self._payload_list("players")
    return players  # type: ignore[return-value]
def fetch_competitions(self) -> list[dict]:
return self._payload_list("competitions")
def fetch_competitions(self) -> list[CompetitionPayload]:
    """Return the ``competitions`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    competitions = self._payload_list("competitions")
    return competitions  # type: ignore[return-value]
def fetch_teams(self) -> list[dict]:
return self._payload_list("teams")
def fetch_teams(self) -> list[TeamPayload]:
    """Return the ``teams`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    teams = self._payload_list("teams")
    return teams  # type: ignore[return-value]
def fetch_seasons(self) -> list[dict]:
return self._payload_list("seasons")
def fetch_seasons(self) -> list[SeasonPayload]:
    """Return the ``seasons`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    seasons = self._payload_list("seasons")
    return seasons  # type: ignore[return-value]
def fetch_player_stats(self) -> list[dict]:
return self._payload_list("player_stats")
def fetch_player_stats(self) -> list[PlayerStatsPayload]:
    """Return the ``player_stats`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    stats = self._payload_list("player_stats")
    return stats  # type: ignore[return-value]
def fetch_player_careers(self) -> list[dict]:
return self._payload_list("player_careers")
def fetch_player_careers(self) -> list[PlayerCareerPayload]:
    """Return the ``player_careers`` list obtained from ``_payload_list``."""
    # Returned as-is, with no per-item validation here; the ignore is kept from the original.
    careers = self._payload_list("player_careers")
    return careers  # type: ignore[return-value]
def sync_all(self) -> dict:
def sync_all(self) -> NormalizedSyncPayload:
return {
"players": self.fetch_players(),
"competitions": self.fetch_competitions(),
@ -92,7 +101,7 @@ class MvpDemoProviderAdapter(BaseProviderAdapter):
"cursor": None,
}
def sync_incremental(self, *, cursor: str | None = None) -> dict:
def sync_incremental(self, *, cursor: str | None = None) -> NormalizedSyncPayload:
payload = self.sync_all()
# MVP source has no change feed yet; returns full snapshot.
payload["cursor"] = cursor

109
apps/providers/contracts.py Normal file
View File

@ -0,0 +1,109 @@
from __future__ import annotations
from typing import NotRequired, TypedDict
class NationalityPayload(TypedDict):
name: str
iso2_code: str
iso3_code: NotRequired[str | None]
class PositionPayload(TypedDict):
    """Code/name pair describing a position; both keys are required strings."""

    code: str
    name: str
class RolePayload(TypedDict):
    """Code/name pair describing a role; both keys are required strings."""

    code: str
    name: str
class PlayerPayload(TypedDict):
    """Normalized player record returned by provider adapters.

    All keys are required. Birth date, nationality, position, role, height
    and weight accept ``None`` when the provider does not supply them.
    """

    # Identity
    external_id: str
    first_name: str
    last_name: str
    full_name: str

    # Nullable profile details
    birth_date: str | None
    nationality: NationalityPayload | None
    nominal_position: PositionPayload | None
    inferred_role: RolePayload | None
    height_cm: int | None
    weight_kg: int | None

    # Always-present attributes
    dominant_hand: str
    is_active: bool
    aliases: list[str]
class CompetitionPayload(TypedDict):
    """Normalized competition record; every key is required and only ``country`` is nullable."""

    external_id: str
    name: str
    slug: str
    competition_type: str
    gender: str
    level: int
    country: NationalityPayload | None
    is_active: bool
class TeamPayload(TypedDict):
    """Normalized team record; every key is required and only ``country`` is nullable."""

    external_id: str
    name: str
    short_name: str
    slug: str
    country: NationalityPayload | None
    is_national_team: bool
class SeasonPayload(TypedDict):
    """Normalized season record; every key is required and none is nullable.

    ``start_date`` and ``end_date`` are strings, not ``date`` objects.
    """

    external_id: str
    label: str
    start_date: str
    end_date: str
    is_current: bool
class PlayerStatsPayload(TypedDict):
external_id: str
player_external_id: str
team_external_id: str | None
competition_external_id: str | None
season_external_id: str
games_played: int
games_started: int
minutes_played: int
points: float
rebounds: float
assists: float
steals: float
blocks: float
turnovers: float
fg_pct: float | None
three_pct: float | None
ft_pct: float | None
usage_rate: float | None
true_shooting_pct: float | None
player_efficiency_rating: float | None
class PlayerCareerPayload(TypedDict):
external_id: str
player_external_id: str
team_external_id: str | None
competition_external_id: str | None
season_external_id: str | None
role_code: str
shirt_number: int | None
start_date: str | None
end_date: str | None
notes: str
class NormalizedSyncPayload(TypedDict):
    """Return shape of ``sync_all`` / ``sync_incremental``: one list per entity type plus a cursor.

    All keys are required; ``cursor`` may be ``None``.
    """

    # Entity collections
    players: list[PlayerPayload]
    competitions: list[CompetitionPayload]
    teams: list[TeamPayload]
    seasons: list[SeasonPayload]
    player_stats: list[PlayerStatsPayload]
    player_careers: list[PlayerCareerPayload]

    # Sync position marker
    cursor: str | None

View File

@ -1,45 +1,63 @@
from abc import ABC, abstractmethod
from apps.providers.contracts import (
CompetitionPayload,
NormalizedSyncPayload,
PlayerCareerPayload,
PlayerPayload,
PlayerStatsPayload,
SeasonPayload,
TeamPayload,
)
class BaseProviderAdapter(ABC):
"""
Provider contract for normalized entity payloads consumed by ingestion services.
Adapters must return provider-agnostic entity dictionaries (see
``apps.providers.contracts``) and keep provider-specific response shapes
internal to the adapter/client/mapping layer.

Every method below is abstract and raises ``NotImplementedError`` in this
base class. Each return annotation names the ``TypedDict`` from
``apps.providers.contracts`` that the method is expected to yield.
"""
# Declared here without a value; presumably set by each concrete adapter (TODO confirm).
namespace: str
# Paged player search: keyword-only ``query``/``limit``/``offset``.
@abstractmethod
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[dict]:
def search_players(self, *, query: str = "", limit: int = 50, offset: int = 0) -> list[PlayerPayload]:
raise NotImplementedError
# Single-player lookup by external id; ``None`` is an allowed result.
@abstractmethod
def fetch_player(self, *, external_player_id: str) -> dict | None:
def fetch_player(self, *, external_player_id: str) -> PlayerPayload | None:
raise NotImplementedError
# Bulk fetchers: one per normalized entity type, no arguments.
@abstractmethod
def fetch_players(self) -> list[dict]:
def fetch_players(self) -> list[PlayerPayload]:
raise NotImplementedError
@abstractmethod
def fetch_competitions(self) -> list[dict]:
def fetch_competitions(self) -> list[CompetitionPayload]:
raise NotImplementedError
@abstractmethod
def fetch_teams(self) -> list[dict]:
def fetch_teams(self) -> list[TeamPayload]:
raise NotImplementedError
@abstractmethod
def fetch_seasons(self) -> list[dict]:
def fetch_seasons(self) -> list[SeasonPayload]:
raise NotImplementedError
@abstractmethod
def fetch_player_stats(self) -> list[dict]:
def fetch_player_stats(self) -> list[PlayerStatsPayload]:
raise NotImplementedError
@abstractmethod
def fetch_player_careers(self) -> list[dict]:
def fetch_player_careers(self) -> list[PlayerCareerPayload]:
raise NotImplementedError
# Full snapshot of every entity type plus a ``cursor`` entry.
@abstractmethod
def sync_all(self) -> dict:
def sync_all(self) -> NormalizedSyncPayload:
raise NotImplementedError
# Same return shape as ``sync_all``; takes an optional keyword-only ``cursor``.
@abstractmethod
def sync_incremental(self, *, cursor: str | None = None) -> dict:
def sync_incremental(self, *, cursor: str | None = None) -> NormalizedSyncPayload:
raise NotImplementedError

View File

@ -6,11 +6,28 @@ from typing import Any
from django.utils.text import slugify
from apps.providers.contracts import (
CompetitionPayload,
PlayerCareerPayload,
PlayerPayload,
PlayerStatsPayload,
SeasonPayload,
TeamPayload,
)
def map_competitions() -> list[dict[str, Any]]:
NBA_COMPETITION_EXTERNAL_ID = "competition-nba"
def map_competitions() -> list[CompetitionPayload]:
"""
balldontlie assumptions:
- The API is NBA-focused, so competition is normalized as a single NBA league.
- Competition country is set to US (league home country), not player/team nationality.
"""
return [
{
"external_id": "competition-nba",
"external_id": NBA_COMPETITION_EXTERNAL_ID,
"name": "NBA",
"slug": "nba",
"competition_type": "league",
@ -22,8 +39,11 @@ def map_competitions() -> list[dict[str, Any]]:
]
def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
def map_teams(rows: list[dict[str, Any]]) -> list[TeamPayload]:
"""
Team country is unknown from balldontlie team payloads and stays null.
"""
mapped: list[TeamPayload] = []
for row in rows:
team_id = row.get("id")
if not team_id:
@ -36,7 +56,7 @@ def map_teams(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
"name": full_name,
"short_name": abbreviation,
"slug": slugify(full_name) or f"team-{team_id}",
"country": {"name": "United States", "iso2_code": "US", "iso3_code": "USA"},
"country": None,
"is_national_team": False,
}
)
@ -75,8 +95,12 @@ def _map_role(position: str | None) -> dict[str, str] | None:
return None
def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
def map_players(rows: list[dict[str, Any]]) -> list[PlayerPayload]:
"""
Player-level nationality/birth/physical details are not exposed by this provider's
players endpoint in the current MVP integration, so they are left null.
"""
mapped: list[PlayerPayload] = []
for row in rows:
player_id = row.get("id")
if not player_id:
@ -86,7 +110,6 @@ def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
last_name = row.get("last_name", "")
full_name = f"{first_name} {last_name}".strip() or f"Player {player_id}"
position_value = row.get("position")
team = row.get("team") or {}
mapped.append(
{
@ -95,7 +118,7 @@ def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
"last_name": last_name,
"full_name": full_name,
"birth_date": None,
"nationality": {"name": "Unknown", "iso2_code": "ZZ", "iso3_code": "ZZZ"},
"nationality": None,
"nominal_position": _map_position(position_value),
"inferred_role": _map_role(position_value),
"height_cm": None,
@ -103,22 +126,27 @@ def map_players(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
"dominant_hand": "unknown",
"is_active": True,
"aliases": [],
"current_team_external_id": f"team-{team['id']}" if team.get("id") else None,
}
)
return mapped
def map_seasons(seasons: list[int]) -> list[dict[str, Any]]:
mapped: list[dict[str, Any]] = []
for season in seasons:
def map_seasons(seasons: list[int]) -> list[SeasonPayload]:
    """
    Build one season payload per distinct season year, in ascending year order.

    Current-season fallback:
    - if configured seasons are supplied, the maximum season year is treated as current.

    Each season ``Y`` is labelled ``"Y-(Y+1)"`` and spans ``Y-10-01`` to
    ``(Y+1)-06-30`` as ISO date strings. Duplicate years are collapsed, and an
    empty input yields an empty list.
    """
    normalized_seasons = sorted(set(seasons))
    # Sorted ascending, so the last element is the maximum (current) year.
    current = normalized_seasons[-1] if normalized_seasons else None
    return [
        {
            "external_id": f"season-{season}",
            "label": f"{season}-{season + 1}",
            "start_date": date(season, 10, 1).isoformat(),
            "end_date": date(season + 1, 6, 30).isoformat(),
            "is_current": season == current,
        }
        for season in normalized_seasons
    ]
@ -159,7 +187,7 @@ def map_player_stats(
rows: list[dict[str, Any]],
*,
allowed_seasons: list[int],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
) -> tuple[list[PlayerStatsPayload], list[PlayerCareerPayload]]:
aggregates: dict[tuple[int, int, int], dict[str, Any]] = defaultdict(
lambda: {
"games": 0,
@ -213,8 +241,8 @@ def map_player_stats(
agg["ft_pct_sum"] += _to_float(row.get("ft_pct"))
agg["ft_pct_count"] += 1
player_stats: list[dict[str, Any]] = []
player_careers: list[dict[str, Any]] = []
player_stats: list[PlayerStatsPayload] = []
player_careers: list[PlayerCareerPayload] = []
for (season, player_id, team_id), agg in aggregates.items():
games = agg["games"] or 1
@ -223,7 +251,7 @@ def map_player_stats(
"external_id": f"ps-{season}-{player_id}-{team_id}",
"player_external_id": f"player-{player_id}",
"team_external_id": f"team-{team_id}",
"competition_external_id": "competition-nba",
"competition_external_id": NBA_COMPETITION_EXTERNAL_ID,
"season_external_id": f"season-{season}",
"games_played": agg["games"],
"games_started": 0,
@ -247,7 +275,7 @@ def map_player_stats(
"external_id": f"career-{season}-{player_id}-{team_id}",
"player_external_id": f"player-{player_id}",
"team_external_id": f"team-{team_id}",
"competition_external_id": "competition-nba",
"competition_external_id": NBA_COMPETITION_EXTERNAL_ID,
"season_external_id": f"season-{season}",
"role_code": "",
"shirt_number": None,