Merge branch 'feature/v2-domain-pruning-legacy-models' into feature/hoopscout-v2-static-architecture

This commit is contained in:
Alfredo Di Stasio
2026-03-20 15:57:23 +01:00
15 changed files with 76 additions and 796 deletions

View File

@ -62,6 +62,12 @@ SCHEDULER_INTERVAL_SECONDS=900
# When scheduler is disabled but container is started, keep it idle (avoid restart loops)
SCHEDULER_DISABLED_SLEEP_SECONDS=300
# Legacy provider-sync stack (v1-style) is disabled by default in v2.
LEGACY_PROVIDER_STACK_ENABLED=0
# Optional legacy provider settings (only when LEGACY_PROVIDER_STACK_ENABLED=1):
# PROVIDER_BACKEND=demo
# PROVIDER_DEFAULT_NAMESPACE=mvp_demo
# API safeguards (read-only API is optional)
API_THROTTLE_ANON=100/hour
API_THROTTLE_USER=1000/hour

View File

@ -87,6 +87,7 @@ docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -
- Keep PostgreSQL as source of truth.
- Keep snapshot storage file-based and volume-backed.
- Do not introduce MongoDB or Elasticsearch as source of truth.
- Keep legacy provider/Celery sync code isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.

## Repository Bootstrap Commands

View File

@ -22,7 +22,8 @@ Runtime services are intentionally small:
- optional `scheduler` profile service (runs daily extractor/import loop)

No Redis/Celery services are part of the v2 default runtime topology.
Legacy Celery/provider code remains in-repo but is isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
Default v2 runtime keeps that stack disabled.

## Image Strategy
@ -96,6 +97,7 @@ Core groups:
- snapshot directory vars (`STATIC_DATASET_*`)
- optional future scheduler vars (`SCHEDULER_*`)
- daily orchestration vars (`DAILY_ORCHESTRATION_*`)
- optional legacy provider-sync toggle (`LEGACY_PROVIDER_STACK_ENABLED`)

## Snapshot Storage Convention
@ -404,3 +406,7 @@ This v2 work branch is:
Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation.
They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks.
By default:
- `apps.providers` is not installed
- `/providers/` routes are not mounted
- legacy provider-specific settings are not required

View File

@ -1,4 +1,5 @@
from django.contrib import admin from django.contrib import admin
from django.conf import settings
from .models import ImportFile, ImportRun, IngestionError, IngestionRun from .models import ImportFile, ImportRun, IngestionError, IngestionRun
@ -91,15 +92,18 @@ class ImportFileAdmin(admin.ModelAdmin):
) )
class LegacyIngestionRunAdmin(admin.ModelAdmin):
    """Admin changelist configuration for legacy ``IngestionRun`` rows."""

    list_display = ("provider_namespace", "job_type", "status", "started_at", "finished_at")
    list_filter = ("provider_namespace", "job_type", "status")
    search_fields = ("provider_namespace", "error_summary")


class LegacyIngestionErrorAdmin(admin.ModelAdmin):
    """Admin changelist configuration for legacy ``IngestionError`` rows."""

    list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at")
    list_filter = ("severity", "provider_namespace")
    search_fields = ("entity_type", "external_id", "message")


# The legacy admins are exposed only when the legacy provider stack is opted in.
# They are deliberately NOT decorated with @admin.register: combining the
# decorator with the explicit register() calls below would register each model
# twice.
if settings.LEGACY_PROVIDER_STACK_ENABLED:
    admin.site.register(IngestionRun, LegacyIngestionRunAdmin)
    admin.site.register(IngestionError, LegacyIngestionErrorAdmin)

View File

@ -1,9 +1,14 @@
from django.conf import settings

from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run

# Public API of the ingestion services package that is always available.
__all__ = [
    "start_ingestion_run",
    "finish_ingestion_run",
    "log_ingestion_error",
]

# The sync entry point is imported and exported only when the legacy provider
# stack is explicitly enabled, so the default runtime never imports ``.sync``.
if settings.LEGACY_PROVIDER_STACK_ENABLED:
    from .sync import run_sync_job  # pragma: no cover - legacy provider stack only.

    __all__.append("run_sync_job")

View File

@ -72,10 +72,14 @@ INSTALLED_APPS = [
"apps.teams", "apps.teams",
"apps.stats", "apps.stats",
"apps.scouting", "apps.scouting",
"apps.providers",
"apps.ingestion", "apps.ingestion",
] ]
# Snapshot-first is the v2 default runtime; the legacy provider stack is only
# installed when LEGACY_PROVIDER_STACK_ENABLED is switched on.
LEGACY_PROVIDER_STACK_ENABLED = env_bool("LEGACY_PROVIDER_STACK_ENABLED", False)
if LEGACY_PROVIDER_STACK_ENABLED:
    INSTALLED_APPS += ["apps.providers"]
MIDDLEWARE = [ MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware", "django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware", "django.contrib.sessions.middleware.SessionMiddleware",
@ -195,29 +199,30 @@ SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))
# Reject scheduler intervals below 30 seconds at settings-import time.
if SCHEDULER_INTERVAL_SECONDS < 30:
    raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.")

# Legacy provider settings are defined only when the legacy stack is opted in;
# with the flag off none of the PROVIDER_* names below exist on the settings
# module.
if LEGACY_PROVIDER_STACK_ENABLED:
    PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
    PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
    PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
    PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
    PROVIDER_MVP_DATA_FILE = os.getenv(
        "PROVIDER_MVP_DATA_FILE",
        str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
    )
    PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
    PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
    PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
    PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
    PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
    PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
    PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
    PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
    PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
    PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
    # Comma-separated list of seasons; entries that are not plain digits are
    # silently dropped rather than raising.
    PROVIDER_BALLDONTLIE_SEASONS = [
        int(value.strip())
        for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
        if value.strip().isdigit()
    ]
LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper() LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper()
LOG_SQL = env_bool("DJANGO_LOG_SQL", False) LOG_SQL = env_bool("DJANGO_LOG_SQL", False)

View File

@ -1,4 +1,5 @@
from django.contrib import admin from django.contrib import admin
from django.conf import settings
from django.urls import include, path from django.urls import include, path
urlpatterns = [ urlpatterns = [
@ -11,6 +12,8 @@ urlpatterns = [
path("teams/", include("apps.teams.urls")), path("teams/", include("apps.teams.urls")),
path("stats/", include("apps.stats.urls")), path("stats/", include("apps.stats.urls")),
path("scouting/", include("apps.scouting.urls")), path("scouting/", include("apps.scouting.urls")),
path("providers/", include("apps.providers.urls")),
path("ingestion/", include("apps.ingestion.urls")), path("ingestion/", include("apps.ingestion.urls")),
] ]
# Mount the legacy provider routes only when the legacy stack is opted in.
if settings.LEGACY_PROVIDER_STACK_ENABLED:
    urlpatterns += [path("providers/", include("apps.providers.urls"))]

View File

@ -1,38 +0,0 @@
import os
import subprocess
import sys
import pytest
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
env = os.environ.copy()
env.update(env_overrides)
return subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
text=True,
env=env,
check=False,
)
@pytest.mark.django_db
def test_invalid_cron_does_not_crash_config_import_path():
    """Importing the Celery app with a bad cron value exits cleanly with no beat entries."""
    probe_code = (
        "import config; "
        "from config.celery import app; "
        "print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
    )
    child_env = {
        "DJANGO_SETTINGS_MODULE": "config.settings.development",
        "DJANGO_ENV": "development",
        "DJANGO_DEBUG": "1",
        "INGESTION_SCHEDULE_ENABLED": "1",
        "INGESTION_SCHEDULE_CRON": "bad cron value",
    }

    completed = _run_python_import(probe_code, child_env)

    assert completed.returncode == 0
    assert "beat_schedule_size=0" in completed.stdout

View File

@ -1,251 +0,0 @@
import os
import pytest
from apps.competitions.models import Competition, Season
from apps.ingestion.models import IngestionError, IngestionRun
from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Nationality, Player
from apps.providers.exceptions import ProviderRateLimitError
from apps.providers.models import ExternalMapping
from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team
@pytest.mark.django_db
def test_run_full_sync_creates_domain_objects(settings):
    """A full sync succeeds, populates every domain table and records its steps."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    sync_run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    assert sync_run.status == IngestionRun.RunStatus.SUCCESS
    for model in (Competition, Team, Season, Player, PlayerSeason, PlayerSeasonStats):
        assert model.objects.count() >= 1
    assert Player.objects.filter(origin_competition__isnull=False).exists()

    expected_steps = [
        "competitions",
        "teams",
        "seasons",
        "players",
        "player_stats",
        "player_careers",
    ]
    assert sync_run.context.get("completed_steps") == expected_steps
    assert sync_run.context.get("source_counts", {}).get("players", 0) >= 1
@pytest.mark.django_db
def test_full_sync_is_idempotent(settings):
    """Running the same full sync twice must not change any row counts."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    def row_counts():
        # Snapshot of the table sizes the sync is expected to touch.
        return {
            "competition": Competition.objects.count(),
            "team": Team.objects.count(),
            "season": Season.objects.count(),
            "player": Player.objects.count(),
            "player_season": PlayerSeason.objects.count(),
            "player_stats": PlayerSeasonStats.objects.count(),
        }

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_first = row_counts()

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_second = row_counts()

    assert counts_after_first == counts_after_second
@pytest.mark.django_db
def test_incremental_sync_runs_successfully(settings):
    """An incremental sync finishes successfully with consistent bookkeeping."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    sync_run = run_sync_job(
        provider_namespace="mvp_demo",
        job_type=IngestionRun.JobType.INCREMENTAL,
        cursor="demo-cursor",
    )

    assert sync_run.status == IngestionRun.RunStatus.SUCCESS
    assert sync_run.records_processed > 0
    # Both timestamps must be set and ordered.
    assert sync_run.started_at is not None
    assert sync_run.finished_at is not None
    assert sync_run.finished_at >= sync_run.started_at
    assert sync_run.error_summary == ""
@pytest.mark.django_db
def test_run_sync_handles_rate_limit(settings, monkeypatch):
    """A rate-limited sync re-raises and leaves a FAILED run with a logged error.

    The ``PROVIDER_MVP_FORCE_RATE_LIMIT`` environment variable is set through
    ``monkeypatch`` so it is restored automatically, even when an assertion
    fails part-way through the test. Setting it directly on ``os.environ``
    and popping it on the last line leaked the flag into later tests on
    failure.
    """
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    monkeypatch.setenv("PROVIDER_MVP_FORCE_RATE_LIMIT", "1")

    with pytest.raises(ProviderRateLimitError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    run = IngestionRun.objects.order_by("-id").first()
    assert run is not None
    assert run.status == IngestionRun.RunStatus.FAILED
    assert run.started_at is not None
    assert run.finished_at is not None
    assert "Rate limit" in run.error_summary
    assert IngestionError.objects.filter(ingestion_run=run).exists()
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
    """Two full syncs of an unchanging payload leave row and mapping counts unchanged."""

    class StableProvider:
        """Provider stub that returns the same normalized payload on every call."""

        def sync_all(self):
            # A fresh dict is built per call because sync_incremental mutates it.
            competitions = [
                {
                    "external_id": "competition-nba",
                    "name": "NBA",
                    "slug": "nba",
                    "competition_type": "league",
                    "gender": "men",
                    "level": 1,
                    "country": None,
                    "is_active": True,
                }
            ]
            teams = [
                {
                    "external_id": "team-14",
                    "name": "Los Angeles Lakers",
                    "short_name": "LAL",
                    "slug": "los-angeles-lakers",
                    "country": None,
                    "is_national_team": False,
                }
            ]
            seasons = [
                {
                    "external_id": "season-2024",
                    "label": "2024-2025",
                    "start_date": "2024-10-01",
                    "end_date": "2025-06-30",
                    "is_current": False,
                }
            ]
            players = [
                {
                    "external_id": "player-237",
                    "first_name": "LeBron",
                    "last_name": "James",
                    "full_name": "LeBron James",
                    "birth_date": None,
                    "nationality": None,
                    "nominal_position": {"code": "SF", "name": "Small Forward"},
                    "inferred_role": {"code": "wing", "name": "Wing"},
                    "height_cm": None,
                    "weight_kg": None,
                    "dominant_hand": "unknown",
                    "is_active": True,
                    "aliases": [],
                }
            ]
            player_stats = [
                {
                    "external_id": "ps-2024-237-14",
                    "player_external_id": "player-237",
                    "team_external_id": "team-14",
                    "competition_external_id": "competition-nba",
                    "season_external_id": "season-2024",
                    "games_played": 2,
                    "games_started": 0,
                    "minutes_played": 68,
                    "points": 25,
                    "rebounds": 9,
                    "assists": 8,
                    "steals": 1.5,
                    "blocks": 0.5,
                    "turnovers": 3.5,
                    "fg_pct": 55.0,
                    "three_pct": 45.0,
                    "ft_pct": 95.0,
                    "usage_rate": None,
                    "true_shooting_pct": None,
                    "player_efficiency_rating": None,
                }
            ]
            player_careers = [
                {
                    "external_id": "career-2024-237-14",
                    "player_external_id": "player-237",
                    "team_external_id": "team-14",
                    "competition_external_id": "competition-nba",
                    "season_external_id": "season-2024",
                    "role_code": "",
                    "shirt_number": None,
                    "start_date": "2024-10-01",
                    "end_date": "2025-06-30",
                    "notes": "Imported from balldontlie aggregated box scores",
                }
            ]
            return {
                "competitions": competitions,
                "teams": teams,
                "seasons": seasons,
                "players": players,
                "player_stats": player_stats,
                "player_careers": player_careers,
            }

        def sync_incremental(self, *, cursor: str | None = None):
            result = self.sync_all()
            result["cursor"] = cursor
            return result

    def table_counts():
        # Row counts for every table the sync writes, plus balldontlie mappings.
        return {
            "competition": Competition.objects.count(),
            "team": Team.objects.count(),
            "season": Season.objects.count(),
            "player": Player.objects.count(),
            "player_season": PlayerSeason.objects.count(),
            "player_stats": PlayerSeasonStats.objects.count(),
            "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
        }

    monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    lebron = Player.objects.get(full_name="LeBron James")
    assert lebron.nationality is None
    assert not Nationality.objects.filter(iso2_code="ZZ").exists()
    counts_first = table_counts()

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_second = table_counts()

    assert counts_first == counts_second
@pytest.mark.django_db
def test_batch_transactions_preserve_prior_step_progress_on_failure(settings, monkeypatch):
    """When the teams step blows up, the competitions written before it are kept."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    def failing_teams_step(*args, **kwargs):
        raise RuntimeError("teams-sync-failed")

    monkeypatch.setattr("apps.ingestion.services.sync._sync_teams", failing_teams_step)

    with pytest.raises(RuntimeError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    latest_run = IngestionRun.objects.order_by("-id").first()
    assert latest_run is not None
    assert latest_run.status == IngestionRun.RunStatus.FAILED
    # Earlier step survives; the failed step wrote nothing.
    assert Competition.objects.exists()
    assert Team.objects.count() == 0
    assert latest_run.context.get("completed_steps") == ["competitions"]
    assert "Unhandled ingestion error" in latest_run.error_summary

View File

@ -1,112 +0,0 @@
import pytest
from contextlib import contextmanager
from celery.schedules import crontab
import psycopg
from django.conf import settings
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.runs import _build_ingestion_lock_key, release_ingestion_lock, try_acquire_ingestion_lock
from apps.ingestion.tasks import scheduled_provider_sync, trigger_incremental_sync
from config.celery import app as celery_app, build_periodic_schedule
@pytest.mark.django_db
def test_periodic_task_registered():
    """The scheduled sync task is present in the Celery app's task registry."""
    task_name = "apps.ingestion.tasks.scheduled_provider_sync"
    assert task_name in celery_app.tasks
@pytest.mark.django_db
def test_build_periodic_schedule_enabled(settings):
    """With scheduling enabled, the beat schedule has one crontab entry for the sync task."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "15 * * * *"

    beat_schedule = build_periodic_schedule()

    assert "ingestion.scheduled_provider_sync" in beat_schedule
    sync_entry = beat_schedule["ingestion.scheduled_provider_sync"]
    assert sync_entry["task"] == "apps.ingestion.tasks.scheduled_provider_sync"
    cron = sync_entry["schedule"]
    assert isinstance(cron, crontab)
    assert cron._orig_minute == "15"
@pytest.mark.django_db
def test_build_periodic_schedule_disabled(settings):
    """With scheduling disabled, the beat schedule is empty."""
    settings.INGESTION_SCHEDULE_ENABLED = False
    beat_schedule = build_periodic_schedule()
    assert beat_schedule == {}
@pytest.mark.django_db
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
    """An unparsable cron expression yields an empty schedule and an error log line."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "invalid-cron"

    with caplog.at_level("ERROR"):
        beat_schedule = build_periodic_schedule()

    assert beat_schedule == {}
    matching = [
        message
        for message in caplog.messages
        if "Invalid periodic ingestion schedule config. Task disabled." in message
    ]
    assert matching
@pytest.mark.django_db
def test_trigger_incremental_sync_skips_when_advisory_lock_not_acquired(settings, monkeypatch):
    """If the advisory lock is not obtained, the run is recorded as CANCELED."""
    settings.INGESTION_PREVENT_OVERLAP = True

    @contextmanager
    def lock_never_acquired(**kwargs):
        # Report "lock not acquired" to the task under test.
        yield False

    monkeypatch.setattr("apps.ingestion.tasks.ingestion_advisory_lock", lock_never_acquired)

    async_result = trigger_incremental_sync.apply(
        kwargs={"provider_namespace": "mvp_demo"},
    )
    run_id = async_result.get()

    skipped_run = IngestionRun.objects.get(id=run_id)
    assert skipped_run.status == IngestionRun.RunStatus.CANCELED
    assert "advisory lock" in skipped_run.error_summary
@pytest.mark.django_db
def test_advisory_lock_prevents_concurrent_acquisition():
    """While another connection holds the advisory lock, acquisition fails.

    A second, independent ``psycopg`` connection takes the lock first. The
    Django-side helper must then report ``False``, and succeed again once the
    external connection has released the lock.

    Connection parameters are passed to ``psycopg.connect`` as keyword
    arguments instead of being formatted into a conninfo string by hand, so
    values that are empty or contain spaces or quotes cannot corrupt the
    connection string.
    """
    provider_namespace = "mvp_demo"
    job_type = IngestionRun.JobType.INCREMENTAL
    lock_key = _build_ingestion_lock_key(provider_namespace=provider_namespace, job_type=job_type)

    db = settings.DATABASES["default"]
    connect_params = {
        "dbname": db["NAME"],
        "user": db["USER"],
        "password": db["PASSWORD"],
        "host": db["HOST"],
        "port": db["PORT"],
    }
    # Unset values are omitted so the driver falls back to its own defaults.
    connect_params = {key: value for key, value in connect_params.items() if value not in (None, "")}

    with psycopg.connect(**connect_params) as external_conn:
        with external_conn.cursor() as cursor:
            cursor.execute("SELECT pg_advisory_lock(%s);", [lock_key])
            acquired, _ = try_acquire_ingestion_lock(
                provider_namespace=provider_namespace,
                job_type=job_type,
            )
            assert acquired is False
            cursor.execute("SELECT pg_advisory_unlock(%s);", [lock_key])

    acquired, django_key = try_acquire_ingestion_lock(
        provider_namespace=provider_namespace,
        job_type=job_type,
    )
    assert acquired is True
    release_ingestion_lock(lock_key=django_key)
@pytest.mark.django_db
def test_scheduled_provider_sync_uses_configured_job_type(settings, monkeypatch):
    """The scheduled task forwards the configured namespace and job type to the runner."""
    settings.INGESTION_SCHEDULE_JOB_TYPE = IngestionRun.JobType.FULL_SYNC
    settings.INGESTION_SCHEDULE_PROVIDER_NAMESPACE = "mvp_demo"
    seen_kwargs = {}

    def recording_runner(**kwargs):
        # Capture what the task passes down and return a sentinel run id.
        seen_kwargs.update(kwargs)
        return 99

    monkeypatch.setattr("apps.ingestion.tasks._run_sync_with_overlap_guard", recording_runner)

    outcome = scheduled_provider_sync.apply().get()

    assert outcome == 99
    assert seen_kwargs["provider_namespace"] == "mvp_demo"
    assert seen_kwargs["job_type"] == IngestionRun.JobType.FULL_SYNC

View File

@ -4,8 +4,6 @@ import pytest
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.urls import reverse from django.urls import reverse
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Nationality, Player, Position, Role from apps.players.models import Nationality, Player, Position, Role
from apps.scouting.models import SavedSearch from apps.scouting.models import SavedSearch
@ -49,25 +47,3 @@ def test_saved_search_run_filters_player_results(client):
assert response.status_code == 200 assert response.status_code == 200
assert "Marco Rossi" in response.content.decode() assert "Marco Rossi" in response.content.decode()
assert "Luca Bianchi" not in response.content.decode() assert "Luca Bianchi" not in response.content.decode()
@pytest.mark.django_db
def test_ingestion_output_is_searchable_in_ui_and_api(settings, client):
    """A player created by a full sync is found by both the UI and API origin filter."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    sync_run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    assert sync_run.status == IngestionRun.RunStatus.SUCCESS

    synced_player = Player.objects.filter(origin_competition__isnull=False).order_by("id").first()
    assert synced_player is not None
    assert synced_player.origin_competition_id is not None

    query = {"origin_competition": synced_player.origin_competition_id}
    ui_response = client.get(reverse("players:index"), data=query)
    api_response = client.get(reverse("api:players"), data=query)
    assert ui_response.status_code == 200
    assert api_response.status_code == 200

    ids_in_ui = {row.id for row in ui_response.context["players"]}
    ids_in_api = {row["id"] for row in api_response.json()["results"]}
    assert synced_player.id in ids_in_ui
    assert synced_player.id in ids_in_api

View File

@ -4,7 +4,7 @@ import pytest
from django.urls import reverse from django.urls import reverse
from apps.competitions.models import Competition, Season from apps.competitions.models import Competition, Season
from apps.players.models import Nationality, Player, Position, Role from apps.players.models import Nationality, Player, PlayerAlias, Position, Role
from apps.stats.models import PlayerSeason, PlayerSeasonStats from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team from apps.teams.models import Team

View File

@ -1,77 +0,0 @@
import os
import pytest
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.exceptions import ProviderNotFoundError, ProviderRateLimitError
from apps.providers.registry import get_provider
@pytest.mark.django_db
def test_mvp_provider_fetch_and_search_players():
    """The demo adapter lists players, searches by name and fetches one by id."""
    demo = MvpDemoProviderAdapter()

    all_players = demo.fetch_players()
    assert len(all_players) >= 2

    matches = demo.search_players(query="luca")
    assert any("Luca" in match["full_name"] for match in matches)

    single = demo.fetch_player(external_player_id="player-001")
    assert single is not None
    assert single["full_name"] == "Luca Rinaldi"
@pytest.mark.django_db
def test_mvp_provider_rate_limit_signal(monkeypatch):
    """With the force-rate-limit flag set, fetching players raises a rate-limit error.

    The flag is set through ``monkeypatch`` so the environment is restored
    even if the expectation fails. Setting it directly on ``os.environ`` and
    popping it on the last line left the flag behind for later tests whenever
    ``pytest.raises`` failed.
    """
    monkeypatch.setenv("PROVIDER_MVP_FORCE_RATE_LIMIT", "1")
    adapter = MvpDemoProviderAdapter()

    with pytest.raises(ProviderRateLimitError):
        adapter.fetch_players()
@pytest.mark.django_db
def test_provider_registry_resolution(settings):
    """The registry resolves the default namespace and rejects unknown ones."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    default_provider = get_provider()
    assert isinstance(default_provider, MvpDemoProviderAdapter)

    with pytest.raises(ProviderNotFoundError):
        get_provider("does-not-exist")
@pytest.mark.django_db
def test_demo_provider_sync_payload_uses_normalized_shape():
    """The demo ``sync_all`` payload exposes exactly the normalized keys."""
    expected_sections = {
        "players",
        "competitions",
        "teams",
        "seasons",
        "player_stats",
        "player_careers",
        "cursor",
    }
    expected_player_fields = {
        "external_id",
        "first_name",
        "last_name",
        "full_name",
        "birth_date",
        "nationality",
        "nominal_position",
        "inferred_role",
        "height_cm",
        "weight_kg",
        "dominant_hand",
        "is_active",
        "aliases",
    }

    payload = MvpDemoProviderAdapter().sync_all()

    assert set(payload.keys()) == expected_sections
    assert payload["cursor"] is None
    first_player = payload["players"][0]
    assert set(first_player.keys()) == expected_player_fields

View File

@ -1,263 +0,0 @@
from __future__ import annotations
import time
from typing import Any
import pytest
import requests
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.clients.balldontlie import BalldontlieClient
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
from apps.providers.registry import get_default_provider_namespace, get_provider
from apps.providers.services.balldontlie_mappings import map_seasons
class _FakeResponse:
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
self.status_code = status_code
self._payload = payload or {}
self.headers = headers or {}
self.text = text
def json(self):
return self._payload
class _FakeSession:
def __init__(self, responses: list[Any]):
self._responses = responses
self.calls: list[dict[str, Any]] = []
def get(self, *args, **kwargs):
self.calls.append(kwargs)
item = self._responses.pop(0)
if isinstance(item, Exception):
raise item
return item
class _FakeBalldontlieClient:
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
if path == "/nba/v1/teams":
return {
"data": [
{
"id": 14,
"full_name": "Los Angeles Lakers",
"abbreviation": "LAL",
}
]
}
return {"data": []}
def list_paginated(
self,
path: str,
*,
params: dict[str, Any] | None = None,
per_page: int = 100,
page_limit: int = 1,
) -> list[dict[str, Any]]:
if path == "/nba/v1/players":
return [
{
"id": 237,
"first_name": "LeBron",
"last_name": "James",
"position": "F",
"team": {"id": 14},
}
]
if path == "/nba/v1/stats":
return [
{
"pts": 20,
"reb": 8,
"ast": 7,
"stl": 1,
"blk": 1,
"turnover": 3,
"fg_pct": 0.5,
"fg3_pct": 0.4,
"ft_pct": 0.9,
"min": "35:12",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
{
"pts": 30,
"reb": 10,
"ast": 9,
"stl": 2,
"blk": 0,
"turnover": 4,
"fg_pct": 0.6,
"fg3_pct": 0.5,
"ft_pct": 1.0,
"min": "33:00",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
]
return []
@pytest.mark.django_db
def test_provider_registry_backend_selection(settings):
    """The backend setting picks the namespace unless an explicit default overrides it."""
    settings.PROVIDER_DEFAULT_NAMESPACE = ""

    # No explicit default namespace: the backend decides.
    settings.PROVIDER_BACKEND = "demo"
    assert get_default_provider_namespace() == "mvp_demo"
    assert isinstance(get_provider(), MvpDemoProviderAdapter)

    settings.PROVIDER_BACKEND = "balldontlie"
    assert get_default_provider_namespace() == "balldontlie"
    assert isinstance(get_provider(), BalldontlieProviderAdapter)

    # An explicit default namespace wins over the backend.
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    assert get_default_provider_namespace() == "mvp_demo"
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
    """The adapter maps canned client rows into the normalized sync payload."""
    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    payload = BalldontlieProviderAdapter(client=_FakeBalldontlieClient()).sync_all()

    expected_first_ids = {
        "competitions": "competition-nba",
        "teams": "team-14",
        "players": "player-237",
        "seasons": "season-2024",
    }
    for section in ("competitions", "teams", "players", "seasons"):
        assert payload[section][0]["external_id"] == expected_first_ids[section]

    aggregated = payload["player_stats"][0]
    assert aggregated["games_played"] == 2
    assert aggregated["points"] == 25.0
    assert aggregated["fg_pct"] == 55.0

    mapped_player = payload["players"][0]
    assert mapped_player["nationality"] is None
    assert "current_team_external_id" not in mapped_player

    expected_keys = {
        "external_id",
        "first_name",
        "last_name",
        "full_name",
        "birth_date",
        "nationality",
        "nominal_position",
        "inferred_role",
        "height_cm",
        "weight_kg",
        "dominant_hand",
        "is_active",
        "aliases",
    }
    assert set(mapped_player.keys()) == expected_keys
@pytest.mark.django_db
def test_balldontlie_map_seasons_marks_latest_as_current():
    """Seasons are de-duplicated, sorted, and only the latest is flagged current."""
    mapped = map_seasons([2022, 2024, 2023, 2024])

    flagged_current = [season for season in mapped if season["is_current"]]
    assert len(flagged_current) == 1
    assert flagged_current[0]["external_id"] == "season-2024"

    ordered_ids = [season["external_id"] for season in mapped]
    assert ordered_ids == ["season-2022", "season-2023", "season-2024"]
@pytest.mark.django_db
def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings):
    """With strict mode off, a 401 on stats still yields players and teams but no stats."""

    class _UnauthorizedStatsClient(_FakeBalldontlieClient):
        """Client double whose stats endpoint always rejects the request."""

        def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1):
            if path != "/nba/v1/stats":
                return super().list_paginated(path, params=params, per_page=per_page, page_limit=page_limit)
            raise ProviderUnauthorizedError(
                provider="balldontlie",
                path="stats",
                status_code=401,
                detail="Unauthorized",
            )

    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False

    degraded = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient()).sync_all()

    assert degraded["players"]
    assert degraded["teams"]
    assert degraded["player_stats"] == []
    assert degraded["player_careers"] == []
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
    """A 429 followed by a 200 is retried and the second payload is returned."""
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    scripted = [
        _FakeResponse(status_code=429, headers={"Retry-After": "0"}),
        _FakeResponse(status_code=200, payload={"data": []}),
    ]
    client = BalldontlieClient(session=_FakeSession(responses=scripted))

    result = client.get_json("players")

    assert result == {"data": []}
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
    """Timeouts on every attempt surface as a transient provider error."""
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    always_slow = _FakeSession(responses=[requests.Timeout("slow"), requests.Timeout("slow")])
    client = BalldontlieClient(session=always_slow)

    with pytest.raises(ProviderTransientError):
        client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
    """A 429 on every attempt raises a rate-limit error once retries run out."""
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    scripted = [
        _FakeResponse(status_code=429, headers={"Retry-After": "1"}),
        _FakeResponse(status_code=429, headers={"Retry-After": "1"}),
    ]
    client = BalldontlieClient(session=_FakeSession(responses=scripted))

    with pytest.raises(ProviderRateLimitError):
        client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_cursor_pagination(settings):
    """Pagination follows ``meta.next_cursor`` and stops when it becomes ``None``."""
    first_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}},
    )
    last_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 2}], "meta": {"next_cursor": None}},
    )
    session = _FakeSession(responses=[first_page, last_page])
    client = BalldontlieClient(session=session)

    rows = client.list_paginated("players", per_page=1, page_limit=5)

    assert rows == [{"id": 1}, {"id": 2}]
    # First request carries no paging marker; the second carries the cursor.
    first_params = session.calls[0]["params"]
    assert "page" not in first_params
    assert "cursor" not in first_params
    assert session.calls[1]["params"]["cursor"] == 101

View File

@ -0,0 +1,15 @@
import pytest
from django.conf import settings
@pytest.mark.django_db
def test_legacy_provider_stack_disabled_by_default():
    """The legacy flag is off and ``apps.providers`` is not installed."""
    flag = settings.LEGACY_PROVIDER_STACK_ENABLED
    installed = settings.INSTALLED_APPS

    assert flag is False
    assert "apps.providers" not in installed
@pytest.mark.django_db
def test_providers_route_not_mounted_by_default(client):
    """Requesting ``/providers/`` returns 404."""
    providers_response = client.get("/providers/")
    status = providers_response.status_code
    assert status == 404