From b6b67539310462f33ab19901c8b4b7ac57022686 Mon Sep 17 00:00:00 2001 From: Alfredo Di Stasio Date: Fri, 20 Mar 2026 15:57:20 +0100 Subject: [PATCH] refactor(v2): isolate legacy provider stack and prune obsolete tests --- .env.example | 6 + CONTRIBUTING.md | 1 + README.md | 8 +- apps/ingestion/admin.py | 8 +- apps/ingestion/services/__init__.py | 9 +- config/settings/base.py | 53 +++--- config/urls.py | 5 +- tests/test_celery_schedule_safety.py | 38 ---- tests/test_ingestion_sync.py | 251 ------------------------- tests/test_ingestion_tasks.py | 112 ------------ tests/test_integration_paths.py | 24 --- tests/test_players_views.py | 2 +- tests/test_provider_adapter.py | 77 -------- tests/test_provider_balldontlie.py | 263 --------------------------- tests/test_v2_runtime_boundaries.py | 15 ++ 15 files changed, 76 insertions(+), 796 deletions(-) delete mode 100644 tests/test_celery_schedule_safety.py delete mode 100644 tests/test_ingestion_sync.py delete mode 100644 tests/test_ingestion_tasks.py delete mode 100644 tests/test_provider_adapter.py delete mode 100644 tests/test_provider_balldontlie.py create mode 100644 tests/test_v2_runtime_boundaries.py diff --git a/.env.example b/.env.example index 4537838..dc67835 100644 --- a/.env.example +++ b/.env.example @@ -62,6 +62,12 @@ SCHEDULER_INTERVAL_SECONDS=900 # When scheduler is disabled but container is started, keep it idle (avoid restart loops) SCHEDULER_DISABLED_SLEEP_SECONDS=300 +# Legacy provider-sync stack (v1-style) is disabled by default in v2. +LEGACY_PROVIDER_STACK_ENABLED=0 +# Optional legacy provider settings (only when LEGACY_PROVIDER_STACK_ENABLED=1): +# PROVIDER_BACKEND=demo +# PROVIDER_DEFAULT_NAMESPACE=mvp_demo + # API safeguards (read-only API is optional) API_THROTTLE_ANON=100/hour API_THROTTLE_USER=1000/hour diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d105e9f..59712cf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,6 +87,7 @@ docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh - - Keep PostgreSQL as source of truth. - Keep snapshot storage file-based and volume-backed. - Do not introduce MongoDB or Elasticsearch as source of truth. +- Keep legacy provider/Celery sync code isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`. ## Repository Bootstrap Commands diff --git a/README.md b/README.md index 901f852..20ab074 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ Runtime services are intentionally small: - optional `scheduler` profile service (runs daily extractor/import loop) No Redis/Celery services are part of the v2 default runtime topology. -Legacy Celery/provider code is still in repository history/codebase but de-emphasized for v2. +Legacy Celery/provider code remains in-repo but is isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`. +Default v2 runtime keeps that stack disabled. ## Image Strategy @@ -96,6 +97,7 @@ Core groups: - snapshot directory vars (`STATIC_DATASET_*`) - optional future scheduler vars (`SCHEDULER_*`) - daily orchestration vars (`DAILY_ORCHESTRATION_*`) +- optional legacy provider-sync toggle (`LEGACY_PROVIDER_STACK_ENABLED`) ## Snapshot Storage Convention @@ -404,3 +406,7 @@ This v2 work branch is: Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation. They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks. +By default: +- `apps.providers` is not installed +- `/providers/` routes are not mounted +- legacy provider-specific settings are not required diff --git a/apps/ingestion/admin.py b/apps/ingestion/admin.py index fece4c6..5f7f853 100644 --- a/apps/ingestion/admin.py +++ b/apps/ingestion/admin.py @@ -1,4 +1,5 @@ from django.contrib import admin +from django.conf import settings from .models import ImportFile, ImportRun, IngestionError, IngestionRun @@ -91,15 +92,18 @@ class ImportFileAdmin(admin.ModelAdmin): ) -@admin.register(IngestionRun) class LegacyIngestionRunAdmin(admin.ModelAdmin): list_display = ("provider_namespace", "job_type", "status", "started_at", "finished_at") list_filter = ("provider_namespace", "job_type", "status") search_fields = ("provider_namespace", "error_summary") -@admin.register(IngestionError) class LegacyIngestionErrorAdmin(admin.ModelAdmin): list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at") list_filter = ("severity", "provider_namespace") search_fields = ("entity_type", "external_id", "message") + + +if settings.LEGACY_PROVIDER_STACK_ENABLED: + admin.site.register(IngestionRun, LegacyIngestionRunAdmin) + admin.site.register(IngestionError, LegacyIngestionErrorAdmin) diff --git a/apps/ingestion/services/__init__.py b/apps/ingestion/services/__init__.py index 4327fb2..b5af128 100644 --- a/apps/ingestion/services/__init__.py +++ b/apps/ingestion/services/__init__.py @@ -1,9 +1,14 @@ +from django.conf import settings + from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run -from .sync import run_sync_job __all__ = [ "start_ingestion_run", "finish_ingestion_run", "log_ingestion_error", - "run_sync_job", ] + +if settings.LEGACY_PROVIDER_STACK_ENABLED: + from .sync import run_sync_job # pragma: no cover - legacy provider stack only. + + __all__.append("run_sync_job") diff --git a/config/settings/base.py b/config/settings/base.py index c11befe..da084d8 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -72,10 +72,14 @@ INSTALLED_APPS = [ "apps.teams", "apps.stats", "apps.scouting", - "apps.providers", "apps.ingestion", ] +# v2 default runtime is snapshot-first. Legacy provider stack is opt-in. +LEGACY_PROVIDER_STACK_ENABLED = env_bool("LEGACY_PROVIDER_STACK_ENABLED", False) +if LEGACY_PROVIDER_STACK_ENABLED: + INSTALLED_APPS.append("apps.providers") + MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -195,29 +199,30 @@ SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900")) if SCHEDULER_INTERVAL_SECONDS < 30: raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.") -PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower() -PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo") -PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie") -PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip() -PROVIDER_MVP_DATA_FILE = os.getenv( - "PROVIDER_MVP_DATA_FILE", - str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"), -) -PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) -PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) -PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10")) -PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io") -PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "") -PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5")) -PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100")) -PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10")) -PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100")) -PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False) -PROVIDER_BALLDONTLIE_SEASONS = [ - int(value.strip()) - for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",") - if value.strip().isdigit() -] +if LEGACY_PROVIDER_STACK_ENABLED: + PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower() + PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo") + PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie") + PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip() + PROVIDER_MVP_DATA_FILE = os.getenv( + "PROVIDER_MVP_DATA_FILE", + str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"), + ) + PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) + PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) + PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10")) + PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io") + PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "") + PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5")) + PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100")) + PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10")) + PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100")) + PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False) + PROVIDER_BALLDONTLIE_SEASONS = [ + int(value.strip()) + for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",") + if value.strip().isdigit() + ] LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper() LOG_SQL = env_bool("DJANGO_LOG_SQL", False) diff --git a/config/urls.py b/config/urls.py index 4526a0d..c98ac2c 100644 --- a/config/urls.py +++ b/config/urls.py @@ -1,4 +1,5 @@ from django.contrib import admin +from django.conf import settings from django.urls import include, path urlpatterns = [ @@ -11,6 +12,8 @@ urlpatterns = [ path("teams/", include("apps.teams.urls")), path("stats/", include("apps.stats.urls")), path("scouting/", include("apps.scouting.urls")), - path("providers/", include("apps.providers.urls")), path("ingestion/", include("apps.ingestion.urls")), ] + +if settings.LEGACY_PROVIDER_STACK_ENABLED: + urlpatterns.append(path("providers/", include("apps.providers.urls"))) diff --git a/tests/test_celery_schedule_safety.py b/tests/test_celery_schedule_safety.py deleted file mode 100644 index df7015f..0000000 --- a/tests/test_celery_schedule_safety.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import subprocess -import sys - -import pytest - - -def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess: - env = os.environ.copy() - env.update(env_overrides) - return subprocess.run( - [sys.executable, "-c", code], - capture_output=True, - text=True, - env=env, - check=False, - ) - - -@pytest.mark.django_db -def test_invalid_cron_does_not_crash_config_import_path(): - result = _run_python_import( - ( - "import config; " - "from config.celery import app; " - "print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')" - ), - { - "DJANGO_SETTINGS_MODULE": "config.settings.development", - "DJANGO_ENV": "development", - "DJANGO_DEBUG": "1", - "INGESTION_SCHEDULE_ENABLED": "1", - "INGESTION_SCHEDULE_CRON": "bad cron value", - }, - ) - - assert result.returncode == 0 - assert "beat_schedule_size=0" in result.stdout diff --git a/tests/test_ingestion_sync.py b/tests/test_ingestion_sync.py deleted file mode 100644 index 268c74a..0000000 --- a/tests/test_ingestion_sync.py +++ /dev/null @@ -1,251 +0,0 @@ -import os - -import pytest - -from apps.competitions.models import Competition, Season -from apps.ingestion.models import IngestionError, IngestionRun -from apps.ingestion.services.sync import run_sync_job -from apps.players.models import Nationality, Player -from apps.providers.exceptions import ProviderRateLimitError -from apps.providers.models import ExternalMapping -from apps.stats.models import PlayerSeason, PlayerSeasonStats -from apps.teams.models import Team - - -@pytest.mark.django_db -def test_run_full_sync_creates_domain_objects(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - - run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - - assert run.status == IngestionRun.RunStatus.SUCCESS - assert Competition.objects.count() >= 1 - assert Team.objects.count() >= 1 - assert Season.objects.count() >= 1 - assert Player.objects.count() >= 1 - assert PlayerSeason.objects.count() >= 1 - assert PlayerSeasonStats.objects.count() >= 1 - assert Player.objects.filter(origin_competition__isnull=False).exists() - assert run.context.get("completed_steps") == [ - "competitions", - "teams", - "seasons", - "players", - "player_stats", - "player_careers", - ] - assert run.context.get("source_counts", {}).get("players", 0) >= 1 - - -@pytest.mark.django_db -def test_full_sync_is_idempotent(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - - run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - counts_after_first = { - "competition": Competition.objects.count(), - "team": Team.objects.count(), - "season": Season.objects.count(), - "player": Player.objects.count(), - "player_season": PlayerSeason.objects.count(), - "player_stats": PlayerSeasonStats.objects.count(), - } - - run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - counts_after_second = { - "competition": Competition.objects.count(), - "team": Team.objects.count(), - "season": Season.objects.count(), - "player": Player.objects.count(), - "player_season": PlayerSeason.objects.count(), - "player_stats": PlayerSeasonStats.objects.count(), - } - - assert counts_after_first == counts_after_second - - -@pytest.mark.django_db -def test_incremental_sync_runs_successfully(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - - run = run_sync_job( - provider_namespace="mvp_demo", - job_type=IngestionRun.JobType.INCREMENTAL, - cursor="demo-cursor", - ) - - assert run.status == IngestionRun.RunStatus.SUCCESS - assert run.records_processed > 0 - assert run.started_at is not None - assert run.finished_at is not None - assert run.finished_at >= run.started_at - assert run.error_summary == "" - - -@pytest.mark.django_db -def test_run_sync_handles_rate_limit(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1" - - with pytest.raises(ProviderRateLimitError): - run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - - run = IngestionRun.objects.order_by("-id").first() - assert run is not None - assert run.status == IngestionRun.RunStatus.FAILED - assert run.started_at is not None - assert run.finished_at is not None - assert "Rate limit" in run.error_summary - assert IngestionError.objects.filter(ingestion_run=run).exists() - - os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None) - - -@pytest.mark.django_db -def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch): - class StableProvider: - def sync_all(self): - return { - "competitions": [ - { - "external_id": "competition-nba", - "name": "NBA", - "slug": "nba", - "competition_type": "league", - "gender": "men", - "level": 1, - "country": None, - "is_active": True, - } - ], - "teams": [ - { - "external_id": "team-14", - "name": "Los Angeles Lakers", - "short_name": "LAL", - "slug": "los-angeles-lakers", - "country": None, - "is_national_team": False, - } - ], - "seasons": [ - { - "external_id": "season-2024", - "label": "2024-2025", - "start_date": "2024-10-01", - "end_date": "2025-06-30", - "is_current": False, - } - ], - "players": [ - { - "external_id": "player-237", - "first_name": "LeBron", - "last_name": "James", - "full_name": "LeBron James", - "birth_date": None, - "nationality": None, - "nominal_position": {"code": "SF", "name": "Small Forward"}, - "inferred_role": {"code": "wing", "name": "Wing"}, - "height_cm": None, - "weight_kg": None, - "dominant_hand": "unknown", - "is_active": True, - "aliases": [], - } - ], - "player_stats": [ - { - "external_id": "ps-2024-237-14", - "player_external_id": "player-237", - "team_external_id": "team-14", - "competition_external_id": "competition-nba", - "season_external_id": "season-2024", - "games_played": 2, - "games_started": 0, - "minutes_played": 68, - "points": 25, - "rebounds": 9, - "assists": 8, - "steals": 1.5, - "blocks": 0.5, - "turnovers": 3.5, - "fg_pct": 55.0, - "three_pct": 45.0, - "ft_pct": 95.0, - "usage_rate": None, - "true_shooting_pct": None, - "player_efficiency_rating": None, - } - ], - "player_careers": [ - { - "external_id": "career-2024-237-14", - "player_external_id": "player-237", - "team_external_id": "team-14", - "competition_external_id": "competition-nba", - "season_external_id": "season-2024", - "role_code": "", - "shirt_number": None, - "start_date": "2024-10-01", - "end_date": "2025-06-30", - "notes": "Imported from balldontlie aggregated box scores", - } - ], - } - - def sync_incremental(self, *, cursor: str | None = None): - payload = self.sync_all() - payload["cursor"] = cursor - return payload - - monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider()) - - run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC) - lebron = Player.objects.get(full_name="LeBron James") - assert lebron.nationality is None - assert not Nationality.objects.filter(iso2_code="ZZ").exists() - - counts_first = { - "competition": Competition.objects.count(), - "team": Team.objects.count(), - "season": Season.objects.count(), - "player": Player.objects.count(), - "player_season": PlayerSeason.objects.count(), - "player_stats": PlayerSeasonStats.objects.count(), - "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(), - } - - run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC) - counts_second = { - "competition": Competition.objects.count(), - "team": Team.objects.count(), - "season": Season.objects.count(), - "player": Player.objects.count(), - "player_season": PlayerSeason.objects.count(), - "player_stats": PlayerSeasonStats.objects.count(), - "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(), - } - - assert counts_first == counts_second - - -@pytest.mark.django_db -def test_batch_transactions_preserve_prior_step_progress_on_failure(settings, monkeypatch): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - - def boom(*args, **kwargs): - raise RuntimeError("teams-sync-failed") - - monkeypatch.setattr("apps.ingestion.services.sync._sync_teams", boom) - - with pytest.raises(RuntimeError): - run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - - run = IngestionRun.objects.order_by("-id").first() - assert run is not None - assert run.status == IngestionRun.RunStatus.FAILED - assert Competition.objects.exists() - assert Team.objects.count() == 0 - assert run.context.get("completed_steps") == ["competitions"] - assert "Unhandled ingestion error" in run.error_summary diff --git a/tests/test_ingestion_tasks.py b/tests/test_ingestion_tasks.py deleted file mode 100644 index c53973f..0000000 --- a/tests/test_ingestion_tasks.py +++ /dev/null @@ -1,112 +0,0 @@ -import pytest -from contextlib import contextmanager -from celery.schedules import crontab -import psycopg -from django.conf import settings - -from apps.ingestion.models import IngestionRun -from apps.ingestion.services.runs import _build_ingestion_lock_key, release_ingestion_lock, try_acquire_ingestion_lock -from apps.ingestion.tasks import scheduled_provider_sync, trigger_incremental_sync -from config.celery import app as celery_app, build_periodic_schedule - - -@pytest.mark.django_db -def test_periodic_task_registered(): - assert "apps.ingestion.tasks.scheduled_provider_sync" in celery_app.tasks - - -@pytest.mark.django_db -def test_build_periodic_schedule_enabled(settings): - settings.INGESTION_SCHEDULE_ENABLED = True - settings.INGESTION_SCHEDULE_CRON = "15 * * * *" - - schedule = build_periodic_schedule() - assert "ingestion.scheduled_provider_sync" in schedule - entry = schedule["ingestion.scheduled_provider_sync"] - assert entry["task"] == "apps.ingestion.tasks.scheduled_provider_sync" - assert isinstance(entry["schedule"], crontab) - assert entry["schedule"]._orig_minute == "15" - - -@pytest.mark.django_db -def test_build_periodic_schedule_disabled(settings): - settings.INGESTION_SCHEDULE_ENABLED = False - assert build_periodic_schedule() == {} - - -@pytest.mark.django_db -def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog): - settings.INGESTION_SCHEDULE_ENABLED = True - settings.INGESTION_SCHEDULE_CRON = "invalid-cron" - - with caplog.at_level("ERROR"): - schedule = build_periodic_schedule() - - assert schedule == {} - assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages) - - -@pytest.mark.django_db -def test_trigger_incremental_sync_skips_when_advisory_lock_not_acquired(settings, monkeypatch): - settings.INGESTION_PREVENT_OVERLAP = True - - @contextmanager - def fake_lock(**kwargs): - yield False - - monkeypatch.setattr("apps.ingestion.tasks.ingestion_advisory_lock", fake_lock) - run_id = trigger_incremental_sync.apply( - kwargs={"provider_namespace": "mvp_demo"}, - ).get() - skipped_run = IngestionRun.objects.get(id=run_id) - assert skipped_run.status == IngestionRun.RunStatus.CANCELED - assert "advisory lock" in skipped_run.error_summary - - -@pytest.mark.django_db -def test_advisory_lock_prevents_concurrent_acquisition(): - provider_namespace = "mvp_demo" - job_type = IngestionRun.JobType.INCREMENTAL - lock_key = _build_ingestion_lock_key(provider_namespace=provider_namespace, job_type=job_type) - - conninfo = ( - f"dbname={settings.DATABASES['default']['NAME']} " - f"user={settings.DATABASES['default']['USER']} " - f"password={settings.DATABASES['default']['PASSWORD']} " - f"host={settings.DATABASES['default']['HOST']} " - f"port={settings.DATABASES['default']['PORT']}" - ) - with psycopg.connect(conninfo) as external_conn: - with external_conn.cursor() as cursor: - cursor.execute("SELECT pg_advisory_lock(%s);", [lock_key]) - acquired, _ = try_acquire_ingestion_lock( - provider_namespace=provider_namespace, - job_type=job_type, - ) - assert acquired is False - cursor.execute("SELECT pg_advisory_unlock(%s);", [lock_key]) - - acquired, django_key = try_acquire_ingestion_lock( - provider_namespace=provider_namespace, - job_type=job_type, - ) - assert acquired is True - release_ingestion_lock(lock_key=django_key) - - -@pytest.mark.django_db -def test_scheduled_provider_sync_uses_configured_job_type(settings, monkeypatch): - settings.INGESTION_SCHEDULE_JOB_TYPE = IngestionRun.JobType.FULL_SYNC - settings.INGESTION_SCHEDULE_PROVIDER_NAMESPACE = "mvp_demo" - captured = {} - - def fake_runner(**kwargs): - captured.update(kwargs) - return 99 - - monkeypatch.setattr("apps.ingestion.tasks._run_sync_with_overlap_guard", fake_runner) - - result = scheduled_provider_sync.apply().get() - assert result == 99 - assert captured["provider_namespace"] == "mvp_demo" - assert captured["job_type"] == IngestionRun.JobType.FULL_SYNC diff --git a/tests/test_integration_paths.py b/tests/test_integration_paths.py index a4f4599..742dd00 100644 --- a/tests/test_integration_paths.py +++ b/tests/test_integration_paths.py @@ -4,8 +4,6 @@ import pytest from django.contrib.auth.models import User from django.urls import reverse -from apps.ingestion.models import IngestionRun -from apps.ingestion.services.sync import run_sync_job from apps.players.models import Nationality, Player, Position, Role from apps.scouting.models import SavedSearch @@ -49,25 +47,3 @@ def test_saved_search_run_filters_player_results(client): assert response.status_code == 200 assert "Marco Rossi" in response.content.decode() assert "Luca Bianchi" not in response.content.decode() - - -@pytest.mark.django_db -def test_ingestion_output_is_searchable_in_ui_and_api(settings, client): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC) - assert run.status == IngestionRun.RunStatus.SUCCESS - - player = Player.objects.filter(origin_competition__isnull=False).order_by("id").first() - assert player is not None - assert player.origin_competition_id is not None - - params = {"origin_competition": player.origin_competition_id} - ui_response = client.get(reverse("players:index"), data=params) - api_response = client.get(reverse("api:players"), data=params) - - assert ui_response.status_code == 200 - assert api_response.status_code == 200 - ui_ids = {item.id for item in ui_response.context["players"]} - api_ids = {item["id"] for item in api_response.json()["results"]} - assert player.id in ui_ids - assert player.id in api_ids diff --git a/tests/test_players_views.py b/tests/test_players_views.py index 96ef4ce..f66bdaf 100644 --- a/tests/test_players_views.py +++ b/tests/test_players_views.py @@ -4,7 +4,7 @@ import pytest from django.urls import reverse from apps.competitions.models import Competition, Season -from apps.players.models import Nationality, Player, Position, Role +from apps.players.models import Nationality, Player, PlayerAlias, Position, Role from apps.stats.models import PlayerSeason, PlayerSeasonStats from apps.teams.models import Team diff --git a/tests/test_provider_adapter.py b/tests/test_provider_adapter.py deleted file mode 100644 index 95c11ce..0000000 --- a/tests/test_provider_adapter.py +++ /dev/null @@ -1,77 +0,0 @@ -import os - -import pytest - -from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter -from apps.providers.exceptions import ProviderNotFoundError, ProviderRateLimitError -from apps.providers.registry import get_provider - - -@pytest.mark.django_db -def test_mvp_provider_fetch_and_search_players(): - adapter = MvpDemoProviderAdapter() - - players = adapter.fetch_players() - assert len(players) >= 2 - - results = adapter.search_players(query="luca") - assert any("Luca" in item["full_name"] for item in results) - - detail = adapter.fetch_player(external_player_id="player-001") - assert detail is not None - assert detail["full_name"] == "Luca Rinaldi" - - -@pytest.mark.django_db -def test_mvp_provider_rate_limit_signal(): - os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1" - adapter = MvpDemoProviderAdapter() - - with pytest.raises(ProviderRateLimitError): - adapter.fetch_players() - - os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None) - - -@pytest.mark.django_db -def test_provider_registry_resolution(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - provider = get_provider() - assert isinstance(provider, MvpDemoProviderAdapter) - - with pytest.raises(ProviderNotFoundError): - get_provider("does-not-exist") - - -@pytest.mark.django_db -def test_demo_provider_sync_payload_uses_normalized_shape(): - adapter = MvpDemoProviderAdapter() - payload = adapter.sync_all() - - assert set(payload.keys()) == { - "players", - "competitions", - "teams", - "seasons", - "player_stats", - "player_careers", - "cursor", - } - assert payload["cursor"] is None - - player = payload["players"][0] - assert set(player.keys()) == { - "external_id", - "first_name", - "last_name", - "full_name", - "birth_date", - "nationality", - "nominal_position", - "inferred_role", - "height_cm", - "weight_kg", - "dominant_hand", - "is_active", - "aliases", - } diff --git a/tests/test_provider_balldontlie.py b/tests/test_provider_balldontlie.py deleted file mode 100644 index 8b13c76..0000000 --- a/tests/test_provider_balldontlie.py +++ /dev/null @@ -1,263 +0,0 @@ -from __future__ import annotations - -import time -from typing import Any - -import pytest -import requests - -from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter -from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter -from apps.providers.clients.balldontlie import BalldontlieClient -from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError -from apps.providers.registry import get_default_provider_namespace, get_provider -from apps.providers.services.balldontlie_mappings import map_seasons - - -class _FakeResponse: - def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""): - self.status_code = status_code - self._payload = payload or {} - self.headers = headers or {} - self.text = text - - def json(self): - return self._payload - - -class _FakeSession: - def __init__(self, responses: list[Any]): - self._responses = responses - self.calls: list[dict[str, Any]] = [] - - def get(self, *args, **kwargs): - self.calls.append(kwargs) - item = self._responses.pop(0) - if isinstance(item, Exception): - raise item - return item - - -class _FakeBalldontlieClient: - def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]: - if path == "/nba/v1/teams": - return { - "data": [ - { - "id": 14, - "full_name": "Los Angeles Lakers", - "abbreviation": "LAL", - } - ] - } - return {"data": []} - - def list_paginated( - self, - path: str, - *, - params: dict[str, Any] | None = None, - per_page: int = 100, - page_limit: int = 1, - ) -> list[dict[str, Any]]: - if path == "/nba/v1/players": - return [ - { - "id": 237, - "first_name": "LeBron", - "last_name": "James", - "position": "F", - "team": {"id": 14}, - } - ] - if path == "/nba/v1/stats": - return [ - { - "pts": 20, - "reb": 8, - "ast": 7, - "stl": 1, - "blk": 1, - "turnover": 3, - "fg_pct": 0.5, - "fg3_pct": 0.4, - "ft_pct": 0.9, - "min": "35:12", - "player": {"id": 237}, - "team": {"id": 14}, - "game": {"season": 2024}, - }, - { - "pts": 30, - "reb": 10, - "ast": 9, - "stl": 2, - "blk": 0, - "turnover": 4, - "fg_pct": 0.6, - "fg3_pct": 0.5, - "ft_pct": 1.0, - "min": "33:00", - "player": {"id": 237}, - "team": {"id": 14}, - "game": {"season": 2024}, - }, - ] - return [] - - -@pytest.mark.django_db -def test_provider_registry_backend_selection(settings): - settings.PROVIDER_DEFAULT_NAMESPACE = "" - settings.PROVIDER_BACKEND = "demo" - assert get_default_provider_namespace() == "mvp_demo" - assert isinstance(get_provider(), MvpDemoProviderAdapter) - - settings.PROVIDER_BACKEND = "balldontlie" - assert get_default_provider_namespace() == "balldontlie" - assert isinstance(get_provider(), BalldontlieProviderAdapter) - - settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo" - assert get_default_provider_namespace() == "mvp_demo" - - -@pytest.mark.django_db -def test_balldontlie_adapter_maps_payloads(settings): - settings.PROVIDER_BALLDONTLIE_SEASONS = [2024] - adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient()) - - payload = adapter.sync_all() - - assert payload["competitions"][0]["external_id"] == "competition-nba" - assert payload["teams"][0]["external_id"] == "team-14" - assert payload["players"][0]["external_id"] == "player-237" - assert payload["seasons"][0]["external_id"] == "season-2024" - assert payload["player_stats"][0]["games_played"] == 2 - assert payload["player_stats"][0]["points"] == 25.0 - assert payload["player_stats"][0]["fg_pct"] == 55.0 - - player = payload["players"][0] - assert player["nationality"] is None - assert "current_team_external_id" not in player - - expected_keys = { - "external_id", - "first_name", - "last_name", - "full_name", - "birth_date", - "nationality", - "nominal_position", - "inferred_role", - "height_cm", - "weight_kg", - "dominant_hand", - "is_active", - "aliases", - } - assert set(player.keys()) == expected_keys - - -@pytest.mark.django_db -def test_balldontlie_map_seasons_marks_latest_as_current(): - seasons = map_seasons([2022, 2024, 2023, 2024]) - current_rows = [row for row in seasons if row["is_current"]] - assert len(current_rows) == 1 - assert current_rows[0]["external_id"] == "season-2024" - assert [row["external_id"] for row in seasons] == ["season-2022", "season-2023", "season-2024"] - - -@pytest.mark.django_db -def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings): - class _UnauthorizedStatsClient(_FakeBalldontlieClient): - def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1): - if path == "/nba/v1/stats": - raise ProviderUnauthorizedError( - provider="balldontlie", - path="stats", - status_code=401, - detail="Unauthorized", - ) - return super().list_paginated(path, params=params, per_page=per_page, page_limit=page_limit) - - settings.PROVIDER_BALLDONTLIE_SEASONS = [2024] - settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False - adapter = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient()) - - payload = adapter.sync_all() - assert payload["players"] - assert payload["teams"] - assert payload["player_stats"] == [] - assert payload["player_careers"] == [] - - -@pytest.mark.django_db -def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings): - monkeypatch.setattr(time, "sleep", lambda _: None) - settings.PROVIDER_REQUEST_RETRIES = 2 - settings.PROVIDER_REQUEST_RETRY_SLEEP = 0 - - session = _FakeSession( - responses=[ - _FakeResponse(status_code=429, headers={"Retry-After": "0"}), - _FakeResponse(status_code=200, payload={"data": []}), - ] - ) - client = BalldontlieClient(session=session) - - payload = client.get_json("players") - assert payload == {"data": []} - - -@pytest.mark.django_db -def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings): - monkeypatch.setattr(time, "sleep", lambda _: None) - settings.PROVIDER_REQUEST_RETRIES = 2 - settings.PROVIDER_REQUEST_RETRY_SLEEP = 0 - - session = _FakeSession(responses=[requests.Timeout("slow"), requests.Timeout("slow")]) - client = BalldontlieClient(session=session) - - with pytest.raises(ProviderTransientError): - client.get_json("players") - - -@pytest.mark.django_db -def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings): - monkeypatch.setattr(time, "sleep", lambda _: None) - settings.PROVIDER_REQUEST_RETRIES = 2 - settings.PROVIDER_REQUEST_RETRY_SLEEP = 0 - - session = _FakeSession( - responses=[ - _FakeResponse(status_code=429, headers={"Retry-After": "1"}), - _FakeResponse(status_code=429, headers={"Retry-After": "1"}), - ] - ) - client = BalldontlieClient(session=session) - - with pytest.raises(ProviderRateLimitError): - client.get_json("players") - - -@pytest.mark.django_db -def test_balldontlie_client_cursor_pagination(settings): - session = _FakeSession( - responses=[ - _FakeResponse( - status_code=200, - payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}}, - ), - _FakeResponse( - status_code=200, - payload={"data": [{"id": 2}], "meta": {"next_cursor": None}}, - ), - ] - ) - client = BalldontlieClient(session=session) - rows = client.list_paginated("players", per_page=1, page_limit=5) - - assert rows == [{"id": 1}, {"id": 2}] - assert "page" not in session.calls[0]["params"] - assert "cursor" not in session.calls[0]["params"] - assert session.calls[1]["params"]["cursor"] == 101 diff --git a/tests/test_v2_runtime_boundaries.py b/tests/test_v2_runtime_boundaries.py new file mode 100644 index 0000000..9d85aeb --- /dev/null +++ b/tests/test_v2_runtime_boundaries.py @@ -0,0 +1,15 @@ +import pytest +from django.conf import settings + + +@pytest.mark.django_db +def test_legacy_provider_stack_disabled_by_default(): + assert settings.LEGACY_PROVIDER_STACK_ENABLED is False + assert "apps.providers" not in settings.INSTALLED_APPS + + +@pytest.mark.django_db +def test_providers_route_not_mounted_by_default(client): + response = client.get("/providers/") + assert response.status_code == 404 +