refactor(v2): isolate legacy provider stack and prune obsolete tests
This commit is contained in:
@ -62,6 +62,12 @@ SCHEDULER_INTERVAL_SECONDS=900
|
||||
# When the scheduler is disabled but its container is still started, keep it idle (avoids restart loops)
|
||||
SCHEDULER_DISABLED_SLEEP_SECONDS=300
|
||||
|
||||
# Legacy provider-sync stack (v1-style) is disabled by default in v2.
|
||||
LEGACY_PROVIDER_STACK_ENABLED=0
|
||||
# Optional legacy provider settings (only read when LEGACY_PROVIDER_STACK_ENABLED=1):
|
||||
# PROVIDER_BACKEND=demo
|
||||
# PROVIDER_DEFAULT_NAMESPACE=mvp_demo
|
||||
|
||||
# API safeguards (read-only API is optional)
|
||||
API_THROTTLE_ANON=100/hour
|
||||
API_THROTTLE_USER=1000/hour
|
||||
|
||||
@ -87,6 +87,7 @@ docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -
|
||||
- Keep PostgreSQL as source of truth.
|
||||
- Keep snapshot storage file-based and volume-backed.
|
||||
- Do not introduce MongoDB or Elasticsearch as source of truth.
|
||||
- Keep legacy provider/Celery sync code isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
|
||||
|
||||
## Repository Bootstrap Commands
|
||||
|
||||
|
||||
@ -22,7 +22,8 @@ Runtime services are intentionally small:
|
||||
- optional `scheduler` profile service (runs daily extractor/import loop)
|
||||
|
||||
No Redis/Celery services are part of the v2 default runtime topology.
|
||||
Legacy Celery/provider code is still in repository history/codebase but de-emphasized for v2.
|
||||
Legacy Celery/provider code remains in-repo but is isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
|
||||
Default v2 runtime keeps that stack disabled.
|
||||
|
||||
## Image Strategy
|
||||
|
||||
@ -96,6 +97,7 @@ Core groups:
|
||||
- snapshot directory vars (`STATIC_DATASET_*`)
|
||||
- optional future scheduler vars (`SCHEDULER_*`)
|
||||
- daily orchestration vars (`DAILY_ORCHESTRATION_*`)
|
||||
- optional legacy provider-sync toggle (`LEGACY_PROVIDER_STACK_ENABLED`)
|
||||
|
||||
## Snapshot Storage Convention
|
||||
|
||||
@ -404,3 +406,7 @@ This v2 work branch is:
|
||||
|
||||
Legacy provider/Celery ingestion layers are not the default runtime path for the v2 foundation.
|
||||
They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks.
|
||||
By default:
|
||||
- `apps.providers` is not installed
|
||||
- `/providers/` routes are not mounted
|
||||
- legacy provider-specific settings are not required
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from django.contrib import admin
|
||||
from django.conf import settings
|
||||
|
||||
from .models import ImportFile, ImportRun, IngestionError, IngestionRun
|
||||
|
||||
@ -91,15 +92,18 @@ class ImportFileAdmin(admin.ModelAdmin):
|
||||
)
|
||||
|
||||
|
||||
@admin.register(IngestionRun)
|
||||
class LegacyIngestionRunAdmin(admin.ModelAdmin):
|
||||
list_display = ("provider_namespace", "job_type", "status", "started_at", "finished_at")
|
||||
list_filter = ("provider_namespace", "job_type", "status")
|
||||
search_fields = ("provider_namespace", "error_summary")
|
||||
|
||||
|
||||
@admin.register(IngestionError)
|
||||
class LegacyIngestionErrorAdmin(admin.ModelAdmin):
|
||||
list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at")
|
||||
list_filter = ("severity", "provider_namespace")
|
||||
search_fields = ("entity_type", "external_id", "message")
|
||||
|
||||
|
||||
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||
admin.site.register(IngestionRun, LegacyIngestionRunAdmin)
|
||||
admin.site.register(IngestionError, LegacyIngestionErrorAdmin)
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
from django.conf import settings
|
||||
|
||||
from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run
|
||||
from .sync import run_sync_job
|
||||
|
||||
__all__ = [
|
||||
"start_ingestion_run",
|
||||
"finish_ingestion_run",
|
||||
"log_ingestion_error",
|
||||
"run_sync_job",
|
||||
]
|
||||
|
||||
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||
from .sync import run_sync_job # pragma: no cover - legacy provider stack only.
|
||||
|
||||
__all__.append("run_sync_job")
|
||||
|
||||
@ -72,10 +72,14 @@ INSTALLED_APPS = [
|
||||
"apps.teams",
|
||||
"apps.stats",
|
||||
"apps.scouting",
|
||||
"apps.providers",
|
||||
"apps.ingestion",
|
||||
]
|
||||
|
||||
# v2 default runtime is snapshot-first. Legacy provider stack is opt-in.
|
||||
LEGACY_PROVIDER_STACK_ENABLED = env_bool("LEGACY_PROVIDER_STACK_ENABLED", False)
|
||||
if LEGACY_PROVIDER_STACK_ENABLED:
|
||||
INSTALLED_APPS.append("apps.providers")
|
||||
|
||||
MIDDLEWARE = [
|
||||
"django.middleware.security.SecurityMiddleware",
|
||||
"django.contrib.sessions.middleware.SessionMiddleware",
|
||||
@ -195,29 +199,30 @@ SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))
|
||||
if SCHEDULER_INTERVAL_SECONDS < 30:
|
||||
raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.")
|
||||
|
||||
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
|
||||
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
|
||||
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
|
||||
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
|
||||
PROVIDER_MVP_DATA_FILE = os.getenv(
|
||||
if LEGACY_PROVIDER_STACK_ENABLED:
|
||||
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
|
||||
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
|
||||
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
|
||||
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
|
||||
PROVIDER_MVP_DATA_FILE = os.getenv(
|
||||
"PROVIDER_MVP_DATA_FILE",
|
||||
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
|
||||
)
|
||||
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
|
||||
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
|
||||
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
|
||||
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
|
||||
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
|
||||
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
|
||||
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
|
||||
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
|
||||
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
|
||||
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
|
||||
PROVIDER_BALLDONTLIE_SEASONS = [
|
||||
)
|
||||
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
|
||||
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
|
||||
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
|
||||
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
|
||||
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
|
||||
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
|
||||
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
|
||||
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
|
||||
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
|
||||
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
|
||||
PROVIDER_BALLDONTLIE_SEASONS = [
|
||||
int(value.strip())
|
||||
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
|
||||
if value.strip().isdigit()
|
||||
]
|
||||
]
|
||||
|
||||
LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper()
|
||||
LOG_SQL = env_bool("DJANGO_LOG_SQL", False)
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from django.contrib import admin
|
||||
from django.conf import settings
|
||||
from django.urls import include, path
|
||||
|
||||
urlpatterns = [
|
||||
@ -11,6 +12,8 @@ urlpatterns = [
|
||||
path("teams/", include("apps.teams.urls")),
|
||||
path("stats/", include("apps.stats.urls")),
|
||||
path("scouting/", include("apps.scouting.urls")),
|
||||
path("providers/", include("apps.providers.urls")),
|
||||
path("ingestion/", include("apps.ingestion.urls")),
|
||||
]
|
||||
|
||||
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||
urlpatterns.append(path("providers/", include("apps.providers.urls")))
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
|
||||
env = os.environ.copy()
|
||||
env.update(env_overrides)
|
||||
return subprocess.run(
|
||||
[sys.executable, "-c", code],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_invalid_cron_does_not_crash_config_import_path():
    """Importing the Celery app with a malformed cron exits cleanly with an empty beat schedule."""
    # The probe runs in a child interpreter so the overridden environment is
    # what the imported modules see.
    probe_code = (
        "import config; "
        "from config.celery import app; "
        "print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
    )
    broken_schedule_env = {
        "DJANGO_SETTINGS_MODULE": "config.settings.development",
        "DJANGO_ENV": "development",
        "DJANGO_DEBUG": "1",
        "INGESTION_SCHEDULE_ENABLED": "1",
        "INGESTION_SCHEDULE_CRON": "bad cron value",
    }

    result = _run_python_import(probe_code, broken_schedule_env)

    assert result.returncode == 0
    assert "beat_schedule_size=0" in result.stdout
|
||||
@ -1,251 +0,0 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from apps.competitions.models import Competition, Season
|
||||
from apps.ingestion.models import IngestionError, IngestionRun
|
||||
from apps.ingestion.services.sync import run_sync_job
|
||||
from apps.players.models import Nationality, Player
|
||||
from apps.providers.exceptions import ProviderRateLimitError
|
||||
from apps.providers.models import ExternalMapping
|
||||
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
||||
from apps.teams.models import Team
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_full_sync_creates_domain_objects(settings):
    """A full sync of the demo namespace succeeds and leaves rows in every domain table."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    assert run.status == IngestionRun.RunStatus.SUCCESS
    populated_models = (Competition, Team, Season, Player, PlayerSeason, PlayerSeasonStats)
    for model in populated_models:
        assert model.objects.count() >= 1
    assert Player.objects.filter(origin_competition__isnull=False).exists()

    # Steps are compared as an ordered list, so both membership and order matter.
    expected_steps = [
        "competitions",
        "teams",
        "seasons",
        "players",
        "player_stats",
        "player_careers",
    ]
    assert run.context.get("completed_steps") == expected_steps
    assert run.context.get("source_counts", {}).get("players", 0) >= 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_full_sync_is_idempotent(settings):
    """Running the same full sync twice must not change any domain row count."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    tracked_models = {
        "competition": Competition,
        "team": Team,
        "season": Season,
        "player": Player,
        "player_season": PlayerSeason,
        "player_stats": PlayerSeasonStats,
    }

    def row_counts():
        # Snapshot of the current number of rows per tracked model.
        return {label: model.objects.count() for label, model in tracked_models.items()}

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_first = row_counts()

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_second = row_counts()

    assert counts_after_first == counts_after_second
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_incremental_sync_runs_successfully(settings):
    """An incremental sync with a cursor finishes successfully with sane bookkeeping."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    sync_kwargs = {
        "provider_namespace": "mvp_demo",
        "job_type": IngestionRun.JobType.INCREMENTAL,
        "cursor": "demo-cursor",
    }
    run = run_sync_job(**sync_kwargs)

    assert run.status == IngestionRun.RunStatus.SUCCESS
    assert run.records_processed > 0
    # Both timestamps must be set, and in chronological order.
    assert run.started_at is not None
    assert run.finished_at is not None
    assert run.finished_at >= run.started_at
    assert run.error_summary == ""
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_run_sync_handles_rate_limit(settings, monkeypatch):
    """A provider rate-limit error propagates and the run is recorded as FAILED.

    The forced-rate-limit flag is set through ``monkeypatch`` so it is removed
    at teardown even when an assertion below fails; mutating ``os.environ``
    directly would leak the flag into later tests on failure.
    """
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    monkeypatch.setenv("PROVIDER_MVP_FORCE_RATE_LIMIT", "1")

    with pytest.raises(ProviderRateLimitError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    # Inspect the most recently created run.
    run = IngestionRun.objects.order_by("-id").first()
    assert run is not None
    assert run.status == IngestionRun.RunStatus.FAILED
    assert run.started_at is not None
    assert run.finished_at is not None
    assert "Rate limit" in run.error_summary
    assert IngestionError.objects.filter(ingestion_run=run).exists()
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
    """Two full syncs fed the same provider payload leave row and mapping counts unchanged."""

    class StableProvider:
        """Provider stub that returns one fixed payload on every call."""

        def sync_all(self):
            competition = {
                "external_id": "competition-nba",
                "name": "NBA",
                "slug": "nba",
                "competition_type": "league",
                "gender": "men",
                "level": 1,
                "country": None,
                "is_active": True,
            }
            team = {
                "external_id": "team-14",
                "name": "Los Angeles Lakers",
                "short_name": "LAL",
                "slug": "los-angeles-lakers",
                "country": None,
                "is_national_team": False,
            }
            season = {
                "external_id": "season-2024",
                "label": "2024-2025",
                "start_date": "2024-10-01",
                "end_date": "2025-06-30",
                "is_current": False,
            }
            player = {
                "external_id": "player-237",
                "first_name": "LeBron",
                "last_name": "James",
                "full_name": "LeBron James",
                "birth_date": None,
                "nationality": None,
                "nominal_position": {"code": "SF", "name": "Small Forward"},
                "inferred_role": {"code": "wing", "name": "Wing"},
                "height_cm": None,
                "weight_kg": None,
                "dominant_hand": "unknown",
                "is_active": True,
                "aliases": [],
            }
            stat_line = {
                "external_id": "ps-2024-237-14",
                "player_external_id": "player-237",
                "team_external_id": "team-14",
                "competition_external_id": "competition-nba",
                "season_external_id": "season-2024",
                "games_played": 2,
                "games_started": 0,
                "minutes_played": 68,
                "points": 25,
                "rebounds": 9,
                "assists": 8,
                "steals": 1.5,
                "blocks": 0.5,
                "turnovers": 3.5,
                "fg_pct": 55.0,
                "three_pct": 45.0,
                "ft_pct": 95.0,
                "usage_rate": None,
                "true_shooting_pct": None,
                "player_efficiency_rating": None,
            }
            career = {
                "external_id": "career-2024-237-14",
                "player_external_id": "player-237",
                "team_external_id": "team-14",
                "competition_external_id": "competition-nba",
                "season_external_id": "season-2024",
                "role_code": "",
                "shirt_number": None,
                "start_date": "2024-10-01",
                "end_date": "2025-06-30",
                "notes": "Imported from balldontlie aggregated box scores",
            }
            return {
                "competitions": [competition],
                "teams": [team],
                "seasons": [season],
                "players": [player],
                "player_stats": [stat_line],
                "player_careers": [career],
            }

        def sync_incremental(self, *, cursor: str | None = None):
            # Same payload as a full sync, with the caller's cursor echoed back.
            payload = self.sync_all()
            payload["cursor"] = cursor
            return payload

    monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())

    def row_counts():
        # Snapshot of domain row counts plus balldontlie external mappings.
        return {
            "competition": Competition.objects.count(),
            "team": Team.objects.count(),
            "season": Season.objects.count(),
            "player": Player.objects.count(),
            "player_season": PlayerSeason.objects.count(),
            "player_stats": PlayerSeasonStats.objects.count(),
            "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
        }

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    lebron = Player.objects.get(full_name="LeBron James")
    # The payload's nationality is None: the player keeps no nationality and
    # no "ZZ" Nationality row exists afterwards.
    assert lebron.nationality is None
    assert not Nationality.objects.filter(iso2_code="ZZ").exists()

    counts_first = row_counts()

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_second = row_counts()

    assert counts_first == counts_second
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_batch_transactions_preserve_prior_step_progress_on_failure(settings, monkeypatch):
    """A failure in the teams step keeps the already-completed competitions step."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    def _explode(*args, **kwargs):
        raise RuntimeError("teams-sync-failed")

    # Replace the teams step so the sync fails right after competitions.
    monkeypatch.setattr("apps.ingestion.services.sync._sync_teams", _explode)

    with pytest.raises(RuntimeError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    latest_run = IngestionRun.objects.order_by("-id").first()
    assert latest_run is not None
    assert latest_run.status == IngestionRun.RunStatus.FAILED
    assert Competition.objects.exists()
    assert Team.objects.count() == 0
    assert latest_run.context.get("completed_steps") == ["competitions"]
    assert "Unhandled ingestion error" in latest_run.error_summary
|
||||
@ -1,112 +0,0 @@
|
||||
import pytest
|
||||
from contextlib import contextmanager
|
||||
from celery.schedules import crontab
|
||||
import psycopg
|
||||
from django.conf import settings
|
||||
|
||||
from apps.ingestion.models import IngestionRun
|
||||
from apps.ingestion.services.runs import _build_ingestion_lock_key, release_ingestion_lock, try_acquire_ingestion_lock
|
||||
from apps.ingestion.tasks import scheduled_provider_sync, trigger_incremental_sync
|
||||
from config.celery import app as celery_app, build_periodic_schedule
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_periodic_task_registered():
    """The scheduled provider sync task is present in the Celery app's task registry."""
    task_name = "apps.ingestion.tasks.scheduled_provider_sync"
    assert task_name in celery_app.tasks
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_build_periodic_schedule_enabled(settings):
    """With scheduling enabled, the beat schedule has one crontab entry for the sync task."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "15 * * * *"

    schedule = build_periodic_schedule()
    entry_key = "ingestion.scheduled_provider_sync"

    assert entry_key in schedule
    entry = schedule[entry_key]
    assert entry["task"] == "apps.ingestion.tasks.scheduled_provider_sync"
    cron = entry["schedule"]
    assert isinstance(cron, crontab)
    # NOTE(review): relies on crontab's private `_orig_minute` attribute.
    assert cron._orig_minute == "15"
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_build_periodic_schedule_disabled(settings):
    """With scheduling disabled, the periodic schedule is an empty dict."""
    settings.INGESTION_SCHEDULE_ENABLED = False

    schedule = build_periodic_schedule()

    assert schedule == {}
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
    """An unparsable cron yields an empty schedule and an ERROR-level log message."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "invalid-cron"

    with caplog.at_level("ERROR"):
        schedule = build_periodic_schedule()

    assert schedule == {}
    expected_fragment = "Invalid periodic ingestion schedule config. Task disabled."
    assert any(expected_fragment in message for message in caplog.messages)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_trigger_incremental_sync_skips_when_advisory_lock_not_acquired(settings, monkeypatch):
    """When the advisory lock is not obtained, the task records a CANCELED run."""
    settings.INGESTION_PREVENT_OVERLAP = True

    @contextmanager
    def _lock_never_acquired(**kwargs):
        # Stand-in for the real lock context manager: always reports "not acquired".
        yield False

    monkeypatch.setattr("apps.ingestion.tasks.ingestion_advisory_lock", _lock_never_acquired)

    async_result = trigger_incremental_sync.apply(kwargs={"provider_namespace": "mvp_demo"})
    run_id = async_result.get()

    skipped_run = IngestionRun.objects.get(id=run_id)
    assert skipped_run.status == IngestionRun.RunStatus.CANCELED
    assert "advisory lock" in skipped_run.error_summary
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_advisory_lock_prevents_concurrent_acquisition():
    """A lock held by another session blocks acquisition until it is released.

    A second, independent psycopg connection takes the advisory lock first;
    while it holds the lock the Django-side acquisition must fail, and after
    the unlock it must succeed.
    """
    provider_namespace = "mvp_demo"
    job_type = IngestionRun.JobType.INCREMENTAL
    lock_key = _build_ingestion_lock_key(provider_namespace=provider_namespace, job_type=job_type)

    # Pass parameters as keywords instead of formatting a libpq conninfo
    # string by hand: in an unquoted "key=value" string an empty value (for
    # example a blank PASSWORD or PORT) or a value containing spaces is
    # misparsed. Empty settings are dropped so libpq falls back to its defaults.
    database = settings.DATABASES["default"]
    connect_kwargs = {
        "dbname": database["NAME"],
        "user": database["USER"],
        "password": database["PASSWORD"],
        "host": database["HOST"],
        "port": database["PORT"],
    }
    connect_kwargs = {key: value for key, value in connect_kwargs.items() if value not in (None, "")}

    with psycopg.connect(**connect_kwargs) as external_conn:
        with external_conn.cursor() as cursor:
            cursor.execute("SELECT pg_advisory_lock(%s);", [lock_key])
            acquired, _ = try_acquire_ingestion_lock(
                provider_namespace=provider_namespace,
                job_type=job_type,
            )
            assert acquired is False
            cursor.execute("SELECT pg_advisory_unlock(%s);", [lock_key])

    acquired, django_key = try_acquire_ingestion_lock(
        provider_namespace=provider_namespace,
        job_type=job_type,
    )
    assert acquired is True
    release_ingestion_lock(lock_key=django_key)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_scheduled_provider_sync_uses_configured_job_type(settings, monkeypatch):
    """The scheduled task forwards the configured namespace and job type to the runner."""
    settings.INGESTION_SCHEDULE_JOB_TYPE = IngestionRun.JobType.FULL_SYNC
    settings.INGESTION_SCHEDULE_PROVIDER_NAMESPACE = "mvp_demo"
    seen_kwargs = {}

    def _record_call(**kwargs):
        # Capture what the task passes and return a sentinel run id.
        seen_kwargs.update(kwargs)
        return 99

    monkeypatch.setattr("apps.ingestion.tasks._run_sync_with_overlap_guard", _record_call)

    outcome = scheduled_provider_sync.apply().get()

    assert outcome == 99
    assert seen_kwargs["provider_namespace"] == "mvp_demo"
    assert seen_kwargs["job_type"] == IngestionRun.JobType.FULL_SYNC
|
||||
@ -4,8 +4,6 @@ import pytest
|
||||
from django.contrib.auth.models import User
|
||||
from django.urls import reverse
|
||||
|
||||
from apps.ingestion.models import IngestionRun
|
||||
from apps.ingestion.services.sync import run_sync_job
|
||||
from apps.players.models import Nationality, Player, Position, Role
|
||||
from apps.scouting.models import SavedSearch
|
||||
|
||||
@ -49,25 +47,3 @@ def test_saved_search_run_filters_player_results(client):
|
||||
assert response.status_code == 200
|
||||
assert "Marco Rossi" in response.content.decode()
|
||||
assert "Luca Bianchi" not in response.content.decode()
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_ingestion_output_is_searchable_in_ui_and_api(settings, client):
    """A player created by a full sync is found by the origin-competition filter in UI and API."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    assert run.status == IngestionRun.RunStatus.SUCCESS

    # Pick the lowest-id synced player that has an origin competition.
    synced_players = Player.objects.filter(origin_competition__isnull=False).order_by("id")
    player = synced_players.first()
    assert player is not None
    assert player.origin_competition_id is not None

    query = {"origin_competition": player.origin_competition_id}
    ui_response = client.get(reverse("players:index"), data=query)
    api_response = client.get(reverse("api:players"), data=query)

    assert ui_response.status_code == 200
    assert api_response.status_code == 200
    ui_ids = {item.id for item in ui_response.context["players"]}
    api_ids = {item["id"] for item in api_response.json()["results"]}
    assert player.id in ui_ids
    assert player.id in api_ids
|
||||
|
||||
@ -4,7 +4,7 @@ import pytest
|
||||
from django.urls import reverse
|
||||
|
||||
from apps.competitions.models import Competition, Season
|
||||
from apps.players.models import Nationality, Player, Position, Role
|
||||
from apps.players.models import Nationality, Player, PlayerAlias, Position, Role
|
||||
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
||||
from apps.teams.models import Team
|
||||
|
||||
|
||||
@ -1,77 +0,0 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
|
||||
from apps.providers.exceptions import ProviderNotFoundError, ProviderRateLimitError
|
||||
from apps.providers.registry import get_provider
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_mvp_provider_fetch_and_search_players():
    """The demo adapter lists players, searches by name and fetches one player by id."""
    adapter = MvpDemoProviderAdapter()

    all_players = adapter.fetch_players()
    assert len(all_players) >= 2

    matches = adapter.search_players(query="luca")
    assert any("Luca" in row["full_name"] for row in matches)

    single = adapter.fetch_player(external_player_id="player-001")
    assert single is not None
    assert single["full_name"] == "Luca Rinaldi"
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_mvp_provider_rate_limit_signal(monkeypatch):
    """With the force-rate-limit flag set, fetching players raises ProviderRateLimitError.

    The flag is set through ``monkeypatch`` so it is removed at teardown even
    if the expectation fails; mutating ``os.environ`` directly would leave the
    flag set for later tests on failure.
    """
    monkeypatch.setenv("PROVIDER_MVP_FORCE_RATE_LIMIT", "1")
    adapter = MvpDemoProviderAdapter()

    with pytest.raises(ProviderRateLimitError):
        adapter.fetch_players()
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_provider_registry_resolution(settings):
    """The default namespace resolves to the demo adapter; unknown namespaces raise."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    resolved = get_provider()
    assert isinstance(resolved, MvpDemoProviderAdapter)

    unknown_namespace = "does-not-exist"
    with pytest.raises(ProviderNotFoundError):
        get_provider(unknown_namespace)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_demo_provider_sync_payload_uses_normalized_shape():
    """The demo sync payload exposes exactly the expected top-level and player keys."""
    expected_sections = {
        "players",
        "competitions",
        "teams",
        "seasons",
        "player_stats",
        "player_careers",
        "cursor",
    }
    expected_player_fields = {
        "external_id",
        "first_name",
        "last_name",
        "full_name",
        "birth_date",
        "nationality",
        "nominal_position",
        "inferred_role",
        "height_cm",
        "weight_kg",
        "dominant_hand",
        "is_active",
        "aliases",
    }

    payload = MvpDemoProviderAdapter().sync_all()

    assert set(payload.keys()) == expected_sections
    assert payload["cursor"] is None

    first_player = payload["players"][0]
    assert set(first_player.keys()) == expected_player_fields
|
||||
@ -1,263 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
|
||||
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
|
||||
from apps.providers.clients.balldontlie import BalldontlieClient
|
||||
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
|
||||
from apps.providers.registry import get_default_provider_namespace, get_provider
|
||||
from apps.providers.services.balldontlie_mappings import map_seasons
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
|
||||
self.status_code = status_code
|
||||
self._payload = payload or {}
|
||||
self.headers = headers or {}
|
||||
self.text = text
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
class _FakeSession:
|
||||
def __init__(self, responses: list[Any]):
|
||||
self._responses = responses
|
||||
self.calls: list[dict[str, Any]] = []
|
||||
|
||||
def get(self, *args, **kwargs):
|
||||
self.calls.append(kwargs)
|
||||
item = self._responses.pop(0)
|
||||
if isinstance(item, Exception):
|
||||
raise item
|
||||
return item
|
||||
|
||||
|
||||
class _FakeBalldontlieClient:
|
||||
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||
if path == "/nba/v1/teams":
|
||||
return {
|
||||
"data": [
|
||||
{
|
||||
"id": 14,
|
||||
"full_name": "Los Angeles Lakers",
|
||||
"abbreviation": "LAL",
|
||||
}
|
||||
]
|
||||
}
|
||||
return {"data": []}
|
||||
|
||||
def list_paginated(
|
||||
self,
|
||||
path: str,
|
||||
*,
|
||||
params: dict[str, Any] | None = None,
|
||||
per_page: int = 100,
|
||||
page_limit: int = 1,
|
||||
) -> list[dict[str, Any]]:
|
||||
if path == "/nba/v1/players":
|
||||
return [
|
||||
{
|
||||
"id": 237,
|
||||
"first_name": "LeBron",
|
||||
"last_name": "James",
|
||||
"position": "F",
|
||||
"team": {"id": 14},
|
||||
}
|
||||
]
|
||||
if path == "/nba/v1/stats":
|
||||
return [
|
||||
{
|
||||
"pts": 20,
|
||||
"reb": 8,
|
||||
"ast": 7,
|
||||
"stl": 1,
|
||||
"blk": 1,
|
||||
"turnover": 3,
|
||||
"fg_pct": 0.5,
|
||||
"fg3_pct": 0.4,
|
||||
"ft_pct": 0.9,
|
||||
"min": "35:12",
|
||||
"player": {"id": 237},
|
||||
"team": {"id": 14},
|
||||
"game": {"season": 2024},
|
||||
},
|
||||
{
|
||||
"pts": 30,
|
||||
"reb": 10,
|
||||
"ast": 9,
|
||||
"stl": 2,
|
||||
"blk": 0,
|
||||
"turnover": 4,
|
||||
"fg_pct": 0.6,
|
||||
"fg3_pct": 0.5,
|
||||
"ft_pct": 1.0,
|
||||
"min": "33:00",
|
||||
"player": {"id": 237},
|
||||
"team": {"id": 14},
|
||||
"game": {"season": 2024},
|
||||
},
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_provider_registry_backend_selection(settings):
    """The backend setting picks the default namespace unless an explicit namespace is set."""
    # No explicit namespace: the backend decides.
    settings.PROVIDER_DEFAULT_NAMESPACE = ""
    settings.PROVIDER_BACKEND = "demo"
    assert get_default_provider_namespace() == "mvp_demo"
    demo_provider = get_provider()
    assert isinstance(demo_provider, MvpDemoProviderAdapter)

    settings.PROVIDER_BACKEND = "balldontlie"
    assert get_default_provider_namespace() == "balldontlie"
    live_provider = get_provider()
    assert isinstance(live_provider, BalldontlieProviderAdapter)

    # An explicit namespace wins while the backend is still "balldontlie".
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    assert get_default_provider_namespace() == "mvp_demo"
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
    """The adapter maps the fake client's rows into the normalized payload shape."""
    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    fake_client = _FakeBalldontlieClient()
    adapter = BalldontlieProviderAdapter(client=fake_client)

    payload = adapter.sync_all()

    assert payload["competitions"][0]["external_id"] == "competition-nba"
    assert payload["teams"][0]["external_id"] == "team-14"
    assert payload["players"][0]["external_id"] == "player-237"
    assert payload["seasons"][0]["external_id"] == "season-2024"
    # Two stat rows are served by the fake client; expect them combined into
    # one row with games_played == 2, points == 25.0 and fg_pct == 55.0.
    assert payload["player_stats"][0]["games_played"] == 2
    assert payload["player_stats"][0]["points"] == 25.0
    assert payload["player_stats"][0]["fg_pct"] == 55.0

    mapped_player = payload["players"][0]
    assert mapped_player["nationality"] is None
    assert "current_team_external_id" not in mapped_player

    normalized_fields = {
        "external_id",
        "first_name",
        "last_name",
        "full_name",
        "birth_date",
        "nationality",
        "nominal_position",
        "inferred_role",
        "height_cm",
        "weight_kg",
        "dominant_hand",
        "is_active",
        "aliases",
    }
    assert set(mapped_player.keys()) == normalized_fields
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_map_seasons_marks_latest_as_current():
    """Check ``map_seasons`` on unsorted input containing a duplicate year.

    Exactly one row may be flagged current (2024), and the output ids must be
    the distinct seasons in ascending order.
    """
    mapped = map_seasons([2022, 2024, 2023, 2024])

    flagged_current = [entry["external_id"] for entry in mapped if entry["is_current"]]
    assert flagged_current == ["season-2024"]

    all_ids = [entry["external_id"] for entry in mapped]
    assert all_ids == ["season-2022", "season-2023", "season-2024"]
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings):
    """Check ``sync_all()`` when the stats endpoint raises an unauthorized error.

    With ``PROVIDER_BALLDONTLIE_STATS_STRICT`` off, players and teams must
    still be populated while the stats-derived sections come back empty.
    """

    class _StatsDeniedClient(_FakeBalldontlieClient):
        """Fake client whose stats endpoint raises; other paths use the parent."""

        def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1):
            if path != "/nba/v1/stats":
                return super().list_paginated(
                    path,
                    params=params,
                    per_page=per_page,
                    page_limit=page_limit,
                )
            raise ProviderUnauthorizedError(
                provider="balldontlie",
                path="stats",
                status_code=401,
                detail="Unauthorized",
            )

    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False

    synced = BalldontlieProviderAdapter(client=_StatsDeniedClient()).sync_all()

    assert synced["players"]
    assert synced["teams"]
    assert synced["player_stats"] == []
    assert synced["player_careers"] == []
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
    """Check ``get_json`` returns the 200 payload queued after a 429."""
    # Neutralise real sleeping so the test stays instantaneous.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    throttled = _FakeResponse(status_code=429, headers={"Retry-After": "0"})
    succeeded = _FakeResponse(status_code=200, payload={"data": []})
    fake_session = _FakeSession(responses=[throttled, succeeded])

    result = BalldontlieClient(session=fake_session).get_json("players")

    assert result == {"data": []}
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
    """Check two queued timeouts make ``get_json`` raise ``ProviderTransientError``."""
    # Neutralise real sleeping so the test stays instantaneous.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    # One distinct timeout instance per configured attempt.
    timeouts = [requests.Timeout("slow") for _ in range(2)]
    flaky_client = BalldontlieClient(session=_FakeSession(responses=timeouts))

    with pytest.raises(ProviderTransientError):
        flaky_client.get_json("players")
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
    """Check two queued 429 responses make ``get_json`` raise ``ProviderRateLimitError``."""
    # Neutralise real sleeping so the test stays instantaneous.
    monkeypatch.setattr(time, "sleep", lambda _: None)
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    # One 429 per configured attempt, each with its own headers dict.
    always_throttled = [
        _FakeResponse(status_code=429, headers={"Retry-After": "1"})
        for _ in range(2)
    ]
    throttled_client = BalldontlieClient(session=_FakeSession(responses=always_throttled))

    with pytest.raises(ProviderRateLimitError):
        throttled_client.get_json("players")
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_balldontlie_client_cursor_pagination(settings):
    """Check ``list_paginated`` over two pages linked by ``meta.next_cursor``.

    The rows of both pages must be concatenated in order. The first request
    must send neither ``page`` nor ``cursor``; the second must send the
    cursor value from the first response.
    """
    first_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}},
    )
    last_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 2}], "meta": {"next_cursor": None}},
    )
    fake_session = _FakeSession(responses=[first_page, last_page])

    collected = BalldontlieClient(session=fake_session).list_paginated(
        "players",
        per_page=1,
        page_limit=5,
    )

    assert collected == [{"id": 1}, {"id": 2}]

    first_request_params = fake_session.calls[0]["params"]
    assert "page" not in first_request_params
    assert "cursor" not in first_request_params
    assert fake_session.calls[1]["params"]["cursor"] == 101
|
||||
15
tests/test_v2_runtime_boundaries.py
Normal file
15
tests/test_v2_runtime_boundaries.py
Normal file
@ -0,0 +1,15 @@
|
||||
import pytest
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_legacy_provider_stack_disabled_by_default():
    """Check the legacy toggle is ``False`` and ``apps.providers`` is not installed."""
    legacy_flag = settings.LEGACY_PROVIDER_STACK_ENABLED
    # Identity check on purpose: a falsy non-bool such as 0 or "" must not pass.
    assert legacy_flag is False

    installed_apps = settings.INSTALLED_APPS
    assert "apps.providers" not in installed_apps
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_providers_route_not_mounted_by_default(client):
    """Check that GET ``/providers/`` answers 404 under default settings."""
    status = client.get("/providers/").status_code
    assert status == 404
|
||||
|
||||
Reference in New Issue
Block a user