144 lines
5.3 KiB
Python
144 lines
5.3 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import date
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from django.core.management import call_command
|
|
|
|
from apps.ingestion.extractors.bcl import BCLSnapshotExtractor
|
|
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
|
from apps.ingestion.extractors.registry import create_extractor
|
|
|
|
|
|
def _load_fixture(path: str) -> dict:
    """Read a JSON fixture stored under the ``fixtures`` directory beside this module.

    Args:
        path: Location of the fixture file, relative to the ``fixtures`` directory.

    Returns:
        The parsed JSON content of the fixture file.
    """
    fixtures_dir = Path(__file__).parent / "fixtures"
    raw_text = (fixtures_dir / path).read_text(encoding="utf-8")
    return json.loads(raw_text)
|
|
|
|
|
|
@pytest.mark.django_db
def test_bcl_extractor_normalizes_fixture_payload(tmp_path, settings):
    """Run the BCL extractor on the full stats fixture and verify the written snapshot.

    The extractor receives a stub HTTP client whose ``get_json`` returns the
    fixture payload regardless of the arguments it is called with.
    """
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
    settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"

    stats_payload = _load_fixture("bcl/bcl_players_stats.json")

    class StubClient:
        """Stand-in HTTP client that always serves the loaded fixture."""

        def get_json(self, *_args, **_kwargs):
            return stats_payload

    bcl_extractor = BCLSnapshotExtractor(http_client=StubClient())
    snapshot_file = tmp_path / "bcl.json"
    run_result = bcl_extractor.run(
        output_path=snapshot_file,
        snapshot_date=date(2026, 3, 13),
    )

    # Metadata reported by the run itself.
    assert run_result.extractor_name == "bcl"
    assert run_result.source_name == "bcl"
    assert run_result.records_count == 1

    # Content of the snapshot file written to disk.
    written = json.loads(snapshot_file.read_text(encoding="utf-8"))
    assert written["source_name"] == "bcl"
    assert written["snapshot_date"] == "2026-03-13"

    first_record = written["records"][0]
    expected_fields = {
        "competition_external_id": "bcl",
        "competition_name": "Basketball Champions League",
        "team_external_id": "bcl-team-murcia",
        "team_name": "UCAM Murcia",
        "player_external_id": "bcl-player-42",
        "full_name": "John Carter",
        "minutes_per_game": 29.1,
        "three_pt_pct": 37.2,
    }
    for field, expected in expected_fields.items():
        assert first_record[field] == expected, field
|
|
|
|
|
|
@pytest.mark.django_db
def test_bcl_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
    """Check that the partial-public fixture still yields one record.

    The written record is expected to carry ``None`` for the player bio fields
    listed below while keeping the name and the games-played count.
    """
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
    settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"

    partial_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")

    class StubClient:
        """Stand-in HTTP client that always serves the loaded fixture."""

        def get_json(self, *_args, **_kwargs):
            return partial_payload

    bcl_extractor = BCLSnapshotExtractor(http_client=StubClient())
    snapshot_file = tmp_path / "bcl-partial.json"
    run_result = bcl_extractor.run(
        output_path=snapshot_file,
        snapshot_date=date(2026, 3, 13),
    )

    assert run_result.records_count == 1

    written = json.loads(snapshot_file.read_text(encoding="utf-8"))
    record = written["records"][0]
    assert record["full_name"] == "Alex Novak"

    # Bio fields that must come through as null in the snapshot.
    null_bio_fields = (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
    )
    for field in null_bio_fields:
        assert record[field] is None, field

    assert record["games_played"] == 10
|
|
|
|
|
|
@pytest.mark.django_db
def test_bcl_extractor_still_fails_when_required_stats_are_missing(settings):
    """Expect ``ExtractorNormalizationError`` once ``ppg`` is removed from the first row.

    The partial-public fixture is loaded, its first ``data`` entry loses the
    ``ppg`` key, and the extractor is run without writing any output.
    """
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
    settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"

    incomplete_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
    # Drop the stat whose absence this test is about.
    del incomplete_payload["data"][0]["ppg"]

    class StubClient:
        """Stand-in HTTP client that always serves the modified fixture."""

        def get_json(self, *_args, **_kwargs):
            return incomplete_payload

    bcl_extractor = BCLSnapshotExtractor(http_client=StubClient())
    with pytest.raises(ExtractorNormalizationError):
        bcl_extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
|
|
|
|
|
@pytest.mark.django_db
def test_bcl_extractor_registry_selection(settings):
    """Check that ``create_extractor("bcl")`` returns a ``BCLSnapshotExtractor``."""
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"

    assert isinstance(create_extractor("bcl"), BCLSnapshotExtractor)
|
|
|
|
|
|
@pytest.mark.django_db
def test_run_bcl_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
    """Invoke the ``run_bcl_extractor`` command and inspect the file it produces.

    ``ResponsibleHttpClient`` in the BCL extractor module is patched with a
    factory returning a stub client, so the command works from the fixture
    payload. Exactly one ``bcl-2026-03-13.json`` file is expected in
    ``tmp_path``.
    """
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
    settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"

    stats_payload = _load_fixture("bcl/bcl_players_stats.json")

    class StubClient:
        """Stand-in HTTP client that always serves the loaded fixture."""

        def get_json(self, *_args, **_kwargs):
            return stats_payload

    def build_stub_client(**_kwargs):
        # Accept and ignore whatever keyword arguments the extractor passes.
        return StubClient()

    monkeypatch.setattr(
        "apps.ingestion.extractors.bcl.ResponsibleHttpClient",
        build_stub_client,
    )

    command_args = [
        "--output-path",
        str(tmp_path),
        "--snapshot-date",
        "2026-03-13",
    ]
    call_command("run_bcl_extractor", *command_args)

    snapshot_files = list(tmp_path.glob("bcl-2026-03-13.json"))
    assert len(snapshot_files) == 1

    written = json.loads(snapshot_files[0].read_text(encoding="utf-8"))
    assert written["source_name"] == "bcl"
    assert len(written["records"]) == 1
|