"""Tests for the BCL snapshot extractor, its registry entry, and its management command.

Every test replaces the HTTP layer with a stub that serves a JSON fixture from
the ``fixtures/bcl`` directory next to this module, so no network access occurs.
"""

from __future__ import annotations

import json
from datetime import date
from pathlib import Path

import pytest
from django.core.management import call_command

from apps.ingestion.extractors.bcl import BCLSnapshotExtractor
from apps.ingestion.extractors.base import ExtractorNormalizationError
from apps.ingestion.extractors.registry import create_extractor

# Values shared by every test in this module.
BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
BCL_SEASON_LABEL = "2025-2026"
BCL_COMPETITION_EXTERNAL_ID = "bcl"
BCL_COMPETITION_NAME = "Basketball Champions League"
SNAPSHOT_DATE = date(2026, 3, 13)


def _load_fixture(path: str) -> dict:
    """Return the parsed JSON fixture stored at ``fixtures/<path>`` beside this module."""
    fixture_path = Path(__file__).parent / "fixtures" / path
    return json.loads(fixture_path.read_text(encoding="utf-8"))


def _configure_bcl_settings(settings, *, with_competition: bool = True) -> None:
    """Point the ``EXTRACTOR_BCL_*`` settings at the values used by these tests.

    Args:
        settings: The ``settings`` fixture received by the calling test.
        with_competition: When false, only the stats URL and season label are
            set; the competition id/name settings are left untouched.
    """
    settings.EXTRACTOR_BCL_STATS_URL = BCL_STATS_URL
    settings.EXTRACTOR_BCL_SEASON_LABEL = BCL_SEASON_LABEL
    if with_competition:
        settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = BCL_COMPETITION_EXTERNAL_ID
        settings.EXTRACTOR_BCL_COMPETITION_NAME = BCL_COMPETITION_NAME


class _StaticJsonClient:
    """HTTP client stand-in whose ``get_json`` always returns one canned payload."""

    def __init__(self, payload: dict) -> None:
        self._payload = payload

    def get_json(self, *_args, **_kwargs) -> dict:
        """Return the canned payload, ignoring every argument."""
        return self._payload


@pytest.mark.django_db
def test_bcl_extractor_normalizes_fixture_payload(tmp_path, settings):
    """The full fixture yields one record written to disk with the expected fields."""
    _configure_bcl_settings(settings)
    fixture_payload = _load_fixture("bcl/bcl_players_stats.json")

    extractor = BCLSnapshotExtractor(http_client=_StaticJsonClient(fixture_payload))
    output_path = tmp_path / "bcl.json"
    result = extractor.run(output_path=output_path, snapshot_date=SNAPSHOT_DATE)

    assert result.extractor_name == "bcl"
    assert result.source_name == "bcl"
    assert result.records_count == 1

    payload = json.loads(output_path.read_text(encoding="utf-8"))
    assert payload["source_name"] == "bcl"
    assert payload["snapshot_date"] == "2026-03-13"

    row = payload["records"][0]
    assert row["competition_external_id"] == "bcl"
    assert row["competition_name"] == "Basketball Champions League"
    assert row["team_external_id"] == "bcl-team-murcia"
    assert row["team_name"] == "UCAM Murcia"
    assert row["player_external_id"] == "bcl-player-42"
    assert row["full_name"] == "John Carter"
    assert row["minutes_per_game"] == 29.1
    assert row["three_pt_pct"] == 37.2


@pytest.mark.django_db
def test_bcl_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
    """With the partial-public fixture, bio fields come out as ``None`` and the run succeeds."""
    _configure_bcl_settings(settings)
    fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")

    extractor = BCLSnapshotExtractor(http_client=_StaticJsonClient(fixture_payload))
    output_path = tmp_path / "bcl-partial.json"
    result = extractor.run(output_path=output_path, snapshot_date=SNAPSHOT_DATE)

    assert result.records_count == 1

    payload = json.loads(output_path.read_text(encoding="utf-8"))
    row = payload["records"][0]
    assert row["full_name"] == "Alex Novak"
    assert row["first_name"] is None
    assert row["last_name"] is None
    assert row["birth_date"] is None
    assert row["nationality"] is None
    assert row["height_cm"] is None
    assert row["weight_kg"] is None
    assert row["position"] is None
    assert row["games_played"] == 10


@pytest.mark.django_db
def test_bcl_extractor_still_fails_when_required_stats_are_missing(settings):
    """Removing ``ppg`` from the first record makes the run raise a normalization error."""
    _configure_bcl_settings(settings)
    fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
    # Mutate the payload before handing it to the stub so the extractor sees the gap.
    fixture_payload["data"][0].pop("ppg")

    extractor = BCLSnapshotExtractor(http_client=_StaticJsonClient(fixture_payload))

    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=SNAPSHOT_DATE)


@pytest.mark.django_db
def test_bcl_extractor_registry_selection(settings):
    """``create_extractor("bcl")`` returns a ``BCLSnapshotExtractor`` instance."""
    # This test only sets the URL and season label, not the competition settings.
    _configure_bcl_settings(settings, with_competition=False)

    extractor = create_extractor("bcl")

    assert isinstance(extractor, BCLSnapshotExtractor)


@pytest.mark.django_db
def test_run_bcl_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
    """The ``run_bcl_extractor`` command writes ``bcl-2026-03-13.json`` into the output path."""
    _configure_bcl_settings(settings)
    fixture_payload = _load_fixture("bcl/bcl_players_stats.json")

    # Replace the HTTP client factory referenced by the bcl module with one
    # that accepts any keyword arguments and returns the stub.
    monkeypatch.setattr(
        "apps.ingestion.extractors.bcl.ResponsibleHttpClient",
        lambda **_kwargs: _StaticJsonClient(fixture_payload),
    )

    call_command(
        "run_bcl_extractor",
        "--output-path",
        str(tmp_path),
        "--snapshot-date",
        "2026-03-13",
    )

    files = list(tmp_path.glob("bcl-2026-03-13.json"))
    assert len(files) == 1

    payload = json.loads(files[0].read_text(encoding="utf-8"))
    assert payload["source_name"] == "bcl"
    assert len(payload["records"]) == 1