from __future__ import annotations import json from datetime import date from pathlib import Path import pytest from django.core.management import call_command from apps.ingestion.extractors.lba import LBASnapshotExtractor from apps.ingestion.extractors.base import ExtractorNormalizationError from apps.ingestion.extractors.registry import create_extractor def _load_fixture(path: str) -> dict: fixture_path = Path(__file__).parent / "fixtures" / path return json.loads(fixture_path.read_text(encoding="utf-8")) @pytest.mark.django_db def test_lba_extractor_normalizes_fixture_payload(tmp_path, settings): settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json" settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026" settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a" settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A" fixture_payload = _load_fixture("lba/lba_players_stats.json") class FakeClient: def get_json(self, *_args, **_kwargs): return fixture_payload extractor = LBASnapshotExtractor(http_client=FakeClient()) output_path = tmp_path / "lba.json" result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13)) assert result.extractor_name == "lba" assert result.source_name == "lba" assert result.records_count == 1 payload = json.loads(output_path.read_text(encoding="utf-8")) assert payload["source_name"] == "lba" assert payload["snapshot_date"] == "2026-03-13" row = payload["records"][0] assert row["competition_external_id"] == "lba-serie-a" assert row["competition_name"] == "Lega Basket Serie A" assert row["team_external_id"] == "team-virtus-bologna" assert row["team_name"] == "Virtus Bologna" assert row["player_external_id"] == "p-001" assert row["full_name"] == "Marco Rossi" assert row["minutes_per_game"] == 28.3 assert row["three_pt_pct"] == 36.5 @pytest.mark.django_db def test_lba_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings): settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json" settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026" settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a" settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A" fixture_payload = _load_fixture("lba/lba_players_stats_partial_public.json") class FakeClient: def get_json(self, *_args, **_kwargs): return fixture_payload extractor = LBASnapshotExtractor(http_client=FakeClient()) output_path = tmp_path / "lba-partial.json" result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13)) assert result.records_count == 1 payload = json.loads(output_path.read_text(encoding="utf-8")) row = payload["records"][0] assert row["full_name"] == "Andrea Bianchi" assert row["first_name"] is None assert row["last_name"] is None assert row["birth_date"] is None assert row["nationality"] is None assert row["height_cm"] is None assert row["weight_kg"] is None assert row["position"] is None assert row["games_played"] == 18 @pytest.mark.django_db def test_lba_extractor_still_fails_when_required_stats_are_missing(settings): settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json" settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026" settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a" settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A" fixture_payload = _load_fixture("lba/lba_players_stats_partial_public.json") fixture_payload["data"][0].pop("ppg") class FakeClient: def get_json(self, *_args, **_kwargs): return fixture_payload extractor = LBASnapshotExtractor(http_client=FakeClient()) with pytest.raises(ExtractorNormalizationError): extractor.run(write_output=False, snapshot_date=date(2026, 3, 13)) @pytest.mark.django_db def test_lba_extractor_registry_selection(settings): settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json" settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026" extractor = create_extractor("lba") assert isinstance(extractor, LBASnapshotExtractor) @pytest.mark.django_db def test_run_lba_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch): settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json" settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026" settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a" settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A" fixture_payload = _load_fixture("lba/lba_players_stats.json") class FakeClient: def get_json(self, *_args, **_kwargs): return fixture_payload monkeypatch.setattr( "apps.ingestion.extractors.lba.ResponsibleHttpClient", lambda **_kwargs: FakeClient(), ) call_command( "run_lba_extractor", "--output-path", str(tmp_path), "--snapshot-date", "2026-03-13", ) files = list(tmp_path.glob("lba-2026-03-13.json")) assert len(files) == 1 payload = json.loads(files[0].read_text(encoding="utf-8")) assert payload["source_name"] == "lba" assert len(payload["records"]) == 1