"""Tests for snapshot extractors, the ``run_extractor`` command, and HTTP retries."""

from __future__ import annotations

import json
from datetime import date

import pytest
from django.core.management import call_command

from apps.ingestion.extractors.base import BaseSnapshotExtractor
from apps.ingestion.extractors.http import ResponsibleHttpClient
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor


class DummyExtractor(BaseSnapshotExtractor):
    """Extractor stub that yields a single hard-coded player record."""

    extractor_name = "dummy"
    source_name = "dummy_source"

    def fetch(self):
        """Return a canned payload containing one raw row."""
        return {"rows": [{"name": "Jane Doe"}]}

    def parse(self, payload):
        """Return the list stored under the payload's ``"rows"`` key."""
        return payload["rows"]

    def normalize_record(self, source_record):
        """Build a fixed record; only ``full_name`` comes from ``source_record``."""
        return {
            "competition_external_id": "comp-1",
            "competition_name": "League One",
            "season": "2025-2026",
            "team_external_id": "team-1",
            "team_name": "Team One",
            "player_external_id": "player-1",
            "full_name": source_record["name"],
            "first_name": "Jane",
            "last_name": "Doe",
            "birth_date": "2000-01-01",
            "nationality": "US",
            "height_cm": 180,
            "weight_kg": 75,
            "position": "SG",
            "games_played": 10,
            "minutes_per_game": 30.0,
            "points_per_game": 15.0,
            "rebounds_per_game": 4.0,
            "assists_per_game": 3.0,
            "steals_per_game": 1.2,
            "blocks_per_game": 0.4,
            "turnovers_per_game": 2.0,
            "fg_pct": 45.0,
            "three_pt_pct": 35.0,
            "ft_pct": 82.0,
        }


class _FakeResponse:
    """Response stand-in exposing ``status_code``, ``raise_for_status`` and ``json``."""

    def __init__(self, payload, status_code=200):
        self._payload = payload
        self.status_code = status_code

    def raise_for_status(self):
        """Raise ``RuntimeError`` when the status code is 400 or above."""
        if self.status_code >= 400:
            raise RuntimeError(f"status={self.status_code}")

    def json(self):
        """Return the payload given at construction time."""
        return self._payload


@pytest.mark.django_db
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
    """``run()`` without an explicit output path still writes a snapshot file."""
    settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
    extractor = DummyExtractor()

    result = extractor.run(snapshot_date=date(2026, 3, 13))

    assert result.records_count == 1
    assert result.source_name == "dummy_source"
    assert result.output_path is not None
    assert result.output_path.exists()

    payload = json.loads(result.output_path.read_text(encoding="utf-8"))
    assert payload["source_name"] == "dummy_source"
    assert payload["snapshot_date"] == "2026-03-13"
    assert payload["records"][0]["full_name"] == "Jane Doe"


@pytest.mark.django_db
def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
    """Alias keys (``player_id``, ``three_pct``, ...) map to canonical fields."""

    class FakeClient:
        """HTTP client double returning one record that uses alias field names."""

        def get_json(self, *_args, **_kwargs):
            return {
                "records": [
                    {
                        "competition_id": 99,
                        "competition_name": "National League",
                        "season": 2025,
                        "team_id": 10,
                        "team_name": "Blue Team",
                        "player_id": 123,
                        "player_name": "John Smith",
                        "first_name": "John",
                        "last_name": "Smith",
                        "birth_date": "2001-05-12",
                        "nationality": "US",
                        "height_cm": 198,
                        "weight_kg": 96,
                        "position": "SF",
                        "gp": 20,
                        "mpg": 28.5,
                        "ppg": 14.2,
                        "rpg": 5.1,
                        "apg": 3.2,
                        "spg": 1.1,
                        "bpg": 0.5,
                        "tov": 1.9,
                        "fg_pct": 47.3,
                        "three_pct": 36.1,
                        "ft_pct": 80.0,
                    }
                ]
            }

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    output_file = tmp_path / "public.json"

    result = extractor.run(output_path=output_file, snapshot_date=date(2026, 3, 13))

    assert result.records_count == 1
    payload = json.loads(output_file.read_text(encoding="utf-8"))
    row = payload["records"][0]
    # The assertions below expect the numeric ids from the feed as strings.
    assert row["competition_external_id"] == "99"
    assert row["team_external_id"] == "10"
    assert row["player_external_id"] == "123"
    assert row["full_name"] == "John Smith"
    assert row["three_pt_pct"] == 36.1


@pytest.mark.django_db
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
    """``run_extractor public_json_snapshot`` writes a dated file in the output dir."""
    settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"
    settings.EXTRACTOR_PUBLIC_SOURCE_NAME = "cmd_test_source"
    output_dir = tmp_path / "snapshots"

    class FakeClient:
        """HTTP client double returning one record with canonical field names."""

        def get_json(self, *_args, **_kwargs):
            return {
                "records": [
                    {
                        "competition_external_id": "comp-a",
                        "competition_name": "Alpha League",
                        "season": "2025-2026",
                        "team_external_id": "team-a",
                        "team_name": "Alpha Team",
                        "player_external_id": "player-a",
                        "full_name": "Alpha Player",
                        "first_name": "Alpha",
                        "last_name": "Player",
                        "birth_date": "2000-04-01",
                        "nationality": "US",
                        "height_cm": 190,
                        "weight_kg": 88,
                        "position": "PG",
                        "games_played": 12,
                        "minutes_per_game": 31.0,
                        "points_per_game": 17.0,
                        "rebounds_per_game": 4.0,
                        "assists_per_game": 6.0,
                        "steals_per_game": 1.3,
                        "blocks_per_game": 0.1,
                        "turnovers_per_game": 2.4,
                        "fg_pct": 44.0,
                        "three_pt_pct": 37.0,
                        "ft_pct": 79.0,
                    }
                ]
            }

    # The context manager undoes the patch on exit, including when the
    # command raises, so no manual try/finally + undo() is needed.
    with pytest.MonkeyPatch.context() as patcher:
        patcher.setattr(
            "apps.ingestion.extractors.public_json.ResponsibleHttpClient",
            lambda **_kwargs: FakeClient(),
        )
        call_command(
            "run_extractor",
            "public_json_snapshot",
            "--output-path",
            str(output_dir),
            "--snapshot-date",
            "2026-03-13",
        )

    files = list(output_dir.glob("public_json_snapshot-2026-03-13.json"))
    assert len(files) == 1

    payload = json.loads(files[0].read_text(encoding="utf-8"))
    assert payload["source_name"] == "cmd_test_source"
    assert payload["records"][0]["full_name"] == "Alpha Player"


def test_http_client_retries_on_retryable_status(monkeypatch):
    """A 429 on the first request is followed by a request that succeeds."""

    class FakeSession:
        """Session double: first ``get`` answers 429, every later one 200."""

        def __init__(self):
            self.calls = 0

        def get(self, *_args, **_kwargs):
            self.calls += 1
            if self.calls == 1:
                return _FakeResponse({"error": "busy"}, status_code=429)
            return _FakeResponse({"records": []}, status_code=200)

    session = FakeSession()
    client = ResponsibleHttpClient(
        user_agent="test-agent",
        timeout_seconds=5,
        retries=1,
        retry_sleep_seconds=0,
        request_delay_seconds=0,
        session=session,
    )

    payload = client.get_json("https://example.com/feed.json")

    # Only the second fake response carries this body, so equality implies a retry.
    assert payload == {"records": []}