from __future__ import annotations import json from datetime import date from pathlib import Path import pytest from django.core.management import call_command from apps.competitions.models import Competition, Season from apps.ingestion.models import ImportFile, ImportRun from apps.players.models import Player from apps.stats.models import PlayerSeason, PlayerSeasonStats from apps.teams.models import Team def _valid_payload() -> dict: return { "source_name": "official_site_feed", "snapshot_date": "2026-03-13", "records": [ { "competition_external_id": "comp-nba", "competition_name": "NBA", "season": "2025-2026", "team_external_id": "team-lal", "team_name": "Los Angeles Lakers", "player_external_id": "player-23", "full_name": "LeBron James", "first_name": "LeBron", "last_name": "James", "birth_date": "1984-12-30", "nationality": "US", "height_cm": 206, "weight_kg": 113, "position": "SF", "role": "Primary Creator", "games_played": 60, "minutes_per_game": 34.5, "points_per_game": 25.4, "rebounds_per_game": 7.2, "assists_per_game": 8.1, "steals_per_game": 1.3, "blocks_per_game": 0.7, "turnovers_per_game": 3.2, "fg_pct": 51.1, "three_pt_pct": 38.4, "ft_pct": 79.8, } ], } def _valid_payload_for_source(source_name: str, *, competition_name: str = "NBA", team_name: str = "Los Angeles Lakers") -> dict: payload = _valid_payload() payload["source_name"] = source_name payload["records"][0]["competition_name"] = competition_name payload["records"][0]["team_name"] = team_name return payload def _write_json(path: Path, payload: dict) -> None: path.write_text(json.dumps(payload), encoding="utf-8") @pytest.mark.django_db def test_valid_snapshot_import(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() file_path = incoming / "nba-2026-03-13.json" _write_json(file_path, payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") run = ImportRun.objects.get() assert run.status == ImportRun.RunStatus.SUCCESS assert run.files_processed == 1 assert run.rows_upserted == 1 import_file = ImportFile.objects.get(import_run=run) assert import_file.status == ImportFile.FileStatus.SUCCESS assert import_file.source_name == "official_site_feed" assert import_file.snapshot_date == date(2026, 3, 13) assert (archive / "nba-2026-03-13.json").exists() assert not (incoming / "nba-2026-03-13.json").exists() assert Competition.objects.filter(source_name="official_site_feed", source_uid="comp-nba").exists() assert Team.objects.filter(source_name="official_site_feed", source_uid="team-lal").exists() assert Player.objects.filter(source_name="official_site_feed", source_uid="player-23").exists() assert Season.objects.filter(source_uid="season:2025-2026").exists() assert PlayerSeason.objects.count() == 1 assert PlayerSeasonStats.objects.count() == 1 @pytest.mark.django_db def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"): payload["records"][0].pop(optional_field, None) file_path = incoming / "optional-missing.json" _write_json(file_path, payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") run = ImportRun.objects.get() assert run.status == ImportRun.RunStatus.SUCCESS player = Player.objects.get(source_uid="player-23") assert player.first_name == "LeBron" assert player.last_name == "James" assert player.birth_date is None assert player.nationality is None assert player.nominal_position is None assert player.height_cm is None assert player.weight_kg is None assert PlayerSeasonStats.objects.count() == 1 @pytest.mark.django_db @pytest.mark.parametrize( ("source_name", "competition_id", "competition_name"), [ ("lba", "lba-serie-a", "Lega Basket Serie A"), ("bcl", "bcl", "Basketball Champions League"), ], ) def test_partial_public_source_snapshot_imports_for_lba_and_bcl( tmp_path, settings, source_name, competition_id, competition_name, ): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() payload["source_name"] = source_name row = payload["records"][0] row["competition_external_id"] = competition_id row["competition_name"] = competition_name for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"): row.pop(optional_field, None) _write_json(incoming / f"{source_name}.json", payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") run = ImportRun.objects.get() assert run.status == ImportRun.RunStatus.SUCCESS assert Competition.objects.filter(source_uid=competition_id, name=competition_name).exists() assert Player.objects.filter(source_uid="player-23").exists() assert PlayerSeasonStats.objects.count() == 1 @pytest.mark.django_db def test_invalid_snapshot_rejected_and_moved_to_failed(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() del payload["records"][0]["points_per_game"] file_path = incoming / "broken.json" _write_json(file_path, payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") run = ImportRun.objects.get() assert run.status == ImportRun.RunStatus.FAILED import_file = ImportFile.objects.get(import_run=run) assert import_file.status == ImportFile.FileStatus.FAILED assert "missing required fields" in import_file.error_message assert (failed / "broken.json").exists() assert not (archive / "broken.json").exists() assert not Competition.objects.exists() @pytest.mark.django_db def test_idempotent_reimport_uses_checksum_and_skips_duplicate(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() _write_json(incoming / "first.json", payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") assert Competition.objects.count() == 1 assert Player.objects.count() == 1 # Re-drop same content with different filename. _write_json(incoming / "first-duplicate.json", payload) call_command("import_snapshots") assert Competition.objects.count() == 1 assert Player.objects.count() == 1 assert PlayerSeason.objects.count() == 1 duplicate_file = ImportFile.objects.filter(relative_path="first-duplicate.json").order_by("-id").first() assert duplicate_file is not None assert duplicate_file.status == ImportFile.FileStatus.SKIPPED assert duplicate_file.checksum assert "duplicate checksum" in duplicate_file.error_message.lower() assert (archive / "first-duplicate.json").exists() @pytest.mark.django_db def test_same_run_second_file_same_checksum_is_skipped(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload() _write_json(incoming / "a.json", payload) _write_json(incoming / "b.json", payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") files = {row.relative_path: row for row in ImportFile.objects.order_by("relative_path")} assert files["a.json"].status == ImportFile.FileStatus.SUCCESS assert files["b.json"].status == ImportFile.FileStatus.SKIPPED assert files["a.json"].checksum == files["b.json"].checksum @pytest.mark.django_db def test_same_raw_external_ids_from_different_sources_do_not_collide(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() lba_payload = _valid_payload_for_source("lba", competition_name="Lega Basket Serie A", team_name="Virtus Bologna") bcl_payload = _valid_payload_for_source("bcl", competition_name="Basketball Champions League", team_name="AEK Athens") _write_json(incoming / "lba.json", lba_payload) _write_json(incoming / "bcl.json", bcl_payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") assert Competition.objects.filter(source_uid="comp-nba").count() == 2 assert Team.objects.filter(source_uid="team-lal").count() == 2 assert Player.objects.filter(source_uid="player-23").count() == 2 assert Competition.objects.filter(source_name="lba", source_uid="comp-nba", name="Lega Basket Serie A").exists() assert Competition.objects.filter(source_name="bcl", source_uid="comp-nba", name="Basketball Champions League").exists() assert Team.objects.filter(source_name="lba", source_uid="team-lal", name="Virtus Bologna").exists() assert Team.objects.filter(source_name="bcl", source_uid="team-lal", name="AEK Athens").exists() @pytest.mark.django_db def test_reimport_same_source_payload_remains_idempotent(tmp_path, settings): incoming = tmp_path / "incoming" archive = tmp_path / "archive" failed = tmp_path / "failed" incoming.mkdir() archive.mkdir() failed.mkdir() payload = _valid_payload_for_source("lba") _write_json(incoming / "lba-1.json", payload) settings.STATIC_DATASET_INCOMING_DIR = str(incoming) settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) settings.STATIC_DATASET_FAILED_DIR = str(failed) call_command("import_snapshots") _write_json(incoming / "lba-2.json", payload) call_command("import_snapshots") assert Competition.objects.filter(source_name="lba", source_uid="comp-nba").count() == 1 assert Team.objects.filter(source_name="lba", source_uid="team-lal").count() == 1 assert Player.objects.filter(source_name="lba", source_uid="player-23").count() == 1