fix(v2-ingestion): harden LBA/BCL snapshot contract for public data
This commit is contained in:
25
tests/fixtures/bcl/bcl_players_stats_partial_public.json
vendored
Normal file
25
tests/fixtures/bcl/bcl_players_stats_partial_public.json
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
"player": {
|
||||
"id": "bcl-player-99",
|
||||
"name": "Alex Novak"
|
||||
},
|
||||
"team": {
|
||||
"id": "bcl-team-tenerife",
|
||||
"name": "Lenovo Tenerife"
|
||||
},
|
||||
"gp": 10,
|
||||
"mpg": 27.2,
|
||||
"ppg": 14.8,
|
||||
"rpg": 4.1,
|
||||
"apg": 3.3,
|
||||
"spg": 1.2,
|
||||
"bpg": 0.4,
|
||||
"tov": 2.0,
|
||||
"fg_pct": 47.3,
|
||||
"three_pct": 38.0,
|
||||
"ft_pct": 79.1
|
||||
}
|
||||
]
|
||||
}
|
||||
25
tests/fixtures/lba/lba_players_stats_partial_public.json
vendored
Normal file
25
tests/fixtures/lba/lba_players_stats_partial_public.json
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
"player": {
|
||||
"id": "p-002",
|
||||
"name": "Andrea Bianchi"
|
||||
},
|
||||
"team": {
|
||||
"id": "team-olimpia-milano",
|
||||
"name": "Olimpia Milano"
|
||||
},
|
||||
"gp": 18,
|
||||
"mpg": 24.7,
|
||||
"ppg": 12.3,
|
||||
"rpg": 2.9,
|
||||
"apg": 4.2,
|
||||
"spg": 1.1,
|
||||
"bpg": 0.1,
|
||||
"tov": 1.8,
|
||||
"fg_pct": 45.0,
|
||||
"three_pct": 35.4,
|
||||
"ft_pct": 82.7
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -51,6 +51,37 @@ def test_bcl_extractor_normalizes_fixture_payload(tmp_path, settings):
|
||||
assert row["three_pt_pct"] == 37.2
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_bcl_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
    """Check that the BCL extractor tolerates a payload without player bio fields.

    A stub HTTP client returns the ``bcl_players_stats_partial_public.json``
    fixture. The test expects exactly one record in the written snapshot,
    with ``full_name`` and ``games_played`` populated and every bio and
    physical field emitted as ``None``.
    """
    settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
    settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
    settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"

    public_stats = _load_fixture("bcl/bcl_players_stats_partial_public.json")

    class StubHttpClient:
        """Return the fixture for any request so no network call is made."""

        def get_json(self, *_args, **_kwargs):
            return public_stats

    snapshot_file = tmp_path / "bcl-partial.json"
    outcome = BCLSnapshotExtractor(http_client=StubHttpClient()).run(
        output_path=snapshot_file,
        snapshot_date=date(2026, 3, 13),
    )
    assert outcome.records_count == 1

    record = json.loads(snapshot_file.read_text(encoding="utf-8"))["records"][0]
    assert record["full_name"] == "Alex Novak"

    # The keys must still be present in the normalized row, only with null values.
    fields_expected_null = (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
    )
    for field_name in fields_expected_null:
        assert record[field_name] is None, field_name

    assert record["games_played"] == 10
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_bcl_extractor_registry_selection(settings):
|
||||
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||
|
||||
@ -103,6 +103,86 @@ def test_valid_snapshot_import(tmp_path, settings):
|
||||
assert PlayerSeasonStats.objects.count() == 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing(tmp_path, settings):
    """Import a snapshot whose single record lacks the optional bio/physical keys.

    The keys are removed from the record entirely (not set to null) before
    the file is written. The import run is expected to succeed, with the
    player's date, nationality, position and physical attributes stored as
    ``None``.
    """
    # Same creation order as the directory names listed here.
    inbox, archived, rejected = (tmp_path / name for name in ("incoming", "archive", "failed"))
    for directory in (inbox, archived, rejected):
        directory.mkdir()

    snapshot = _valid_payload()
    record = snapshot["records"][0]
    droppable_keys = (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
        "role",
    )
    for key in droppable_keys:
        record.pop(key, None)

    _write_json(inbox / "optional-missing.json", snapshot)

    settings.STATIC_DATASET_INCOMING_DIR = str(inbox)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archived)
    settings.STATIC_DATASET_FAILED_DIR = str(rejected)

    call_command("import_snapshots")

    assert ImportRun.objects.get().status == ImportRun.RunStatus.SUCCESS

    imported = Player.objects.get(source_uid="player-23")
    # NOTE(review): the names are expected even though the keys were removed;
    # presumably the importer derives them from full_name - confirm in the importer.
    assert imported.first_name == "LeBron"
    assert imported.last_name == "James"
    for attribute in ("birth_date", "nationality", "nominal_position", "height_cm", "weight_kg"):
        assert getattr(imported, attribute) is None, attribute

    assert PlayerSeasonStats.objects.count() == 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize(
    ("source_name", "competition_id", "competition_name"),
    [
        ("lba", "lba-serie-a", "Lega Basket Serie A"),
        ("bcl", "bcl", "Basketball Champions League"),
    ],
)
def test_partial_public_source_snapshot_imports_for_lba_and_bcl(
    tmp_path,
    settings,
    source_name,
    competition_id,
    competition_name,
):
    """Import an LBA- or BCL-labelled snapshot that lacks the optional bio keys.

    The base payload is relabelled with the parametrized source and
    competition, the optional keys are removed from its single record, and
    the import is expected to succeed and create the competition, the player
    and one season-stats row.
    """
    inbox, archived, rejected = (tmp_path / name for name in ("incoming", "archive", "failed"))
    for directory in (inbox, archived, rejected):
        directory.mkdir()

    snapshot = _valid_payload()
    snapshot["source_name"] = source_name

    record = snapshot["records"][0]
    record["competition_external_id"] = competition_id
    record["competition_name"] = competition_name

    droppable_keys = (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
        "role",
    )
    for key in droppable_keys:
        record.pop(key, None)

    _write_json(inbox / f"{source_name}.json", snapshot)

    settings.STATIC_DATASET_INCOMING_DIR = str(inbox)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archived)
    settings.STATIC_DATASET_FAILED_DIR = str(rejected)

    call_command("import_snapshots")

    assert ImportRun.objects.get().status == ImportRun.RunStatus.SUCCESS

    matching_competitions = Competition.objects.filter(source_uid=competition_id, name=competition_name)
    assert matching_competitions.exists()
    assert Player.objects.filter(source_uid="player-23").exists()
    assert PlayerSeasonStats.objects.count() == 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_invalid_snapshot_rejected_and_moved_to_failed(tmp_path, settings):
|
||||
incoming = tmp_path / "incoming"
|
||||
|
||||
@ -8,6 +8,7 @@ import pytest
|
||||
from django.core.management import call_command
|
||||
|
||||
from apps.ingestion.extractors.lba import LBASnapshotExtractor
|
||||
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||
from apps.ingestion.extractors.registry import create_extractor
|
||||
|
||||
|
||||
@ -51,6 +52,56 @@ def test_lba_extractor_normalizes_fixture_payload(tmp_path, settings):
|
||||
assert row["three_pt_pct"] == 36.5
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_lba_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
    """Check that the LBA extractor tolerates a payload without player bio fields.

    A stub HTTP client returns the ``lba_players_stats_partial_public.json``
    fixture. The test expects exactly one record in the written snapshot,
    with ``full_name`` and ``games_played`` populated and every bio and
    physical field emitted as ``None``.
    """
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"

    public_stats = _load_fixture("lba/lba_players_stats_partial_public.json")

    class StubHttpClient:
        """Return the fixture for any request so no network call is made."""

        def get_json(self, *_args, **_kwargs):
            return public_stats

    snapshot_file = tmp_path / "lba-partial.json"
    outcome = LBASnapshotExtractor(http_client=StubHttpClient()).run(
        output_path=snapshot_file,
        snapshot_date=date(2026, 3, 13),
    )
    assert outcome.records_count == 1

    record = json.loads(snapshot_file.read_text(encoding="utf-8"))["records"][0]
    assert record["full_name"] == "Andrea Bianchi"

    # The keys must still be present in the normalized row, only with null values.
    fields_expected_null = (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
    )
    for field_name in fields_expected_null:
        assert record[field_name] is None, field_name

    assert record["games_played"] == 18
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_lba_extractor_still_fails_when_required_stats_are_missing(settings):
    """Check that dropping a stat field from the payload is still rejected.

    The partial-public fixture has its ``ppg`` value removed before being
    served by the stub client; running the extractor without writing output
    must raise ``ExtractorNormalizationError``.
    """
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"

    incomplete_stats = _load_fixture("lba/lba_players_stats_partial_public.json")
    # Remove a stat the fixture otherwise provides; raises KeyError if it is absent.
    del incomplete_stats["data"][0]["ppg"]

    class StubHttpClient:
        """Return the stripped-down fixture for any request."""

        def get_json(self, *_args, **_kwargs):
            return incomplete_stats

    # Built outside the raises-block so that only run() may satisfy the expectation.
    lba_extractor = LBASnapshotExtractor(http_client=StubHttpClient())

    with pytest.raises(ExtractorNormalizationError):
        lba_extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_lba_extractor_registry_selection(settings):
|
||||
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||
|
||||
Reference in New Issue
Block a user