test(v2-ingestion): harden public-source schema contract and docs

This commit is contained in:
Alfredo Di Stasio
2026-03-20 15:50:59 +01:00
parent 48a82e812a
commit 3f811827de
6 changed files with 131 additions and 29 deletions

View File

@ -7,8 +7,10 @@ import pytest
from django.core.management import call_command
from apps.ingestion.extractors.base import BaseSnapshotExtractor
from apps.ingestion.extractors.base import ExtractorNormalizationError
from apps.ingestion.extractors.http import ResponsibleHttpClient
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
class DummyExtractor(BaseSnapshotExtractor):
@ -64,6 +66,29 @@ class _FakeResponse:
return self._payload
def _minimal_public_json_record() -> dict:
    """Return a fresh public-JSON feed record without optional fields.

    The record carries competition, team and player identifiers, the
    season label, and per-game statistics; it has no bio or physical
    attributes. A new dict is built on every call, so a test may pop keys
    from its copy without affecting any other test.
    """
    return {
        # Competition / team / player identity.
        "competition_external_id": "comp-1",
        "competition_name": "League One",
        "season": "2025-2026",
        "team_external_id": "team-1",
        "team_name": "Team One",
        "player_external_id": "player-1",
        "full_name": "Jane Doe",
        # Per-game statistics.
        "games_played": 12,
        "minutes_per_game": 27.2,
        "points_per_game": 13.0,
        "rebounds_per_game": 4.4,
        "assists_per_game": 3.1,
        "steals_per_game": 1.0,
        "blocks_per_game": 0.3,
        "turnovers_per_game": 1.8,
        # Shooting percentages.
        "fg_pct": 46.2,
        "three_pt_pct": 35.5,
        "ft_pct": 82.1,
    }
@pytest.mark.django_db
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
@ -139,30 +164,7 @@ def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path):
class FakeClient:
def get_json(self, *_args, **_kwargs):
return {
"records": [
{
"competition_external_id": "comp-1",
"competition_name": "League One",
"season": "2025-2026",
"team_external_id": "team-1",
"team_name": "Team One",
"player_external_id": "player-1",
"full_name": "Jane Doe",
"games_played": 12,
"minutes_per_game": 27.2,
"points_per_game": 13.0,
"rebounds_per_game": 4.4,
"assists_per_game": 3.1,
"steals_per_game": 1.0,
"blocks_per_game": 0.3,
"turnovers_per_game": 1.8,
"fg_pct": 46.2,
"three_pt_pct": 35.5,
"ft_pct": 82.1,
}
]
}
return {"records": [_minimal_public_json_record()]}
extractor = PublicJsonSnapshotExtractor(
url="https://example.com/public-feed.json",
@ -186,6 +188,43 @@ def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(
assert row.get("role") is None
@pytest.mark.django_db
def test_public_json_extractor_fails_when_required_stat_missing():
    """A record lacking ``points_per_game`` must fail normalization."""
    record = _minimal_public_json_record()
    del record["points_per_game"]

    class _StubClient:
        """Stand-in HTTP client that serves the incomplete record."""

        def get_json(self, *_args, **_kwargs):
            return {"records": [record]}

    stub_client = _StubClient()
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=stub_client,
    )

    run_date = date(2026, 3, 13)
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=run_date)
@pytest.mark.django_db
@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS))
def test_public_json_required_fields_follow_snapshot_schema(required_field):
    """Dropping any field in ``REQUIRED_RECORD_FIELDS`` must be rejected.

    Runs once per required field (in sorted order) and expects the
    extractor to raise ``ExtractorNormalizationError`` with a message
    matching "missing required fields".
    """
    record = _minimal_public_json_record()
    del record[required_field]

    class _StubClient:
        """Stand-in HTTP client that serves the incomplete record."""

        def get_json(self, *_args, **_kwargs):
            return {"records": [record]}

    stub_client = _StubClient()
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=stub_client,
    )

    run_date = date(2026, 3, 13)
    with pytest.raises(ExtractorNormalizationError, match="missing required fields"):
        extractor.run(write_output=False, snapshot_date=run_date)
@pytest.mark.django_db
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"