test(v2-ingestion): harden public-source schema contract and docs
This commit is contained in:
@ -7,8 +7,10 @@ import pytest
|
||||
from django.core.management import call_command
|
||||
|
||||
from apps.ingestion.extractors.base import BaseSnapshotExtractor
|
||||
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||
from apps.ingestion.extractors.http import ResponsibleHttpClient
|
||||
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor
|
||||
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
|
||||
|
||||
|
||||
class DummyExtractor(BaseSnapshotExtractor):
|
||||
@ -64,6 +66,29 @@ class _FakeResponse:
|
||||
return self._payload
|
||||
|
||||
|
||||
def _minimal_public_json_record() -> dict:
|
||||
return {
|
||||
"competition_external_id": "comp-1",
|
||||
"competition_name": "League One",
|
||||
"season": "2025-2026",
|
||||
"team_external_id": "team-1",
|
||||
"team_name": "Team One",
|
||||
"player_external_id": "player-1",
|
||||
"full_name": "Jane Doe",
|
||||
"games_played": 12,
|
||||
"minutes_per_game": 27.2,
|
||||
"points_per_game": 13.0,
|
||||
"rebounds_per_game": 4.4,
|
||||
"assists_per_game": 3.1,
|
||||
"steals_per_game": 1.0,
|
||||
"blocks_per_game": 0.3,
|
||||
"turnovers_per_game": 1.8,
|
||||
"fg_pct": 46.2,
|
||||
"three_pt_pct": 35.5,
|
||||
"ft_pct": 82.1,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
|
||||
settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
|
||||
@ -139,30 +164,7 @@ def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
|
||||
def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path):
|
||||
class FakeClient:
|
||||
def get_json(self, *_args, **_kwargs):
|
||||
return {
|
||||
"records": [
|
||||
{
|
||||
"competition_external_id": "comp-1",
|
||||
"competition_name": "League One",
|
||||
"season": "2025-2026",
|
||||
"team_external_id": "team-1",
|
||||
"team_name": "Team One",
|
||||
"player_external_id": "player-1",
|
||||
"full_name": "Jane Doe",
|
||||
"games_played": 12,
|
||||
"minutes_per_game": 27.2,
|
||||
"points_per_game": 13.0,
|
||||
"rebounds_per_game": 4.4,
|
||||
"assists_per_game": 3.1,
|
||||
"steals_per_game": 1.0,
|
||||
"blocks_per_game": 0.3,
|
||||
"turnovers_per_game": 1.8,
|
||||
"fg_pct": 46.2,
|
||||
"three_pt_pct": 35.5,
|
||||
"ft_pct": 82.1,
|
||||
}
|
||||
]
|
||||
}
|
||||
return {"records": [_minimal_public_json_record()]}
|
||||
|
||||
extractor = PublicJsonSnapshotExtractor(
|
||||
url="https://example.com/public-feed.json",
|
||||
@ -186,6 +188,43 @@ def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(
|
||||
assert row.get("role") is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_public_json_extractor_fails_when_required_stat_missing():
    """A feed record without ``points_per_game`` must make ``run`` raise.

    The baseline record is stripped of ``points_per_game`` and served through
    a stub HTTP client; running the extractor is expected to raise
    ``ExtractorNormalizationError``.
    """
    incomplete_record = _minimal_public_json_record()
    del incomplete_record["points_per_game"]

    class _StubClient:
        """Stand-in HTTP client whose ``get_json`` serves the incomplete record."""

        def get_json(self, *_args, **_kwargs):
            # A new payload dict per call, wrapping the same record object.
            return {"records": [incomplete_record]}

    stub_client = _StubClient()
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=stub_client,
    )

    run_kwargs = {"write_output": False, "snapshot_date": date(2026, 3, 13)}
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(**run_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS))
def test_public_json_required_fields_follow_snapshot_schema(required_field):
    """Removing any one ``REQUIRED_RECORD_FIELDS`` entry must fail the run.

    Parametrized over the sorted required field names. For each one, the
    baseline record has that key removed and is served through a stub HTTP
    client; the extractor run is expected to raise
    ``ExtractorNormalizationError`` with a message matching
    ``"missing required fields"``.
    """
    incomplete_record = _minimal_public_json_record()
    # Raises KeyError if the baseline record does not carry the field.
    del incomplete_record[required_field]

    class _StubClient:
        """Stand-in HTTP client whose ``get_json`` serves the incomplete record."""

        def get_json(self, *_args, **_kwargs):
            # A new payload dict per call, wrapping the same record object.
            return {"records": [incomplete_record]}

    stub_client = _StubClient()
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=stub_client,
    )

    run_kwargs = {"write_output": False, "snapshot_date": date(2026, 3, 13)}
    with pytest.raises(ExtractorNormalizationError, match="missing required fields"):
        extractor.run(**run_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
|
||||
settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"
|
||||
|
||||
Reference in New Issue
Block a user