test(v2-ingestion): harden public-source schema contract and docs
This commit is contained in:
@ -63,6 +63,15 @@ git checkout -b feature/your-feature-name
|
||||
3. Implement with focused commits and tests.
|
||||
4. Open PR: `feature/*` -> `develop`.
|
||||
|
||||
## Running Tests (v2)
|
||||
|
||||
Runtime images are intentionally lean and may not ship `pytest`.
|
||||
Use the development compose stack and install dev dependencies before running tests:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
|
||||
```
|
||||
|
||||
## PR Checklist
|
||||
|
||||
- [ ] Target branch is correct
|
||||
|
||||
@ -336,11 +336,11 @@ Notes:
|
||||
## Testing
|
||||
|
||||
- runtime `web` image stays lean and may not include `pytest` tooling
|
||||
- run tests with the development compose stack (or a dedicated test image/profile) where test dependencies are installed
|
||||
- local example:
|
||||
- run tests with the development compose stack (or a dedicated test image/profile) and install dev dependencies first
|
||||
- local example (one-off):
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web pytest -q
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
|
||||
```
|
||||
|
||||
## Migration and Superuser Commands
|
||||
|
||||
@ -16,6 +16,16 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||
return None
|
||||
|
||||
|
||||
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
|
||||
for key in keys:
|
||||
value = record.get(key)
|
||||
if isinstance(value, str):
|
||||
stripped = value.strip()
|
||||
if stripped:
|
||||
return stripped
|
||||
return None
|
||||
|
||||
|
||||
ESSENTIAL_FIELDS = {
|
||||
"competition_external_id",
|
||||
"competition_name",
|
||||
@ -108,7 +118,9 @@ class BCLSnapshotExtractor(BaseSnapshotExtractor):
|
||||
team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty(
|
||||
team_obj, "id", "team_id"
|
||||
)
|
||||
team_name = _first_non_empty(source_record, "team_name", "team") or _first_non_empty(team_obj, "name")
|
||||
team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text(
|
||||
team_obj, "name"
|
||||
)
|
||||
|
||||
normalized = {
|
||||
"competition_external_id": self.competition_external_id,
|
||||
|
||||
@ -16,6 +16,16 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||
return None
|
||||
|
||||
|
||||
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
|
||||
for key in keys:
|
||||
value = record.get(key)
|
||||
if isinstance(value, str):
|
||||
stripped = value.strip()
|
||||
if stripped:
|
||||
return stripped
|
||||
return None
|
||||
|
||||
|
||||
ESSENTIAL_FIELDS = {
|
||||
"competition_external_id",
|
||||
"competition_name",
|
||||
@ -108,7 +118,9 @@ class LBASnapshotExtractor(BaseSnapshotExtractor):
|
||||
team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty(
|
||||
team_obj, "id", "team_id"
|
||||
)
|
||||
team_name = _first_non_empty(source_record, "team_name", "team") or _first_non_empty(team_obj, "name")
|
||||
team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text(
|
||||
team_obj, "name"
|
||||
)
|
||||
|
||||
normalized = {
|
||||
"competition_external_id": self.competition_external_id,
|
||||
|
||||
@ -7,8 +7,10 @@ import pytest
|
||||
from django.core.management import call_command
|
||||
|
||||
from apps.ingestion.extractors.base import BaseSnapshotExtractor
|
||||
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||
from apps.ingestion.extractors.http import ResponsibleHttpClient
|
||||
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor
|
||||
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
|
||||
|
||||
|
||||
class DummyExtractor(BaseSnapshotExtractor):
|
||||
@ -64,6 +66,29 @@ class _FakeResponse:
|
||||
return self._payload
|
||||
|
||||
|
||||
def _minimal_public_json_record() -> dict:
|
||||
return {
|
||||
"competition_external_id": "comp-1",
|
||||
"competition_name": "League One",
|
||||
"season": "2025-2026",
|
||||
"team_external_id": "team-1",
|
||||
"team_name": "Team One",
|
||||
"player_external_id": "player-1",
|
||||
"full_name": "Jane Doe",
|
||||
"games_played": 12,
|
||||
"minutes_per_game": 27.2,
|
||||
"points_per_game": 13.0,
|
||||
"rebounds_per_game": 4.4,
|
||||
"assists_per_game": 3.1,
|
||||
"steals_per_game": 1.0,
|
||||
"blocks_per_game": 0.3,
|
||||
"turnovers_per_game": 1.8,
|
||||
"fg_pct": 46.2,
|
||||
"three_pt_pct": 35.5,
|
||||
"ft_pct": 82.1,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
|
||||
settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
|
||||
@ -139,30 +164,7 @@ def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
|
||||
def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path):
|
||||
class FakeClient:
|
||||
def get_json(self, *_args, **_kwargs):
|
||||
return {
|
||||
"records": [
|
||||
{
|
||||
"competition_external_id": "comp-1",
|
||||
"competition_name": "League One",
|
||||
"season": "2025-2026",
|
||||
"team_external_id": "team-1",
|
||||
"team_name": "Team One",
|
||||
"player_external_id": "player-1",
|
||||
"full_name": "Jane Doe",
|
||||
"games_played": 12,
|
||||
"minutes_per_game": 27.2,
|
||||
"points_per_game": 13.0,
|
||||
"rebounds_per_game": 4.4,
|
||||
"assists_per_game": 3.1,
|
||||
"steals_per_game": 1.0,
|
||||
"blocks_per_game": 0.3,
|
||||
"turnovers_per_game": 1.8,
|
||||
"fg_pct": 46.2,
|
||||
"three_pt_pct": 35.5,
|
||||
"ft_pct": 82.1,
|
||||
}
|
||||
]
|
||||
}
|
||||
return {"records": [_minimal_public_json_record()]}
|
||||
|
||||
extractor = PublicJsonSnapshotExtractor(
|
||||
url="https://example.com/public-feed.json",
|
||||
@ -186,6 +188,43 @@ def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(
|
||||
assert row.get("role") is None
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_public_json_extractor_fails_when_required_stat_missing():
    """A record without ``points_per_game`` must fail normalization."""
    broken = _minimal_public_json_record()
    broken.pop("points_per_game")

    class FakeClient:
        # Stand-in HTTP client: always serves the single broken record.
        def get_json(self, *_args, **_kwargs):
            return {"records": [broken]}

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||
|
||||
|
||||
@pytest.mark.django_db
@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS))
def test_public_json_required_fields_follow_snapshot_schema(required_field):
    """Removing any single schema-required field must fail normalization.

    Runs once per entry in ``REQUIRED_RECORD_FIELDS`` (sorted so the
    parametrized test ids are stable) and expects the error message to
    contain "missing required fields".
    """
    broken = _minimal_public_json_record()
    broken.pop(required_field)

    class FakeClient:
        # Stand-in HTTP client: always serves the single broken record.
        def get_json(self, *_args, **_kwargs):
            return {"records": [broken]}

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError, match="missing required fields"):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
|
||||
settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"
|
||||
|
||||
@ -138,6 +138,36 @@ def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing(
|
||||
assert PlayerSeasonStats.objects.count() == 1
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_snapshot_import_preserves_single_name_part_without_forced_split(tmp_path, settings):
    """A record with only ``first_name`` imports with an empty ``last_name``.

    The payload's first record gets ``first_name="LeBron"`` and has its
    ``last_name`` removed; the import run must succeed and the stored player
    must keep the first name with ``last_name == ""``.
    """
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()

    payload = _valid_payload()
    row = payload["records"][0]
    row["first_name"] = "LeBron"
    row.pop("last_name")

    file_path = incoming / "single-name-part.json"
    _write_json(file_path, payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    # NOTE(review): "player-23" is presumably the player id set by
    # _valid_payload() — confirm against that helper.
    player = Player.objects.get(source_uid="player-23")
    assert player.first_name == "LeBron"
    assert player.last_name == ""
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize(
|
||||
("source_name", "competition_id", "competition_name"),
|
||||
|
||||
Reference in New Issue
Block a user