diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1b803bb..d105e9f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -63,6 +63,15 @@ git checkout -b feature/your-feature-name 3. Implement with focused commits and tests. 4. Open PR: `feature/*` -> `develop`. +## Running Tests (v2) + +Runtime images are intentionally lean and may not ship `pytest`. +Use the development compose stack and install dev dependencies before running tests: + +```bash +docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q" +``` + ## PR Checklist - [ ] Target branch is correct diff --git a/README.md b/README.md index d28b5ae..901f852 100644 --- a/README.md +++ b/README.md @@ -336,11 +336,11 @@ Notes: ## Testing - runtime `web` image stays lean and may not include `pytest` tooling -- run tests with the development compose stack (or a dedicated test image/profile) where test dependencies are installed -- local example: +- run tests with the development compose stack (or a dedicated test image/profile) and install dev dependencies first +- local example (one-off): ```bash -docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web pytest -q +docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q" ``` ## Migration and Superuser Commands diff --git a/apps/ingestion/extractors/bcl.py b/apps/ingestion/extractors/bcl.py index bd50094..f91b101 100644 --- a/apps/ingestion/extractors/bcl.py +++ b/apps/ingestion/extractors/bcl.py @@ -16,6 +16,16 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any: return None +def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None: + for key in keys: + value = record.get(key) + if isinstance(value, str): + stripped = value.strip() + if stripped: + return stripped + return None + + ESSENTIAL_FIELDS = { "competition_external_id", "competition_name", @@ -108,7 +118,9 @@ class BCLSnapshotExtractor(BaseSnapshotExtractor): team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty( team_obj, "id", "team_id" ) - team_name = _first_non_empty(source_record, "team_name", "team") or _first_non_empty(team_obj, "name") + team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text( + team_obj, "name" + ) normalized = { "competition_external_id": self.competition_external_id, diff --git a/apps/ingestion/extractors/lba.py b/apps/ingestion/extractors/lba.py index 97347a5..d62abdb 100644 --- a/apps/ingestion/extractors/lba.py +++ b/apps/ingestion/extractors/lba.py @@ -16,6 +16,16 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any: return None +def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None: + for key in keys: + value = record.get(key) + if isinstance(value, str): + stripped = value.strip() + if stripped: + return stripped + return None + + ESSENTIAL_FIELDS = { "competition_external_id", "competition_name", @@ -108,7 +118,9 @@ class LBASnapshotExtractor(BaseSnapshotExtractor): team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty( team_obj, "id", "team_id" ) - team_name = _first_non_empty(source_record, "team_name", "team") or _first_non_empty(team_obj, "name") + team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text( + team_obj, "name" + ) normalized = { "competition_external_id": self.competition_external_id, diff --git a/tests/test_extractors_framework.py b/tests/test_extractors_framework.py index 1d873dd..a63fcd9 100644 --- a/tests/test_extractors_framework.py +++ b/tests/test_extractors_framework.py @@ -7,8 +7,10 @@ import pytest from django.core.management import call_command from apps.ingestion.extractors.base import BaseSnapshotExtractor +from apps.ingestion.extractors.base import ExtractorNormalizationError from apps.ingestion.extractors.http import ResponsibleHttpClient from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor +from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS class DummyExtractor(BaseSnapshotExtractor): @@ -64,6 +66,29 @@ class _FakeResponse: return self._payload +def _minimal_public_json_record() -> dict: + return { + "competition_external_id": "comp-1", + "competition_name": "League One", + "season": "2025-2026", + "team_external_id": "team-1", + "team_name": "Team One", + "player_external_id": "player-1", + "full_name": "Jane Doe", + "games_played": 12, + "minutes_per_game": 27.2, + "points_per_game": 13.0, + "rebounds_per_game": 4.4, + "assists_per_game": 3.1, + "steals_per_game": 1.0, + "blocks_per_game": 0.3, + "turnovers_per_game": 1.8, + "fg_pct": 46.2, + "three_pt_pct": 35.5, + "ft_pct": 82.1, + } + + @pytest.mark.django_db def test_base_extractor_run_writes_snapshot_file(tmp_path, settings): settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming") @@ -139,30 +164,7 @@ def test_public_json_extractor_normalizes_common_field_aliases(tmp_path): def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path): class FakeClient: def get_json(self, *_args, **_kwargs): - return { - "records": [ - { - "competition_external_id": "comp-1", - "competition_name": "League One", - "season": "2025-2026", - "team_external_id": "team-1", - "team_name": "Team One", - "player_external_id": "player-1", - "full_name": "Jane Doe", - "games_played": 12, - "minutes_per_game": 27.2, - "points_per_game": 13.0, - "rebounds_per_game": 4.4, - "assists_per_game": 3.1, - "steals_per_game": 1.0, - "blocks_per_game": 0.3, - "turnovers_per_game": 1.8, - "fg_pct": 46.2, - "three_pt_pct": 35.5, - "ft_pct": 82.1, - } - ] - } + return {"records": [_minimal_public_json_record()]} extractor = PublicJsonSnapshotExtractor( url="https://example.com/public-feed.json", @@ -186,6 +188,43 @@ def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields( assert row.get("role") is None +@pytest.mark.django_db +def test_public_json_extractor_fails_when_required_stat_missing(): + broken = _minimal_public_json_record() + broken.pop("points_per_game") + + class FakeClient: + def get_json(self, *_args, **_kwargs): + return {"records": [broken]} + + extractor = PublicJsonSnapshotExtractor( + url="https://example.com/public-feed.json", + source_name="test_public_feed", + http_client=FakeClient(), + ) + with pytest.raises(ExtractorNormalizationError): + extractor.run(write_output=False, snapshot_date=date(2026, 3, 13)) + + +@pytest.mark.django_db +@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS)) +def test_public_json_required_fields_follow_snapshot_schema(required_field): + broken = _minimal_public_json_record() + broken.pop(required_field) + + class FakeClient: + def get_json(self, *_args, **_kwargs): + return {"records": [broken]} + + extractor = PublicJsonSnapshotExtractor( + url="https://example.com/public-feed.json", + source_name="test_public_feed", + http_client=FakeClient(), + ) + with pytest.raises(ExtractorNormalizationError, match="missing required fields"): + extractor.run(write_output=False, snapshot_date=date(2026, 3, 13)) + + @pytest.mark.django_db def test_run_extractor_management_command_writes_snapshot(tmp_path, settings): settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json" diff --git a/tests/test_import_snapshots_command.py b/tests/test_import_snapshots_command.py index 83b4125..b997a56 100644 --- a/tests/test_import_snapshots_command.py +++ b/tests/test_import_snapshots_command.py @@ -138,6 +138,36 @@ def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing( assert PlayerSeasonStats.objects.count() == 1 +@pytest.mark.django_db +def test_snapshot_import_preserves_single_name_part_without_forced_split(tmp_path, settings): + incoming = tmp_path / "incoming" + archive = tmp_path / "archive" + failed = tmp_path / "failed" + incoming.mkdir() + archive.mkdir() + failed.mkdir() + + payload = _valid_payload() + row = payload["records"][0] + row["first_name"] = "LeBron" + row.pop("last_name") + + file_path = incoming / "single-name-part.json" + _write_json(file_path, payload) + + settings.STATIC_DATASET_INCOMING_DIR = str(incoming) + settings.STATIC_DATASET_ARCHIVE_DIR = str(archive) + settings.STATIC_DATASET_FAILED_DIR = str(failed) + + call_command("import_snapshots") + + run = ImportRun.objects.get() + assert run.status == ImportRun.RunStatus.SUCCESS + player = Player.objects.get(source_uid="player-23") + assert player.first_name == "LeBron" + assert player.last_name == "" + + @pytest.mark.django_db @pytest.mark.parametrize( ("source_name", "competition_id", "competition_name"),