diff --git a/README.md b/README.md index 10802a7..27e934d 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ The current application baseline provides: - user-scoped shortlist favorites - user-scoped plain-text scouting notes on player detail pages - user-scoped saved searches (save, rerun, delete) +- first real-data ingestion command baseline (`import_hoopdata_demo_competition`) with idempotent source-identity mapping Accepted technical and product-shaping decisions live in: - `docs/ARCHITECTURE.md` @@ -59,6 +60,12 @@ Accepted technical and product-shaping decisions live in: 9. Open player detail pages to review context rows and create user-scoped notes. 10. Use `http://127.0.0.1:8000/favorites/` to review your user-scoped shortlist. +First real-data importer (ADR-0009 baseline): +- default sample snapshot import: + `docker compose --env-file .env -f infra/docker-compose.yml exec -T app python manage.py import_hoopdata_demo_competition` +- explicit input path import: + `docker compose --env-file .env -f infra/docker-compose.yml exec -T app python manage.py import_hoopdata_demo_competition --input /app/scouting/sample_data/imports/hoopdata_demo_serie_a2_2025_2026.json` + Legacy shared favorites and notes from the pre-auth MVP are cleared by the early-stage ownership migration so the app can move cleanly to user-scoped data. 
class ImportValidationError(ValueError):
    """Raised when a snapshot payload cannot be imported as given.

    Subclasses ``ValueError`` so callers that already catch ``ValueError``
    keep working.
    """


def parse_date(value: str | None) -> date | None:
    """Convert an ISO-8601 date string into a ``date``.

    Args:
        value: A ``YYYY-MM-DD`` string, or a falsy value (``None``, ``""``)
            meaning "not provided".

    Returns:
        The parsed date, or ``None`` when ``value`` is falsy.

    Raises:
        ImportValidationError: If ``value`` is not a valid ISO date string.
    """
    if not value:
        return None
    try:
        return date.fromisoformat(value)
    except (TypeError, ValueError) as exc:
        # fromisoformat raises TypeError for non-strings and ValueError for
        # malformed strings; surface both as a payload problem.
        raise ImportValidationError(f"Invalid ISO date value: {value!r}.") from exc


def parse_decimal(value) -> Decimal | None:
    """Convert a JSON scalar into a finite ``Decimal``.

    The value goes through ``str()`` first so floats such as ``17.2`` become
    ``Decimal("17.2")`` rather than their binary expansion.

    Args:
        value: A number or numeric string; ``None`` and ``""`` mean
            "not provided".

    Returns:
        The parsed decimal, or ``None`` when ``value`` is ``None`` or ``""``.

    Raises:
        ImportValidationError: If ``value`` is not numeric, or is NaN/Infinity.
    """
    from decimal import InvalidOperation

    if value in (None, ""):
        return None
    try:
        number = Decimal(str(value))
    except InvalidOperation as exc:
        raise ImportValidationError(f"Invalid numeric value: {value!r}.") from exc
    # json.loads accepts NaN/Infinity literals; reject them here instead of
    # letting them travel further into the import.
    if not number.is_finite():
        raise ImportValidationError(f"Numeric value must be finite: {value!r}.")
    return number
def _validate_payload_shape(self) -> None:
    """Check the payload structure before any record is written.

    Every container (root, competition, season, each player record, its
    team and its stats) is checked to be a JSON object before its keys are
    read, so a wrongly shaped document is reported as a validation error
    rather than an ``AttributeError``.

    Raises:
        ImportValidationError: If the source name is not
            ``EXPECTED_SOURCE_NAME``, a container has the wrong type, or a
            required key is missing or empty.
    """
    payload = self.payload
    if not isinstance(payload, dict):
        raise ImportValidationError("Payload must be a JSON object.")

    if payload.get("source_name") != self.EXPECTED_SOURCE_NAME:
        raise ImportValidationError(
            f"Expected source_name='{self.EXPECTED_SOURCE_NAME}', got '{payload.get('source_name')}'."
        )

    for key in ("competition", "season", "players"):
        if key not in payload:
            raise ImportValidationError(f"Missing root key '{key}'.")

    players = payload["players"]
    if not isinstance(players, list) or not players:
        raise ImportValidationError("Payload must include at least one player record.")

    competition = payload["competition"]
    if not isinstance(competition, dict):
        raise ImportValidationError("Competition must be a JSON object.")
    for field in ("external_id", "name"):
        if not competition.get(field):
            raise ImportValidationError(f"Competition requires '{field}'.")

    season = payload["season"]
    if not isinstance(season, dict):
        raise ImportValidationError("Season must be a JSON object.")
    for field in ("name", "start_year", "end_year"):
        # Compare against None/"" explicitly so a literal 0 is not
        # treated as missing.
        if season.get(field) in (None, ""):
            raise ImportValidationError(f"Season requires '{field}'.")

    required_player_fields = (
        "external_id",
        "context_external_id",
        "full_name",
        "position",
        "team",
        "stats",
    )
    for index, player_record in enumerate(players, start=1):
        if not isinstance(player_record, dict):
            raise ImportValidationError(f"Player record #{index} must be a JSON object.")
        for field in required_player_fields:
            if player_record.get(field) in (None, ""):
                raise ImportValidationError(f"Player record #{index} missing '{field}'.")

        team_record = player_record["team"]
        if not isinstance(team_record, dict):
            raise ImportValidationError(f"Player record #{index} team must be a JSON object.")
        for field in ("external_id", "name", "country"):
            if team_record.get(field) in (None, ""):
                raise ImportValidationError(f"Player record #{index} team missing '{field}'.")

        # The stats writer reads individual keys with .get(), so stats
        # must be an object as well.
        if not isinstance(player_record["stats"], dict):
            raise ImportValidationError(f"Player record #{index} stats must be a JSON object.")
def _upsert_competition(self, source_name: str, record: dict) -> Competition:
    """Create or refresh the competition for this snapshot and bind its mapping.

    When a mapping for the external ID already exists, the mapped row is
    refreshed (name, country, level). Otherwise the competition is looked up
    by name and created if absent, and its country/level are aligned with
    the payload.

    Args:
        source_name: Source identifier used to scope the external ID.
        record: The payload's ``competition`` object; ``external_id`` and
            ``name`` are required, ``country`` and ``level`` default to "".

    Returns:
        The stored ``Competition`` instance.

    Raises:
        ImportValidationError: If the mapping points to a row that no longer
            exists, or binding the mapping detects a conflict.
    """
    entity_type = ExternalEntityMapping.EntityType.COMPETITION
    mapping = self._mapping_for(source_name, entity_type, record["external_id"])

    desired = {
        "country": record.get("country", ""),
        "level": record.get("level", ""),
    }
    if mapping:
        competition = Competition.objects.filter(pk=mapping.object_id).first()
        if competition is None:
            raise ImportValidationError("Competition mapping points to a missing record.")
        # A mapped row is identified by external ID, so the source may
        # rename it; include the name in the fields to align.
        desired = {"name": record["name"], **desired}
    else:
        competition, _ = Competition.objects.get_or_create(name=record["name"], defaults=desired)

    # Write through save() in both branches. QuerySet.update() skips save(),
    # so it would not refresh updated_at (presumably an auto_now field, as on
    # ExternalEntityMapping) and would issue an UPDATE on every re-run even
    # when nothing changed.
    changed = []
    for field, value in desired.items():
        if getattr(competition, field) != value:
            setattr(competition, field, value)
            changed.append(field)
    if changed:
        competition.save(update_fields=changed + ["updated_at"])

    self._bind_mapping(
        source_name=source_name,
        entity_type=entity_type,
        external_id=record["external_id"],
        object_id=competition.id,
    )
    return competition
def _upsert_player_season(
    self,
    *,
    source_name: str,
    competition: Competition,
    season: Season,
    team: Team,
    player: Player,
    player_record: dict,
) -> PlayerSeason:
    """Resolve the player-season context for one player record and bind its mapping.

    A previously mapped context must still reference the same player,
    season, team and competition as the incoming record. An unmapped
    context is fetched or created from those four references.

    Returns:
        The ``PlayerSeason`` row for this record.

    Raises:
        ImportValidationError: If the mapping points to a missing row, or
            the mapped row's identity differs from the incoming one.
    """
    kind = ExternalEntityMapping.EntityType.PLAYER_SEASON
    context_key = player_record["context_external_id"]
    known = self._mapping_for(source_name, kind, context_key)

    if not known:
        context, is_new = PlayerSeason.objects.get_or_create(
            player=player,
            season=season,
            team=team,
            competition=competition,
            defaults={},
        )
        if is_new:
            self.summary.contexts_created += 1
        else:
            self.summary.contexts_updated += 1
    else:
        context = PlayerSeason.objects.filter(pk=known.object_id).first()
        if context is None:
            raise ImportValidationError("PlayerSeason mapping points to a missing record.")

        # Compare the stored identity with the incoming one as a whole.
        stored = (context.player_id, context.season_id, context.team_id, context.competition_id)
        incoming = (player.id, season.id, team.id, competition.id)
        if stored != incoming:
            raise ImportValidationError(
                "Mapped player-season context does not match the incoming deterministic context identity."
            )
        self.summary.contexts_updated += 1

    self._bind_mapping(
        source_name=source_name,
        entity_type=kind,
        external_id=context_key,
        object_id=context.id,
    )
    return context
def handle(self, *args, **options):
    """Load the snapshot file, run the importer and print a summary.

    Reading, JSON decoding and importing each have their own ``try`` so an
    error is attributed to the step that raised it.

    Raises:
        CommandError: If the input path is missing or not a regular file,
            cannot be read as UTF-8, is not valid JSON, is not a JSON
            object at the top level, or fails importer validation.
    """
    input_path = Path(options["input"])
    if not input_path.exists():
        raise CommandError(f"Input file does not exist: {input_path}")
    # exists() is also true for directories, which would otherwise fail
    # inside read_text() with an unhandled IsADirectoryError.
    if not input_path.is_file():
        raise CommandError(f"Input path is not a file: {input_path}")

    try:
        raw_text = input_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise CommandError(f"Could not read input file {input_path}: {exc}") from exc

    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise CommandError(f"Invalid JSON payload: {exc}") from exc

    # The importer is annotated to take a dict; reject arrays and scalars here.
    if not isinstance(payload, dict):
        raise CommandError("Invalid JSON payload: top-level value must be an object.")

    try:
        summary = HoopDataDemoCompetitionImporter(payload).run()
    except ImportValidationError as exc:
        raise CommandError(f"Import validation failed: {exc}") from exc

    self.stdout.write(
        self.style.SUCCESS(
            "Imported hoopdata_demo competition snapshot successfully. "
            f"Players +{summary.players_created}/~{summary.players_updated}, "
            f"Teams +{summary.teams_created}/~{summary.teams_updated}, "
            f"Contexts +{summary.contexts_created}/~{summary.contexts_updated}."
        )
    )
    self.stdout.write(f"Source file: {input_path}")
source_name = models.CharField(max_length=80) + entity_type = models.CharField(max_length=30, choices=EntityType.choices) + external_id = models.CharField(max_length=140) + object_id = models.PositiveBigIntegerField() + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ["source_name", "entity_type", "external_id"] + constraints = [ + models.UniqueConstraint( + fields=["source_name", "entity_type", "external_id"], + name="uniq_external_entity_mapping", + ), + models.UniqueConstraint( + fields=["source_name", "entity_type", "object_id"], + name="uniq_external_entity_target", + ), + ] + + def __str__(self) -> str: + return f"{self.source_name}:{self.entity_type}:{self.external_id} -> {self.object_id}" diff --git a/app/scouting/sample_data/imports/hoopdata_demo_serie_a2_2025_2026.json b/app/scouting/sample_data/imports/hoopdata_demo_serie_a2_2025_2026.json new file mode 100644 index 0000000..caddf11 --- /dev/null +++ b/app/scouting/sample_data/imports/hoopdata_demo_serie_a2_2025_2026.json @@ -0,0 +1,76 @@ +{ + "source_name": "hoopdata_demo", + "competition": { + "external_id": "comp-ita2-2025", + "name": "Italian Serie A2", + "country": "IT", + "level": "second" + }, + "season": { + "name": "2025-2026", + "start_year": 2025, + "end_year": 2026 + }, + "players": [ + { + "external_id": "player-1001", + "context_external_id": "ctx-1001-2025-ita2-bologna-blaze", + "full_name": "Andrea Pulse", + "first_name": "Andrea", + "last_name": "Pulse", + "birth_date": "2003-03-19", + "nationality": "IT", + "position": "PG", + "height_cm": 186.0, + "weight_kg": 79.0, + "wingspan_cm": 193.0, + "team": { + "external_id": "team-bologna-blaze", + "name": "Bologna Blaze", + "country": "IT" + }, + "stats": { + "points": 17.2, + "assists": 6.4, + "steals": 1.7, + "turnovers": 2.5, + "blocks": 0.2, + "efg_pct": 52.1, + "ts_pct": 57.9, + "plus_minus": 3.8, + "offensive_rating": 113.4, + "defensive_rating": 106.9 
class FirstRealIngestionFlowTests(TestCase):
    """End-to-end checks for the hoopdata_demo import management command."""

    COMMAND_NAME = "import_hoopdata_demo_competition"
    SOURCE_NAME = "hoopdata_demo"

    def _run_import(self):
        """Invoke the import command with its default sample snapshot."""
        call_command(self.COMMAND_NAME)

    @staticmethod
    def _row_counts():
        """Return the current row count of every table the importer writes."""
        return {
            "players": Player.objects.count(),
            "teams": Team.objects.count(),
            "contexts": PlayerSeason.objects.count(),
            "stats": PlayerSeasonStats.objects.count(),
            "mappings": ExternalEntityMapping.objects.count(),
        }

    def test_importer_command_runs_successfully(self):
        self._run_import()
        self.assertGreaterEqual(Player.objects.count(), 2)

    def test_importer_creates_expected_core_objects(self):
        self._run_import()

        expected_rows = [
            Competition.objects.filter(name="Italian Serie A2"),
            Season.objects.filter(name="2025-2026", start_year=2025, end_year=2026),
            Player.objects.filter(full_name="Andrea Pulse", position="PG"),
            Player.objects.filter(full_name="Matteo Harbor", position="C"),
            PlayerSeason.objects.filter(player__full_name="Andrea Pulse"),
            PlayerSeasonStats.objects.filter(player_season__player__full_name="Andrea Pulse"),
        ]
        for queryset in expected_rows:
            self.assertTrue(queryset.exists())

        mapping_total = ExternalEntityMapping.objects.filter(source_name=self.SOURCE_NAME).count()
        self.assertEqual(mapping_total, 7)

    def test_importer_is_idempotent_for_same_input(self):
        self._run_import()
        first_counts = self._row_counts()

        self._run_import()

        # A second run over the same snapshot must leave every table's size unchanged.
        self.assertEqual(self._row_counts(), first_counts)

    def test_importer_does_not_overwrite_internal_scouting_fields(self):
        role = Role.objects.create(name="internal role", slug="internal-role")
        specialty = Specialty.objects.create(name="internal specialty", slug="internal-specialty")
        self._run_import()

        player = Player.objects.get(full_name="Andrea Pulse")
        player.roles.add(role)
        player.specialties.add(specialty)

        self._run_import()
        player.refresh_from_db()

        self.assertTrue(player.roles.filter(pk=role.pk).exists())
        self.assertTrue(player.specialties.filter(pk=specialty.pk).exists())

    def test_importer_does_not_interfere_with_user_owned_data(self):
        self._run_import()
        user = User.objects.create_user(username="ingest_user", password="pass12345")
        player = Player.objects.get(full_name="Andrea Pulse")
        favorite = FavoritePlayer.objects.create(user=user, player=player)
        note = PlayerNote.objects.create(user=user, player=player, body="Tracked after import")
        saved = SavedSearch.objects.create(user=user, name="Imported PG", params={"name": "Andrea"})

        self._run_import()

        self.assertTrue(FavoritePlayer.objects.filter(pk=favorite.pk).exists())
        self.assertTrue(PlayerNote.objects.filter(pk=note.pk).exists())
        self.assertTrue(SavedSearch.objects.filter(pk=saved.pk).exists())

    def test_imported_data_is_visible_in_search_and_detail_flows(self):
        self._run_import()

        list_url = reverse("scouting:player_list")
        list_response = self.client.get(list_url, {"name": "Andrea"})
        self.assertEqual(list_response.status_code, 200)
        self.assertContains(list_response, "Andrea Pulse")

        player = Player.objects.get(full_name="Andrea Pulse")
        detail_url = reverse("scouting:player_detail", args=[player.id])
        detail_response = self.client.get(detail_url)
        self.assertEqual(detail_response.status_code, 200)
        self.assertContains(detail_response, "Andrea Pulse")
        self.assertContains(detail_response, "PTS 17.2")