Merge branch 'feature/phase-2-first-real-ingestion-flow' into develop

This commit is contained in:
bisco
2026-04-11 00:24:48 +02:00
8 changed files with 610 additions and 0 deletions

View File

@ -15,6 +15,7 @@ The current application baseline provides:
- user-scoped shortlist favorites
- user-scoped plain-text scouting notes on player detail pages
- user-scoped saved searches (save, rerun, delete)
- first real-data ingestion command baseline (`import_hoopdata_demo_competition`) with idempotent source-identity mapping
Accepted technical and product-shaping decisions live in:
- `docs/ARCHITECTURE.md`
@ -59,6 +60,12 @@ Accepted technical and product-shaping decisions live in:
9. Open player detail pages to review context rows and create user-scoped notes.
10. Use `http://127.0.0.1:8000/favorites/` to review your user-scoped shortlist.
First real-data importer (ADR-0009 baseline):
- default sample snapshot import:
`docker compose --env-file .env -f infra/docker-compose.yml exec -T app python manage.py import_hoopdata_demo_competition`
- explicit input path import:
`docker compose --env-file .env -f infra/docker-compose.yml exec -T app python manage.py import_hoopdata_demo_competition --input /app/scouting/sample_data/imports/hoopdata_demo_serie_a2_2025_2026.json`
Legacy shared favorites and notes from the pre-auth MVP are cleared by the early-stage ownership migration so the app can move cleanly to user-scoped data.
## Workflow

View File

@ -0,0 +1 @@
"""Importer modules for source-scoped real-data ingestion flows."""

View File

@ -0,0 +1,330 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import date
from decimal import Decimal
from django.db import transaction
from scouting.models import (
Competition,
ExternalEntityMapping,
Player,
PlayerSeason,
PlayerSeasonStats,
Season,
Team,
)
@dataclass
class ImportSummary:
    """Tally of rows created or updated during one importer run.

    Every counter starts at zero; field order is part of the positional
    constructor signature and must stay as declared.
    """

    # Player rows.
    players_created: int = 0
    players_updated: int = 0

    # Team rows.
    teams_created: int = 0
    teams_updated: int = 0

    # Player-season context rows.
    contexts_created: int = 0
    contexts_updated: int = 0
def parse_date(value: str | None) -> date | None:
    """Turn an ISO-format date string into a ``date``.

    Falsy input (``None`` or an empty string) yields ``None``. Any other
    value is passed to ``date.fromisoformat``, whose exceptions propagate
    unchanged.
    """
    return date.fromisoformat(value) if value else None
def parse_decimal(value) -> Decimal | None:
    """Convert a raw payload value into a ``Decimal``.

    ``None`` and the empty string mean "no value" and yield ``None``. Every
    other value is stringified first, so a float such as ``17.2`` becomes
    ``Decimal("17.2")`` rather than its binary expansion.
    """
    return None if value in (None, "") else Decimal(str(value))
class ImportValidationError(ValueError):
    """Raised when an import payload is malformed or conflicts with stored data."""
class HoopDataDemoCompetitionImporter:
    """Source-specific MVP importer for one competition snapshot payload.

    The payload is a decoded JSON object with ``source_name``,
    ``competition``, ``season`` and a non-empty ``players`` list. Incoming
    records are matched to existing rows through ``ExternalEntityMapping`` so
    that importing the same payload again updates rows instead of duplicating
    them. Shape and value problems detected by the importer are reported as
    ``ImportValidationError``.
    """

    EXPECTED_SOURCE_NAME = "hoopdata_demo"

    # Keys read from each player's "stats" object and written to the
    # identically named PlayerSeasonStats fields.
    STAT_FIELDS = (
        "points",
        "assists",
        "steals",
        "turnovers",
        "blocks",
        "efg_pct",
        "ts_pct",
        "plus_minus",
        "offensive_rating",
        "defensive_rating",
    )

    def __init__(self, payload: dict):
        """Keep the decoded payload and start from an all-zero summary."""
        self.payload = payload
        self.summary = ImportSummary()

    @transaction.atomic
    def run(self) -> ImportSummary:
        """Validate the payload and upsert everything it describes.

        The whole run is wrapped in ``transaction.atomic``, so an exception
        raised part-way through rolls back the rows written so far.

        Returns:
            The summary of created/updated players, teams and contexts.

        Raises:
            ImportValidationError: If the payload is malformed or conflicts
                with existing mappings or rows.
        """
        self._validate_payload_shape()
        source_name = self.payload["source_name"]
        competition = self._upsert_competition(source_name, self.payload["competition"])
        season = self._upsert_season(self.payload["season"])
        for player_record in self.payload["players"]:
            team = self._upsert_team(source_name, player_record["team"])
            player = self._upsert_player(source_name, player_record)
            context = self._upsert_player_season(
                source_name=source_name,
                competition=competition,
                season=season,
                team=team,
                player=player,
                player_record=player_record,
            )
            self._upsert_player_season_stats(context=context, player_record=player_record)
        return self.summary

    @staticmethod
    def _require_object(value, label: str) -> dict:
        """Return ``value`` if it is a dict, else raise ImportValidationError."""
        if not isinstance(value, dict):
            raise ImportValidationError(f"{label} must be a JSON object.")
        return value

    @staticmethod
    def _parse_value(parser, record: dict, field: str, label: str):
        """Run ``parser`` on ``record[field]``, reporting failures as ImportValidationError."""
        raw = record.get(field)
        try:
            return parser(raw)
        except (ValueError, TypeError, ArithmeticError) as exc:
            # decimal.InvalidOperation derives from ArithmeticError;
            # date.fromisoformat raises ValueError or TypeError.
            raise ImportValidationError(f"{label} has an invalid '{field}' value: {raw!r}.") from exc

    @staticmethod
    def _apply_changes(instance, values: dict) -> bool:
        """Persist only the fields of ``instance`` that differ from ``values``.

        Saves through ``save(update_fields=...)`` and includes ``updated_at``
        so the timestamp is written together with the changed fields.

        Returns:
            True when at least one field changed and a save was issued.
        """
        changed = [field for field, value in values.items() if getattr(instance, field) != value]
        if not changed:
            return False
        for field in changed:
            setattr(instance, field, values[field])
        instance.save(update_fields=changed + ["updated_at"])
        return True

    def _validate_payload_shape(self) -> None:
        """Check that the payload has every key and container the importer reads.

        Raises:
            ImportValidationError: On the first structural problem found.
        """
        payload = self._require_object(self.payload, "Payload")
        if payload.get("source_name") != self.EXPECTED_SOURCE_NAME:
            raise ImportValidationError(
                f"Expected source_name='{self.EXPECTED_SOURCE_NAME}', got '{payload.get('source_name')}'."
            )
        for key in ("competition", "season", "players"):
            if key not in payload:
                raise ImportValidationError(f"Missing root key '{key}'.")
        players = payload["players"]
        if not isinstance(players, list) or not players:
            raise ImportValidationError("Payload must include at least one player record.")

        competition = self._require_object(payload["competition"], "Competition")
        for field in ("external_id", "name"):
            if not competition.get(field):
                raise ImportValidationError(f"Competition requires '{field}'.")

        season = self._require_object(payload["season"], "Season")
        for field in ("name", "start_year", "end_year"):
            if season.get(field) in (None, ""):
                raise ImportValidationError(f"Season requires '{field}'.")

        for index, player_record in enumerate(players, start=1):
            self._require_object(player_record, f"Player record #{index}")
            for field in ("external_id", "context_external_id", "full_name", "position", "team", "stats"):
                if player_record.get(field) in (None, ""):
                    raise ImportValidationError(f"Player record #{index} missing '{field}'.")
            team_record = self._require_object(player_record["team"], f"Player record #{index} team")
            for field in ("external_id", "name", "country"):
                if team_record.get(field) in (None, ""):
                    raise ImportValidationError(f"Player record #{index} team missing '{field}'.")
            self._require_object(player_record["stats"], f"Player record #{index} stats")

    def _mapping_for(self, source_name: str, entity_type: str, external_id: str) -> ExternalEntityMapping | None:
        """Return the mapping row for this external identity, or None."""
        return ExternalEntityMapping.objects.filter(
            source_name=source_name,
            entity_type=entity_type,
            external_id=external_id,
        ).first()

    def _bind_mapping(self, source_name: str, entity_type: str, external_id: str, object_id: int) -> None:
        """Ensure ``external_id`` is mapped to ``object_id`` for this source/type.

        Raises:
            ImportValidationError: If the external ID already points at a
                different row, or the row is already bound to a different
                external ID.
        """
        existing_for_external = self._mapping_for(source_name, entity_type, external_id)
        if existing_for_external is not None:
            if existing_for_external.object_id != object_id:
                raise ImportValidationError(
                    f"External ID '{external_id}' for {entity_type} is already linked to a different record."
                )
            # Already bound to this exact row. The unique constraint on
            # (source_name, entity_type, object_id) means no other mapping can
            # target the same row, so nothing is left to check or create.
            return

        existing_for_object = ExternalEntityMapping.objects.filter(
            source_name=source_name,
            entity_type=entity_type,
            object_id=object_id,
        ).first()
        if existing_for_object and existing_for_object.external_id != external_id:
            raise ImportValidationError(
                f"Conflicting mapping for {entity_type} object {object_id}: "
                f"'{existing_for_object.external_id}' vs '{external_id}'."
            )
        ExternalEntityMapping.objects.get_or_create(
            source_name=source_name,
            entity_type=entity_type,
            external_id=external_id,
            defaults={"object_id": object_id},
        )

    def _upsert_competition(self, source_name: str, record: dict) -> Competition:
        """Create or refresh the competition described by ``record``.

        A mapped competition is looked up by primary key and may be renamed;
        an unmapped one is matched (or created) by name.
        """
        mapping = self._mapping_for(source_name, ExternalEntityMapping.EntityType.COMPETITION, record["external_id"])
        defaults = {
            "country": record.get("country", ""),
            "level": record.get("level", ""),
        }
        if mapping:
            competition = Competition.objects.filter(pk=mapping.object_id).first()
            if competition is None:
                raise ImportValidationError("Competition mapping points to a missing record.")
            # Go through save() like the unmapped branch so updated_at is
            # written too; QuerySet.update() would skip it.
            self._apply_changes(competition, {"name": record["name"], **defaults})
        else:
            competition, _ = Competition.objects.get_or_create(name=record["name"], defaults=defaults)
            self._apply_changes(competition, defaults)
        self._bind_mapping(
            source_name=source_name,
            entity_type=ExternalEntityMapping.EntityType.COMPETITION,
            external_id=record["external_id"],
            object_id=competition.id,
        )
        return competition

    def _upsert_season(self, record: dict) -> Season:
        """Fetch or create the season by name.

        Raises:
            ImportValidationError: If a season with that name exists but has
                different start/end years.
        """
        season, created = Season.objects.get_or_create(
            name=record["name"],
            defaults={
                "start_year": record["start_year"],
                "end_year": record["end_year"],
            },
        )
        years_differ = season.start_year != record["start_year"] or season.end_year != record["end_year"]
        if not created and years_differ:
            raise ImportValidationError(
                f"Season '{season.name}' already exists with different years "
                f"({season.start_year}-{season.end_year})."
            )
        return season

    def _upsert_team(self, source_name: str, record: dict) -> Team:
        """Create or refresh the team described by ``record`` and count it."""
        mapping = self._mapping_for(source_name, ExternalEntityMapping.EntityType.TEAM, record["external_id"])
        if mapping:
            team = Team.objects.filter(pk=mapping.object_id).first()
            if team is None:
                raise ImportValidationError("Team mapping points to a missing record.")
            self._apply_changes(team, {"name": record["name"], "country": record["country"]})
            self.summary.teams_updated += 1
        else:
            team, created = Team.objects.get_or_create(name=record["name"], country=record["country"], defaults={})
            if created:
                self.summary.teams_created += 1
            else:
                self.summary.teams_updated += 1
        self._bind_mapping(
            source_name=source_name,
            entity_type=ExternalEntityMapping.EntityType.TEAM,
            external_id=record["external_id"],
            object_id=team.id,
        )
        return team

    def _upsert_player(self, source_name: str, record: dict) -> Player:
        """Create the player, or overwrite the imported fields of a mapped one.

        Raises:
            ImportValidationError: If the mapping points at a missing row or a
                date/number field cannot be parsed.
        """
        mapping = self._mapping_for(source_name, ExternalEntityMapping.EntityType.PLAYER, record["external_id"])
        label = f"Player '{record['external_id']}'"
        defaults = {
            "full_name": record["full_name"],
            "first_name": record.get("first_name", ""),
            "last_name": record.get("last_name", ""),
            "birth_date": self._parse_value(parse_date, record, "birth_date", label),
            "nationality": record.get("nationality", ""),
            "height_cm": self._parse_value(parse_decimal, record, "height_cm", label),
            "weight_kg": self._parse_value(parse_decimal, record, "weight_kg", label),
            "wingspan_cm": self._parse_value(parse_decimal, record, "wingspan_cm", label),
            "position": record["position"],
        }
        if mapping:
            player = Player.objects.filter(pk=mapping.object_id).first()
            if player is None:
                raise ImportValidationError("Player mapping points to a missing record.")
            for field, value in defaults.items():
                setattr(player, field, value)
            player.save()
            self.summary.players_updated += 1
        else:
            # Unmapped players are always created: there is no natural key to
            # match an existing row on.
            player = Player.objects.create(**defaults)
            self.summary.players_created += 1
        self._bind_mapping(
            source_name=source_name,
            entity_type=ExternalEntityMapping.EntityType.PLAYER,
            external_id=record["external_id"],
            object_id=player.id,
        )
        return player

    def _upsert_player_season(
        self,
        *,
        source_name: str,
        competition: Competition,
        season: Season,
        team: Team,
        player: Player,
        player_record: dict,
    ) -> PlayerSeason:
        """Fetch or create the player/season/team/competition context row.

        Raises:
            ImportValidationError: If an existing mapping points at a missing
                row or at a context with a different identity.
        """
        mapping = self._mapping_for(
            source_name,
            ExternalEntityMapping.EntityType.PLAYER_SEASON,
            player_record["context_external_id"],
        )
        if mapping:
            context = PlayerSeason.objects.filter(pk=mapping.object_id).first()
            if context is None:
                raise ImportValidationError("PlayerSeason mapping points to a missing record.")
            if (
                context.player_id != player.id
                or context.season_id != season.id
                or context.team_id != team.id
                or context.competition_id != competition.id
            ):
                raise ImportValidationError(
                    "Mapped player-season context does not match the incoming deterministic context identity."
                )
            self.summary.contexts_updated += 1
        else:
            context, created = PlayerSeason.objects.get_or_create(
                player=player,
                season=season,
                team=team,
                competition=competition,
                defaults={},
            )
            if created:
                self.summary.contexts_created += 1
            else:
                self.summary.contexts_updated += 1
        self._bind_mapping(
            source_name=source_name,
            entity_type=ExternalEntityMapping.EntityType.PLAYER_SEASON,
            external_id=player_record["context_external_id"],
            object_id=context.id,
        )
        return context

    def _upsert_player_season_stats(self, *, context: PlayerSeason, player_record: dict) -> None:
        """Create or overwrite the stats row attached to ``context``.

        Stats missing from the payload are stored as ``None``.

        Raises:
            ImportValidationError: If a stat value cannot be parsed as a number.
        """
        stats = player_record["stats"]
        label = f"Player '{player_record['external_id']}' stats"
        PlayerSeasonStats.objects.update_or_create(
            player_season=context,
            defaults={
                field: self._parse_value(parse_decimal, stats, field, label)
                for field in self.STAT_FIELDS
            },
        )

View File

@ -0,0 +1,52 @@
from __future__ import annotations
import json
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from scouting.importers.hoopdata_demo import (
HoopDataDemoCompetitionImporter,
ImportValidationError,
)
class Command(BaseCommand):
    """Management command that imports one hoopdata_demo snapshot JSON file."""

    help = "Import the first real-data MVP snapshot for the hoopdata_demo source (single competition scope)."

    def add_arguments(self, parser):
        """Register ``--input``; it defaults to the bundled sample snapshot."""
        default_snapshot = (
            Path(settings.BASE_DIR)
            / "scouting"
            / "sample_data"
            / "imports"
            / "hoopdata_demo_serie_a2_2025_2026.json"
        )
        parser.add_argument(
            "--input",
            default=str(default_snapshot),
            help="Path to a hoopdata_demo competition snapshot JSON file.",
        )

    def handle(self, *args, **options):
        """Load the snapshot, run the importer and print the summary.

        Raises:
            CommandError: If the path is missing or not a regular file, the
                file cannot be read or decoded, the JSON is invalid or not an
                object, or the importer rejects the payload.
        """
        input_path = Path(options["input"])
        if not input_path.exists():
            raise CommandError(f"Input file does not exist: {input_path}")
        if not input_path.is_file():
            # exists() is also true for directories, which read_text() cannot open.
            raise CommandError(f"Input path is not a file: {input_path}")

        # Keep file/JSON failures separate from importer failures so each
        # handler only guards the call that can raise it.
        try:
            payload = json.loads(input_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            raise CommandError(f"Invalid JSON payload: {exc}") from exc
        except (OSError, UnicodeDecodeError) as exc:
            raise CommandError(f"Could not read input file {input_path}: {exc}") from exc

        if not isinstance(payload, dict):
            raise CommandError("Import validation failed: payload root must be a JSON object.")

        try:
            summary = HoopDataDemoCompetitionImporter(payload).run()
        except ImportValidationError as exc:
            raise CommandError(f"Import validation failed: {exc}") from exc

        self.stdout.write(
            self.style.SUCCESS(
                "Imported hoopdata_demo competition snapshot successfully. "
                f"Players +{summary.players_created}/~{summary.players_updated}, "
                f"Teams +{summary.teams_created}/~{summary.teams_updated}, "
                f"Contexts +{summary.contexts_created}/~{summary.contexts_updated}."
            )
        )
        self.stdout.write(f"Source file: {input_path}")

View File

@ -0,0 +1,29 @@
# Generated by Django 5.2.2 on 2026-04-10 22:22
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the ExternalEntityMapping table together with its two uniqueness
    # constraints; runs after the saved-search migration.

    dependencies = [
        ('scouting', '0008_savedsearch'),
    ]

    operations = [
        migrations.CreateModel(
            name='ExternalEntityMapping',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('source_name', models.CharField(max_length=80)),
                (
                    'entity_type',
                    models.CharField(
                        choices=[
                            ('player', 'Player'),
                            ('competition', 'Competition'),
                            ('team', 'Team'),
                            ('player_season', 'Player season'),
                        ],
                        max_length=30,
                    ),
                ),
                ('external_id', models.CharField(max_length=140)),
                ('object_id', models.PositiveBigIntegerField()),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'ordering': ['source_name', 'entity_type', 'external_id'],
                'constraints': [
                    # One mapping per external identity within a source/type.
                    models.UniqueConstraint(
                        fields=('source_name', 'entity_type', 'external_id'),
                        name='uniq_external_entity_mapping',
                    ),
                    # One mapping per local row within a source/type.
                    models.UniqueConstraint(
                        fields=('source_name', 'entity_type', 'object_id'),
                        name='uniq_external_entity_target',
                    ),
                ],
            },
        ),
    ]

View File

@ -228,3 +228,34 @@ class SavedSearch(models.Model):
def __str__(self) -> str:
return f"{self.user} - {self.name}"
class ExternalEntityMapping(models.Model):
    """Ties a source's external identifier to the primary key of a local row.

    ``object_id`` is a plain integer rather than a foreign key; which table it
    refers to is determined by ``entity_type``.
    """

    class EntityType(models.TextChoices):
        # Kinds of local records a mapping can point at.
        PLAYER = "player", "Player"
        COMPETITION = "competition", "Competition"
        TEAM = "team", "Team"
        PLAYER_SEASON = "player_season", "Player season"

    # Identity on the source side.
    source_name = models.CharField(max_length=80)
    entity_type = models.CharField(max_length=30, choices=EntityType.choices)
    external_id = models.CharField(max_length=140)

    # Primary key of the mapped local row.
    object_id = models.PositiveBigIntegerField()

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["source_name", "entity_type", "external_id"]
        constraints = [
            # An external identity maps to at most one local row.
            models.UniqueConstraint(
                fields=["source_name", "entity_type", "external_id"],
                name="uniq_external_entity_mapping",
            ),
            # A local row carries at most one external identity per source/type.
            models.UniqueConstraint(
                fields=["source_name", "entity_type", "object_id"],
                name="uniq_external_entity_target",
            ),
        ]

    def __str__(self) -> str:
        return "{}:{}:{} -> {}".format(
            self.source_name,
            self.entity_type,
            self.external_id,
            self.object_id,
        )

View File

@ -0,0 +1,76 @@
{
"source_name": "hoopdata_demo",
"competition": {
"external_id": "comp-ita2-2025",
"name": "Italian Serie A2",
"country": "IT",
"level": "second"
},
"season": {
"name": "2025-2026",
"start_year": 2025,
"end_year": 2026
},
"players": [
{
"external_id": "player-1001",
"context_external_id": "ctx-1001-2025-ita2-bologna-blaze",
"full_name": "Andrea Pulse",
"first_name": "Andrea",
"last_name": "Pulse",
"birth_date": "2003-03-19",
"nationality": "IT",
"position": "PG",
"height_cm": 186.0,
"weight_kg": 79.0,
"wingspan_cm": 193.0,
"team": {
"external_id": "team-bologna-blaze",
"name": "Bologna Blaze",
"country": "IT"
},
"stats": {
"points": 17.2,
"assists": 6.4,
"steals": 1.7,
"turnovers": 2.5,
"blocks": 0.2,
"efg_pct": 52.1,
"ts_pct": 57.9,
"plus_minus": 3.8,
"offensive_rating": 113.4,
"defensive_rating": 106.9
}
},
{
"external_id": "player-1002",
"context_external_id": "ctx-1002-2025-ita2-venice-harbor",
"full_name": "Matteo Harbor",
"first_name": "Matteo",
"last_name": "Harbor",
"birth_date": "2001-11-02",
"nationality": "IT",
"position": "C",
"height_cm": 211.0,
"weight_kg": 108.0,
"wingspan_cm": 220.0,
"team": {
"external_id": "team-venice-harbor",
"name": "Venice Harbor",
"country": "IT"
},
"stats": {
"points": 12.8,
"assists": 1.6,
"steals": 0.9,
"turnovers": 1.8,
"blocks": 2.1,
"efg_pct": 58.7,
"ts_pct": 61.5,
"plus_minus": 4.1,
"offensive_rating": 111.9,
"defensive_rating": 102.7
}
}
]
}

View File

@ -11,6 +11,7 @@ from django.urls import reverse
from .models import (
Competition,
ExternalEntityMapping,
FavoritePlayer,
Player,
PlayerNote,
@ -442,6 +443,89 @@ class SeedScoutingDataCommandTests(TestCase):
self.assertEqual(Specialty.objects.count(), first_counts["specialties"])
class FirstRealIngestionFlowTests(TestCase):
    """End-to-end checks for the hoopdata_demo import command using its default sample file."""

    COMMAND_NAME = "import_hoopdata_demo_competition"
    SOURCE_NAME = "hoopdata_demo"

    @staticmethod
    def _row_counts() -> dict:
        """Snapshot the row count of every table the importer writes to."""
        return {
            "competitions": Competition.objects.count(),
            "seasons": Season.objects.count(),
            "players": Player.objects.count(),
            "teams": Team.objects.count(),
            "contexts": PlayerSeason.objects.count(),
            "stats": PlayerSeasonStats.objects.count(),
            "mappings": ExternalEntityMapping.objects.count(),
        }

    def test_importer_command_runs_successfully(self):
        call_command(self.COMMAND_NAME)

        self.assertGreaterEqual(Player.objects.count(), 2)

    def test_importer_creates_expected_core_objects(self):
        call_command(self.COMMAND_NAME)

        self.assertTrue(Competition.objects.filter(name="Italian Serie A2").exists())
        self.assertTrue(Season.objects.filter(name="2025-2026", start_year=2025, end_year=2026).exists())
        self.assertTrue(Player.objects.filter(full_name="Andrea Pulse", position="PG").exists())
        self.assertTrue(Player.objects.filter(full_name="Matteo Harbor", position="C").exists())
        self.assertTrue(PlayerSeason.objects.filter(player__full_name="Andrea Pulse").exists())
        self.assertTrue(PlayerSeasonStats.objects.filter(player_season__player__full_name="Andrea Pulse").exists())
        # 1 competition + 2 teams + 2 players + 2 player-season contexts.
        self.assertEqual(
            ExternalEntityMapping.objects.filter(source_name=self.SOURCE_NAME).count(),
            7,
        )

    def test_importer_is_idempotent_for_same_input(self):
        call_command(self.COMMAND_NAME)
        first_counts = self._row_counts()

        call_command(self.COMMAND_NAME)

        # Comparing whole dicts covers competitions and seasons as well and
        # reports exactly which table grew on failure.
        self.assertEqual(self._row_counts(), first_counts)

    def test_importer_does_not_overwrite_internal_scouting_fields(self):
        role = Role.objects.create(name="internal role", slug="internal-role")
        specialty = Specialty.objects.create(name="internal specialty", slug="internal-specialty")
        call_command(self.COMMAND_NAME)
        player = Player.objects.get(full_name="Andrea Pulse")
        player.roles.add(role)
        player.specialties.add(specialty)

        call_command(self.COMMAND_NAME)

        player.refresh_from_db()
        self.assertTrue(player.roles.filter(pk=role.pk).exists())
        self.assertTrue(player.specialties.filter(pk=specialty.pk).exists())

    def test_importer_does_not_interfere_with_user_owned_data(self):
        call_command(self.COMMAND_NAME)
        user = User.objects.create_user(username="ingest_user", password="pass12345")
        player = Player.objects.get(full_name="Andrea Pulse")
        favorite = FavoritePlayer.objects.create(user=user, player=player)
        note = PlayerNote.objects.create(user=user, player=player, body="Tracked after import")
        saved = SavedSearch.objects.create(user=user, name="Imported PG", params={"name": "Andrea"})

        call_command(self.COMMAND_NAME)

        self.assertTrue(FavoritePlayer.objects.filter(pk=favorite.pk).exists())
        self.assertTrue(PlayerNote.objects.filter(pk=note.pk).exists())
        self.assertTrue(SavedSearch.objects.filter(pk=saved.pk).exists())

    def test_imported_data_is_visible_in_search_and_detail_flows(self):
        call_command(self.COMMAND_NAME)

        list_response = self.client.get(reverse("scouting:player_list"), {"name": "Andrea"})
        self.assertEqual(list_response.status_code, 200)
        self.assertContains(list_response, "Andrea Pulse")

        player = Player.objects.get(full_name="Andrea Pulse")
        detail_response = self.client.get(reverse("scouting:player_detail", args=[player.id]))
        self.assertEqual(detail_response.status_code, 200)
        self.assertContains(detail_response, "Andrea Pulse")
        self.assertContains(detail_response, "PTS 17.2")
class FavoritePlayerViewsTests(TestCase):
@classmethod
def setUpTestData(cls):