Compare commits

21 Commits

Author SHA1 Message Date
24aa827811 Merge branch 'feature/v2-api-search-metric-transparency' into feature/hoopscout-v2-static-architecture 2026-03-20 16:05:59 +01:00
90f83091ce feat(v2-api): expose sortable search metrics in player list responses 2026-03-20 16:05:56 +01:00
f2d5e20701 Merge branch 'feature/v2-docs-runtime-consistency-audit' into feature/hoopscout-v2-static-architecture 2026-03-20 16:02:22 +01:00
887da3cd06 docs(v2): align runtime and operations documentation with compose behavior 2026-03-20 16:02:12 +01:00
eb6e0bf594 Merge branch 'feature/v2-domain-pruning-legacy-models' into feature/hoopscout-v2-static-architecture 2026-03-20 15:57:23 +01:00
b6b6753931 refactor(v2): isolate legacy provider stack and prune obsolete tests 2026-03-20 15:57:20 +01:00
5a19587376 Merge branch 'feature/v2-public-source-schema-relaxation' into feature/hoopscout-v2-static-architecture 2026-03-20 15:51:09 +01:00
3f811827de test(v2-ingestion): harden public-source schema contract and docs 2026-03-20 15:50:59 +01:00
48a82e812a fix(v2-ingestion): align public schema realism follow-ups 2026-03-20 15:23:43 +01:00
6066d2a0bb fix(v2-ingestion): harden LBA/BCL snapshot contract for public data 2026-03-20 15:14:17 +01:00
1aad6945c7 fix(v2-scheduler): avoid restart loops when scheduler is disabled 2026-03-20 15:08:20 +01:00
ad85e40688 fix(v2-import): namespace source identity for snapshot upserts 2026-03-20 15:08:20 +01:00
20d3ee7dae feat(v2): streamline saved searches and favorites flows 2026-03-13 14:40:38 +01:00
0ed4fc57b8 feat(v2): add simple daily extraction-import orchestration 2026-03-13 14:37:17 +01:00
5df973467d feat(v2): add BCL snapshot extractor and command 2026-03-13 14:32:21 +01:00
97913c4a79 feat(v2): add LBA snapshot extractor and command 2026-03-13 14:28:35 +01:00
850e4de71b feat(v2): add snapshot extractor framework and run command 2026-03-13 14:24:54 +01:00
6fc583c79f feat(v2): implement scoped player search and detail flows 2026-03-13 14:10:39 +01:00
eacff3d25e Add v2 snapshot schema validation and import_snapshots command 2026-03-13 14:00:39 +01:00
6aa66807e9 Add v2 relational domain foundations with import run/file models 2026-03-13 13:54:29 +01:00
bb033222e3 Reset to HoopScout v2 runtime foundation and simplified topology 2026-03-13 10:31:29 +01:00
89 changed files with 4671 additions and 1778 deletions

View File

@@ -1,87 +1,81 @@
# Django # HoopScout v2 runtime profile
DJANGO_SETTINGS_MODULE=config.settings.development DJANGO_SETTINGS_MODULE=config.settings.development
DJANGO_ENV=development DJANGO_ENV=development
# Required to be a strong, unique value outside development.
DJANGO_SECRET_KEY=change-me-in-production
DJANGO_DEBUG=1 DJANGO_DEBUG=1
DJANGO_SECRET_KEY=change-me-in-production
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1 DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1 DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1
DJANGO_TIME_ZONE=UTC DJANGO_TIME_ZONE=UTC
DJANGO_LOG_LEVEL=INFO DJANGO_LOG_LEVEL=INFO
DJANGO_LOG_SQL=0 DJANGO_LOG_SQL=0
DJANGO_SUPERUSER_USERNAME=admin
DJANGO_SUPERUSER_EMAIL=admin@example.com
DJANGO_SUPERUSER_PASSWORD=adminpass
# Database (PostgreSQL only) # Container image tags
APP_IMAGE_TAG=latest
NGINX_IMAGE_TAG=latest
# Reserved for future optional scheduler image:
# SCHEDULER_IMAGE_TAG=latest
# Web runtime behavior
GUNICORN_WORKERS=3
AUTO_APPLY_MIGRATIONS=1
AUTO_COLLECTSTATIC=1
# PostgreSQL (primary and only main database)
POSTGRES_DB=hoopscout POSTGRES_DB=hoopscout
POSTGRES_USER=hoopscout POSTGRES_USER=hoopscout
POSTGRES_PASSWORD=hoopscout POSTGRES_PASSWORD=hoopscout
POSTGRES_HOST=postgres POSTGRES_HOST=postgres
POSTGRES_PORT=5432 POSTGRES_PORT=5432
# Redis / Celery # Development UID/GID for bind-mounted source write permissions
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_DB=0
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/0
# Runtime behavior
AUTO_APPLY_MIGRATIONS=1
AUTO_COLLECTSTATIC=1
AUTO_BUILD_TAILWIND=1
GUNICORN_WORKERS=3
# Development container UID/GID for bind-mounted source write permissions.
LOCAL_UID=1000 LOCAL_UID=1000
LOCAL_GID=1000 LOCAL_GID=1000
# Production-minded security toggles # Static dataset storage (volume-backed directories)
DJANGO_SECURE_SSL_REDIRECT=1 STATIC_DATASET_INCOMING_DIR=/app/snapshots/incoming
DJANGO_SECURE_HSTS_SECONDS=31536000 STATIC_DATASET_ARCHIVE_DIR=/app/snapshots/archive
DJANGO_SESSION_COOKIE_SAMESITE=Lax STATIC_DATASET_FAILED_DIR=/app/snapshots/failed
DJANGO_CSRF_COOKIE_SAMESITE=Lax
# Mandatory production variables (example values): # Extractor framework (fetch -> parse -> normalize -> emit snapshot)
EXTRACTOR_USER_AGENT=HoopScoutBot/2.0 (+https://younerd.org)
EXTRACTOR_HTTP_TIMEOUT_SECONDS=15
EXTRACTOR_HTTP_RETRIES=2
EXTRACTOR_RETRY_SLEEP_SECONDS=1.0
EXTRACTOR_REQUEST_DELAY_SECONDS=0.5
EXTRACTOR_PUBLIC_JSON_URL=
EXTRACTOR_PUBLIC_SOURCE_NAME=public_json_source
EXTRACTOR_INCLUDE_RAW_PAYLOAD=0
EXTRACTOR_LBA_STATS_URL=
EXTRACTOR_LBA_SEASON_LABEL=2025-2026
EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID=lba-serie-a
EXTRACTOR_LBA_COMPETITION_NAME=Lega Basket Serie A
EXTRACTOR_BCL_STATS_URL=
EXTRACTOR_BCL_SEASON_LABEL=2025-2026
EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID=bcl
EXTRACTOR_BCL_COMPETITION_NAME=Basketball Champions League
DAILY_ORCHESTRATION_EXTRACTORS=lba,bcl
DAILY_ORCHESTRATION_INTERVAL_SECONDS=86400
# Future optional scheduler loop settings (not enabled in base v2 runtime)
SCHEDULER_ENABLED=0
SCHEDULER_INTERVAL_SECONDS=900
# When scheduler is disabled but container is started, keep it idle (avoid restart loops)
SCHEDULER_DISABLED_SLEEP_SECONDS=300
# Legacy provider-sync stack (v1-style) is disabled by default in v2.
LEGACY_PROVIDER_STACK_ENABLED=0
# Optional legacy provider settings (only when LEGACY_PROVIDER_STACK_ENABLED=1):
# PROVIDER_BACKEND=demo
# PROVIDER_DEFAULT_NAMESPACE=mvp_demo
# API safeguards (read-only API is optional)
API_THROTTLE_ANON=100/hour
API_THROTTLE_USER=1000/hour
# Production profile reminders:
# DJANGO_SETTINGS_MODULE=config.settings.production # DJANGO_SETTINGS_MODULE=config.settings.production
# DJANGO_ENV=production # DJANGO_ENV=production
# DJANGO_DEBUG=0 # DJANGO_DEBUG=0
# DJANGO_SECRET_KEY=<strong-unique-secret-at-least-32-chars> # DJANGO_SECRET_KEY=<strong-unique-secret-at-least-32-chars>
# DJANGO_ALLOWED_HOSTS=app.example.com # DJANGO_ALLOWED_HOSTS=app.example.com
# DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com # DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com
# Providers / ingestion
PROVIDER_BACKEND=demo
PROVIDER_NAMESPACE_DEMO=mvp_demo
PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie
PROVIDER_DEFAULT_NAMESPACE=
PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
PROVIDER_REQUEST_RETRIES=3
PROVIDER_REQUEST_RETRY_SLEEP=1
PROVIDER_HTTP_TIMEOUT_SECONDS=10
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io
PROVIDER_BALLDONTLIE_API_KEY=
# NBA-centric MVP provider seasons to ingest (comma-separated years).
PROVIDER_BALLDONTLIE_SEASONS=2024
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
# When 0, a 401 on stats endpoint degrades to players/teams-only sync.
PROVIDER_BALLDONTLIE_STATS_STRICT=0
CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500
INGESTION_SCHEDULE_ENABLED=0
# 5-field cron: minute hour day_of_month month day_of_week
# Example hourly: 0 * * * *
INGESTION_SCHEDULE_CRON=*/30 * * * *
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
INGESTION_SCHEDULE_JOB_TYPE=incremental
INGESTION_PREVENT_OVERLAP=1
INGESTION_OVERLAP_WINDOW_MINUTES=180
API_THROTTLE_ANON=100/hour
API_THROTTLE_USER=1000/hour
# Testing (used with pytest-django)
# Keep development settings for local tests unless explicitly validating production settings.
PYTEST_ADDOPTS=-q

View File

@@ -1,137 +1,105 @@
# Contributing to HoopScout # Contributing to HoopScout v2
This repository follows a pragmatic GitFlow model. HoopScout uses GitFlow and a pragmatic, production-minded workflow.
The goal is predictable releases with low process overhead.
## Branch Roles ## Branch Roles
- `main`: production-only, always releasable - `main`: production-only, always releasable
- `develop`: integration branch for upcoming release - `develop`: integration branch
- `feature/*`: feature work, branched from `develop`, merged into `develop` - `feature/*`: feature branches from `develop`
- `release/*`: stabilization branch, branched from `develop`, merged into `main` and back into `develop` - `release/*`: release hardening branches from `develop`
- `hotfix/*`: urgent production fixes, branched from `main`, merged into `main` and back into `develop` - `hotfix/*`: urgent production fixes from `main`
## Branch Naming Convention ## Branch Naming
Use lowercase kebab-case.
Use lowercase kebab-case:
- `feature/<scope>-<short-description>` - `feature/<scope>-<short-description>`
- `release/<major>.<minor>.<patch>` - `release/<major>.<minor>.<patch>`
- `hotfix/<scope>-<short-description>` - `hotfix/<scope>-<short-description>`
Examples: Examples:
- `feature/hoopscout-v2-static-architecture`
- `feature/v2-snapshot-import-command`
- `release/2.0.0`
- `hotfix/nginx-proxy-timeout`
- `feature/search-age-height-filters` ## v2 Development Runtime
- `feature/providers-mvp-retry-logic`
- `release/0.2.0` The v2 default runtime is intentionally simple:
- `hotfix/redis-volume-permissions` - `web`
- `postgres`
- `nginx`
No Redis/Celery runtime services in the default v2 foundation.
### Start dev stack
```bash
cp .env.example .env
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
```
### Start release-style stack
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
```
### Verify release topology assumptions
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml config
./scripts/verify_release_topology.sh
```
## Day-to-Day Feature Workflow ## Day-to-Day Feature Workflow
1. Sync `develop`. 1. Sync `develop`
```bash ```bash
git checkout develop git checkout develop
git pull origin develop git pull origin develop
``` ```
2. Create branch. 2. Create feature branch
```bash ```bash
git checkout -b feature/your-feature-name git checkout -b feature/your-feature-name
``` ```
3. Implement, test, commit in small logical steps. 3. Implement with focused commits and tests.
4. Open PR: `feature/*` -> `develop`.
4. Rebase or merge latest `develop` before PR if needed. ## Running Tests (v2)
Runtime images are intentionally lean and may not ship `pytest`.
Use the development compose stack and install dev dependencies before running tests:
```bash ```bash
git checkout develop docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
git pull origin develop
git checkout feature/your-feature-name
git rebase develop
``` ```
5. Open PR: `feature/*` -> `develop`. ## PR Checklist
## Recommended Release Workflow - [ ] Target branch is correct
- [ ] Scope is focused (no unrelated refactor)
- [ ] Runtime still starts with docker compose
- [ ] Tests updated/passing for changed scope
- [ ] Docs updated (`README.md`, `.env.example`, this file) when config/runtime changes
- [ ] No secrets committed
1. Create release branch from `develop`. ## v2 Foundation Rules
```bash - Prefer management commands over distributed orchestration unless clearly justified.
git checkout develop - Keep PostgreSQL as source of truth.
git pull origin develop - Keep snapshot storage file-based and volume-backed.
git checkout -b release/0.1.0 - Do not introduce MongoDB or Elasticsearch as source of truth.
``` - Keep legacy provider/Celery sync code isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
- Keep runtime/docs consistency aligned with `docs/runtime-consistency-checklist.md`.
2. On `release/*` allow only:
- bug fixes
- docs/changelog updates
- release metadata/version updates
3. Validate release candidate in Docker.
```bash
docker compose up -d --build
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
```
4. Merge `release/*` into `main`.
5. Tag release on `main` (`v0.1.0`).
6. Merge the same `release/*` back into `develop`.
7. Delete release branch after both merges.
## Recommended Hotfix Workflow
1. Create hotfix branch from `main`.
```bash
git checkout main
git pull origin main
git checkout -b hotfix/your-hotfix-name
```
2. Implement minimal fix and tests.
3. Open PR: `hotfix/*` -> `main`.
4. After merge to `main`, back-merge to `develop`.
5. Tag patch release (`vX.Y.Z`).
## Pull Request Checklist
Before requesting review, confirm:
- [ ] Branch target is correct (`develop`, `main`, or release back-merge)
- [ ] Scope is focused (no unrelated refactors)
- [ ] Docker stack still starts (`docker compose up -d`)
- [ ] Tests updated and passing
- [ ] Migrations included if models changed
- [ ] Docs updated (`README`, `CONTRIBUTING`, `.env.example`) when needed
- [ ] No secrets or credentials committed
- [ ] Changelog entry added under `Unreleased`
## Issue and Feature Templates
Use repository templates in `.github/ISSUE_TEMPLATE/`:
- `bug_report.md`
- `feature_request.md`
Use `.github/PULL_REQUEST_TEMPLATE.md` for PR descriptions.
## Changelog / Release Note Convention
- Single changelog file: `CHANGELOG.md`
- Keep `Unreleased` at top
- Categorize entries under:
- `Added`
- `Changed`
- `Fixed`
- Release format:
- `## [0.1.0] - 2026-03-10`
## Repository Bootstrap Commands ## Repository Bootstrap Commands
Maintainers should run these once to start GitFlow from current `main`: If `develop` is missing in a clone:
```bash ```bash
git checkout main git checkout main
@@ -139,39 +107,3 @@ git pull origin main
git checkout -b develop git checkout -b develop
git push -u origin develop git push -u origin develop
``` ```
Then start regular feature work:
```bash
git checkout develop
git pull origin develop
git checkout -b feature/first-team-task
```
## Local Development Setup
```bash
cp .env.example .env
docker compose up --build
```
If needed:
```bash
docker compose exec web python manage.py migrate
docker compose exec web python manage.py createsuperuser
```
## Testing Commands
Run full suite:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
```
Run targeted modules while developing:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_players_views.py'
```

View File

@@ -32,23 +32,19 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
WORKDIR /app WORKDIR /app
RUN apt-get update \ RUN apt-get update \
&& apt-get install -y --no-install-recommends libpq5 postgresql-client curl nodejs npm \ && apt-get install -y --no-install-recommends libpq5 postgresql-client curl \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
RUN groupadd --gid "${APP_GID}" "${APP_USER}" \ RUN groupadd --gid "${APP_GID}" "${APP_USER}" \
&& useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}" && useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}"
RUN printf '%s\n' 'export PATH="/opt/venv/bin:/home/app/.local/bin:$PATH"' > /etc/profile.d/hoopscout-path.sh
COPY --from=builder /opt/venv /opt/venv COPY --from=builder /opt/venv /opt/venv
COPY . /app COPY . /app
RUN if [ -f package.json ]; then npm install --no-audit --no-fund; fi RUN chmod +x /app/entrypoint.sh /app/scripts/scheduler.sh \
RUN if [ -f package.json ]; then npm run build; fi && mkdir -p /app/staticfiles /app/media /app/snapshots/incoming /app/snapshots/archive /app/snapshots/failed \
RUN chmod +x /app/entrypoint.sh
RUN mkdir -p /app/staticfiles /app/media /app/runtime /app/node_modules /app/static/vendor \
&& chown -R "${APP_UID}:${APP_GID}" /app /opt/venv && chown -R "${APP_UID}:${APP_GID}" /app /opt/venv
USER ${APP_UID}:${APP_GID} USER ${APP_UID}:${APP_GID}
ENTRYPOINT ["/app/entrypoint.sh"] ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000"] CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "3", "--access-logfile", "-", "--error-logfile", "-"]

728
README.md
View File

@@ -1,422 +1,466 @@
# HoopScout # HoopScout v2 (Foundation Reset)
HoopScout is a production-minded basketball scouting and player search platform. HoopScout v2 is a controlled greenfield rebuild inside the existing repository.
The main product experience is server-rendered Django Templates with HTMX enhancements.
A minimal read-only API is included as a secondary integration surface.
## Core Stack Current v2 foundation scope in this branch:
- Django + HTMX server-rendered app
- PostgreSQL as the only primary database
- nginx reverse proxy
- management-command-driven runtime operations
- static snapshot directories persisted via Docker named volumes
- strict JSON snapshot schema + import management command
- extractor framework with LBA/BCL/public JSON adapters
- daily orchestration command and optional scheduler profile
- Python 3.12+ ## Runtime Architecture (v2)
- Django
- Django Templates + HTMX
- Tailwind CSS (CLI build pipeline)
- PostgreSQL
- Redis
- Celery + Celery Beat
- Django REST Framework (read-only API)
- pytest
- Docker / Docker Compose
- nginx
## Architecture Summary Runtime services are intentionally small:
- `web` (Django/Gunicorn)
- `postgres` (primary DB)
- `nginx` (reverse proxy + static/media serving)
- optional `scheduler` profile service (runs daily extractor/import loop)
- Main UI: Django + HTMX (not SPA) No Redis/Celery services are part of the v2 default runtime topology.
- Data layer: normalized domain models for players, seasons, competitions, teams, stats, scouting state Legacy Celery/provider code remains in-repo but is isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
- Provider integration: adapter-based abstraction in `apps/providers` Default v2 runtime keeps that stack disabled.
- Ingestion orchestration: `apps/ingestion` with run/error logs and Celery task execution
- Optional API: read-only DRF endpoints under `/api/`
## Repository Structure ## Image Strategy
```text Compose builds and tags images as:
. - `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}`
├── apps/ - `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}`
│ ├── api/
│ ├── competitions/
│ ├── core/
│ ├── ingestion/
│ ├── players/
│ ├── providers/
│ ├── scouting/
│ ├── stats/
│ ├── teams/
│ └── users/
├── config/
│ └── settings/
├── docs/
├── nginx/
├── requirements/
├── package.json
├── tailwind.config.js
├── static/
├── templates/
├── tests/
├── .github/
├── CHANGELOG.md
├── docker-compose.yml
├── Dockerfile
└── entrypoint.sh
```
## Quick Start Reserved for future optional scheduler use:
- `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
1. Create local env file: ## Entrypoint Strategy
- `web`: `entrypoint.sh`
- waits for PostgreSQL
- optionally runs migrations/collectstatic
- ensures snapshot directories exist
- `nginx`: `nginx/entrypoint.sh`
- simple runtime entrypoint wrapper
## Compose Files
- `docker-compose.yml`: production-minded baseline runtime (immutable image filesystem)
- `docker-compose.dev.yml`: development override with source bind mount for `web`
- `docker-compose.release.yml`: production settings override (`DJANGO_SETTINGS_MODULE=config.settings.production`)
- `scripts/verify_release_topology.sh`: validates merged release compose has no source-code bind mounts for runtime services
### Start development runtime
```bash ```bash
cp .env.example .env cp .env.example .env
```
2. Build and run services:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
```
This starts the development-oriented topology (source bind mounts enabled).
In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs).
3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually:
```bash
docker compose exec web python manage.py migrate
```
4. Create a superuser:
```bash
docker compose exec web python manage.py createsuperuser
```
5. Open the app:
- Web: http://localhost
- Admin: http://localhost/admin/
- Health: http://localhost/health/
- API root endpoints: `/api/players/`, `/api/competitions/`, `/api/teams/`, `/api/seasons/`
## Development vs Release Compose
Base compose (`docker-compose.yml`) is release-oriented and immutable for runtime services.
Development mutability is enabled via `docker-compose.dev.yml`.
Development startup (mutable source bind mounts for `web`/`celery_*`):
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
``` ```
Development startup with Tailwind watch: ### Start release-style runtime
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
```
Release-style startup (immutable runtime services):
```bash ```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
``` ```
Optional release-style stop: ### Start scheduler profile (optional)
```bash ```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml down docker compose --profile scheduler up -d scheduler
``` ```
Notes: For development override:
- In release-style mode, `web`, `celery_worker`, and `celery_beat` run from built image filesystem with no repository source bind mount. ```bash
- In development mode (with `docker-compose.dev.yml`), `web`, `celery_worker`, and `celery_beat` are mutable and bind-mount `.:/app`. docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile scheduler up -d scheduler
- `tailwind` is a dev-profile service and is not required for release runtime. ```
- `nginx`, `postgres`, and `redis` service naming remains unchanged.
- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`.
## Release Topology Verification ### Runtime Modes At A Glance
Inspect merged release config: - development (`docker-compose.yml` + `docker-compose.dev.yml`):
- mutable source bind mounts for `web` and `scheduler`
- optimized for local iteration
- release-style (`docker-compose.yml` + `docker-compose.release.yml`):
- immutable app filesystem for runtime services
- production settings enabled for Django
- scheduler profile:
- only starts when `--profile scheduler` is used
- if started with `SCHEDULER_ENABLED=0`, scheduler stays in idle sleep mode (no restart loop exit behavior)
### Release Topology Verification
Verify merged release config and immutability:
```bash ```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml config docker compose -f docker-compose.yml -f docker-compose.release.yml config
```
What to verify:
- `services.web.volumes` does not include a bind mount from repository path to `/app`
- `services.celery_worker.volumes` does not include a bind mount from repository path to `/app`
- `services.celery_beat.volumes` does not include a bind mount from repository path to `/app`
- persistent named volumes still exist for `postgres_data`, `static_data`, `media_data`, `runtime_data`, and `redis_data`
Automated local/CI-friendly check:
```bash
./scripts/verify_release_topology.sh ./scripts/verify_release_topology.sh
``` ```
## Setup and Run Notes Verification expectation:
- `web` and `scheduler` must not bind-mount repository source code in release mode.
- named volumes for DB/static/media/snapshots remain mounted.
- `web` service starts through `entrypoint.sh` and waits for PostgreSQL readiness. ## Named Volumes
- `web` service also builds Tailwind CSS before `collectstatic` when `AUTO_BUILD_TAILWIND=1`.
- `web`, `celery_worker`, `celery_beat`, and `tailwind` run as a non-root user inside the image.
- `celery_worker` executes background sync work.
- `celery_beat` triggers periodic provider sync (`apps.ingestion.tasks.scheduled_provider_sync`).
- `tailwind` service runs watch mode for development (`npm run dev`).
- nginx proxies web traffic and serves static/media volume mounts.
## Search Consistency Notes v2 runtime uses named volumes for persistence:
- `postgres_data`
- `static_data`
- `media_data`
- `snapshots_incoming`
- `snapshots_archive`
- `snapshots_failed`
- The server-rendered player search page (`/players/`) and read-only players API (`/api/players/`) use the same search form and ORM filter service. Development override uses separate dev-prefixed volumes to avoid ownership collisions.
- Sorting/filter semantics are aligned across UI, HTMX partial refreshes, and API responses.
- Search result metrics in the UI table use **best eligible semantics**:
- each metric (Games, MPG, PPG, RPG, APG) is the maximum value across eligible player-season rows
- eligibility is scoped by the active season/team/competition/stat filters
- different displayed metrics for one player can come from different eligible rows
- Metric-based API sorting (`ppg_*`, `mpg_*`) uses the same best-eligible semantics as UI search.
## Docker Volumes and Persistence Snapshot volume intent:
- `snapshots_incoming`: extractor output waiting for import
- `snapshots_archive`: successfully imported files
- `snapshots_failed`: schema/processing failures for operator inspection
`docker-compose.yml` uses named volumes: ## Environment Variables
- `postgres_data`: PostgreSQL persistent database Use `.env.example` as the source of truth.
- `static_data`: collected static assets
- `media_data`: user/provider media artifacts
- `runtime_data`: app runtime files (e.g., celery beat schedule)
- `redis_data`: Redis persistence (`/data` for RDB/AOF files)
- `node_modules_data`: Node modules cache for Tailwind builds in development override
This keeps persistent state outside container lifecycles. Core groups:
- Django runtime/security vars
- PostgreSQL connection vars
- image tag vars (`APP_IMAGE_TAG`, `NGINX_IMAGE_TAG`)
- snapshot directory vars (`STATIC_DATASET_*`)
- optional future scheduler vars (`SCHEDULER_*`)
- daily orchestration vars (`DAILY_ORCHESTRATION_*`)
- optional legacy provider-sync toggle (`LEGACY_PROVIDER_STACK_ENABLED`)
In release-style mode, these volumes remain the persistence layer: Operational reference:
- `docs/runtime-consistency-checklist.md`
- `postgres_data` for database state ## Snapshot Storage Convention
- `static_data` for collected static assets served by nginx
- `media_data` for uploaded/provider media
- `runtime_data` for Celery beat schedule/runtime files
- `redis_data` for Redis persistence
## Migrations Snapshot files are expected under:
- incoming: `/app/snapshots/incoming`
- archive: `/app/snapshots/archive`
- failed: `/app/snapshots/failed`
Create migration files: Configured via environment:
- `STATIC_DATASET_INCOMING_DIR`
- `STATIC_DATASET_ARCHIVE_DIR`
- `STATIC_DATASET_FAILED_DIR`
```bash ## Snapshot JSON Schema (MVP)
docker compose exec web python manage.py makemigrations
Each file must be a JSON object:
```json
{
"source_name": "official_site_feed",
"snapshot_date": "2026-03-13",
"records": [
{
"competition_external_id": "comp-nba",
"competition_name": "NBA",
"season": "2025-2026",
"team_external_id": "team-lal",
"team_name": "Los Angeles Lakers",
"player_external_id": "player-23",
"full_name": "LeBron James",
"first_name": "LeBron",
"last_name": "James",
"birth_date": "1984-12-30",
"nationality": "US",
"height_cm": 206,
"weight_kg": 113,
"position": "SF",
"role": "Primary Creator",
"games_played": 60,
"minutes_per_game": 34.5,
"points_per_game": 25.4,
"rebounds_per_game": 7.2,
"assists_per_game": 8.1,
"steals_per_game": 1.3,
"blocks_per_game": 0.7,
"turnovers_per_game": 3.2,
"fg_pct": 51.1,
"three_pt_pct": 38.4,
"ft_pct": 79.8,
"source_metadata": {},
"raw_payload": {}
}
],
"source_metadata": {},
"raw_payload": {}
}
``` ```
Apply migrations: Validation is strict:
- unknown fields are rejected
- required fields must exist:
- `competition_external_id`, `competition_name`, `season`
- `team_external_id`, `team_name`
- `player_external_id`, `full_name`
- core stats (`games_played`, `minutes_per_game`, `points_per_game`, `rebounds_per_game`, `assists_per_game`, `steals_per_game`, `blocks_per_game`, `turnovers_per_game`, `fg_pct`, `three_pt_pct`, `ft_pct`)
- optional player bio/physical fields:
- `first_name`, `last_name`, `birth_date`, `nationality`, `height_cm`, `weight_kg`, `position`, `role`
- when `birth_date` is provided it must be `YYYY-MM-DD`
- numeric fields must be numeric
- invalid files are moved to failed directory
Importer enrichment note:
- `full_name` is source truth for identity display
- `first_name` / `last_name` are optional and may be absent in public snapshots
- when both are missing, importer may derive them from `full_name` as a best-effort enrichment step
- this enrichment is convenience-only and does not override source truth semantics
## Import Command
Run import:
```bash ```bash
docker compose exec web python manage.py migrate docker compose exec web python manage.py import_snapshots
``` ```
Run end-to-end daily orchestration manually (extractors -> import):
```bash
docker compose exec web python manage.py run_daily_orchestration
```
Command behavior:
- scans `STATIC_DATASET_INCOMING_DIR` for `.json` files
- validates strict schema
- computes SHA-256 checksum
- creates `ImportRun` + `ImportFile` records
- upserts relational entities (`Competition`, `Season`, `Team`, `Player`, `PlayerSeason`, `PlayerSeasonStats`)
- skips duplicate content using checksum
- moves valid files to archive
- moves invalid files to failed
Import lifecycle summary:
1. extractor writes normalized snapshots to `incoming`
2. `import_snapshots` validates + upserts to PostgreSQL
3. imported files move to `archive`
4. invalid files move to `failed` with error details in `ImportFile`
### Source Identity Namespacing
Raw external IDs are **not globally unique** across basketball data sources. HoopScout v2 uses a namespaced identity for imported entities:
- `Competition`: unique key is `(source_name, source_uid)`
- `Team`: unique key is `(source_name, source_uid)`
- `Player`: unique key is `(source_name, source_uid)`
`source_uid` values from different sources (for example `lba` and `bcl`) can safely overlap without overwriting each other.
Import history is visible in Django admin:
- `ImportRun`
- `ImportFile`
## Extractor Framework (v2)
v2 keeps extraction and import as two separate steps:
1. **Extractors** fetch public source content and emit normalized JSON snapshots.
2. **Importer** (`import_snapshots`) validates and upserts those snapshots into PostgreSQL.
Extractor pipeline:
- `fetch` (public endpoint/page requests with conservative HTTP behavior)
- `parse` (source-specific structure)
- `normalize` (map to HoopScout snapshot schema)
- `emit` (write JSON file to incoming directory or custom path)
Built-in extractor in this phase:
- `public_json_snapshot` (generic JSON feed extractor for MVP usage)
- `lba` (Lega Basket Serie A MVP extractor)
- `bcl` (Basketball Champions League MVP extractor)
Run extractor:
```bash
docker compose exec web python manage.py run_extractor public_json_snapshot
```
Run extractor with explicit output path (debugging):
```bash
docker compose exec web python manage.py run_extractor public_json_snapshot --output-path /app/snapshots/incoming
```
Dry-run validation (no file write):
```bash
docker compose exec web python manage.py run_extractor public_json_snapshot --dry-run
```
Run only the LBA extractor:
```bash
docker compose exec web python manage.py run_lba_extractor
```
Run only the BCL extractor:
```bash
docker compose exec web python manage.py run_bcl_extractor
```
### Daily orchestration behavior
`run_daily_orchestration` performs:
1. run configured extractors in order from `DAILY_ORCHESTRATION_EXTRACTORS`
2. write snapshots to incoming dir
3. run `import_snapshots`
4. log extractor/import summary
Extractor environment variables:
- `EXTRACTOR_USER_AGENT`
- `EXTRACTOR_HTTP_TIMEOUT_SECONDS`
- `EXTRACTOR_HTTP_RETRIES`
- `EXTRACTOR_RETRY_SLEEP_SECONDS`
- `EXTRACTOR_REQUEST_DELAY_SECONDS`
- `EXTRACTOR_PUBLIC_JSON_URL`
- `EXTRACTOR_PUBLIC_SOURCE_NAME`
- `EXTRACTOR_INCLUDE_RAW_PAYLOAD`
- `EXTRACTOR_LBA_STATS_URL`
- `EXTRACTOR_LBA_SEASON_LABEL`
- `EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID`
- `EXTRACTOR_LBA_COMPETITION_NAME`
- `EXTRACTOR_BCL_STATS_URL`
- `EXTRACTOR_BCL_SEASON_LABEL`
- `EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID`
- `EXTRACTOR_BCL_COMPETITION_NAME`
- `DAILY_ORCHESTRATION_EXTRACTORS`
- `DAILY_ORCHESTRATION_INTERVAL_SECONDS`
Notes:
- extraction is intentionally low-frequency and uses retries conservatively
- only public pages/endpoints should be targeted
- emitted snapshots must match the same schema consumed by `import_snapshots`
- `public_json_snapshot` uses the same required-vs-optional field contract as `SnapshotSchemaValidator` (no stricter extractor-only required bio/physical fields)
- optional scheduler container runs `scripts/scheduler.sh` loop using:
- image: `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
- command: `/app/scripts/scheduler.sh`
- interval: `DAILY_ORCHESTRATION_INTERVAL_SECONDS`
- disabled idle interval: `SCHEDULER_DISABLED_SLEEP_SECONDS`
### Scheduler entrypoint/runtime expectations
- scheduler uses the same app image and base `entrypoint.sh` as web
- scheduler requires database connectivity and snapshot volumes
- scheduler is disabled unless:
- compose `scheduler` profile is started
- `SCHEDULER_ENABLED=1`
- if scheduler service is started while disabled (`SCHEDULER_ENABLED=0`), it does not exit; it enters idle sleep mode to avoid restart loops with `restart: unless-stopped`
- this keeps default runtime simple while supporting daily automation
### LBA extractor assumptions and limitations (MVP)
- `source_name` is fixed to `lba`
- the extractor expects one stable public JSON payload that includes player/team/stat rows
- competition is configured by environment and emitted as:
- `competition_external_id` from `EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID`
- `competition_name` from `EXTRACTOR_LBA_COMPETITION_NAME`
- season is configured by `EXTRACTOR_LBA_SEASON_LABEL`
- parser supports payload keys: `records`, `data`, `players`, `items`
- normalization supports nested `player` and `team` objects with common stat aliases (`gp/mpg/ppg/rpg/apg/spg/bpg/tov`)
- public-source player bio/physical fields are often incomplete; extractor allows them to be missing and emits `null` for optional fields
- no live HTTP calls in tests; tests use fixtures/mocked responses only
### BCL extractor assumptions and limitations (MVP)
- `source_name` is fixed to `bcl`
- the extractor expects one stable public JSON payload that includes player/team/stat rows
- competition is configured by environment and emitted as:
- `competition_external_id` from `EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID`
- `competition_name` from `EXTRACTOR_BCL_COMPETITION_NAME`
- season is configured by `EXTRACTOR_BCL_SEASON_LABEL`
- parser supports payload keys: `records`, `data`, `players`, `items`
- normalization supports nested `player` and `team` objects with common stat aliases (`gp/mpg/ppg/rpg/apg/spg/bpg/tov`)
- public-source player bio/physical fields are often incomplete; extractor allows them to be missing and emits `null` for optional fields
- no live HTTP calls in tests; tests use fixtures/mocked responses only
## Testing
- runtime `web` image stays lean and may not include `pytest` tooling
- runtime containers (`web`/`nginx`/`scheduler`) are for serving/orchestration, not preloaded test tooling
- run tests with the development compose stack (or a dedicated test image/profile) and install dev dependencies first
- local example (one-off):
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
```
## Frontend Assets (Tailwind)
Build Tailwind once:
```bash
docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build'
```
If you see `Permission denied` writing `static/vendor` or `static/css` in development, fix local file ownership once:
```bash
sudo chown -R "$(id -u):$(id -g)" static
```
Run Tailwind in watch mode during development:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up tailwind
```
Source CSS lives in `static/src/tailwind.css` and compiles to `static/css/main.css`.
HTMX is served from local static assets (`static/vendor/htmx.min.js`) instead of a CDN dependency.
## Production Configuration
Use production settings in deployed environments:
```bash
DJANGO_SETTINGS_MODULE=config.settings.production
DJANGO_DEBUG=0
DJANGO_ENV=production
```
When `DJANGO_DEBUG=0`, startup fails fast unless:
- `DJANGO_SECRET_KEY` is a real non-default value
- `DJANGO_ALLOWED_HOSTS` is set
- `DJANGO_CSRF_TRUSTED_ORIGINS` is set (for production settings)
Additional production safety checks:
- `DJANGO_SECRET_KEY` must be strong and non-default in non-development environments
- `DJANGO_ALLOWED_HOSTS` must not contain localhost-style values
- `DJANGO_CSRF_TRUSTED_ORIGINS` must be explicit HTTPS origins only (no localhost/http)
Production settings enable hardened defaults such as:
- secure cookies
- HSTS
- security headers
- `ManifestStaticFilesStorage` for static asset integrity/versioning
### Production Configuration Checklist
- `DJANGO_SETTINGS_MODULE=config.settings.production`
- `DJANGO_ENV=production`
- `DJANGO_DEBUG=0`
- strong `DJANGO_SECRET_KEY` (unique, non-default, >= 32 chars)
- explicit `DJANGO_ALLOWED_HOSTS` (no localhost values)
- explicit `DJANGO_CSRF_TRUSTED_ORIGINS` with HTTPS origins only
- `DJANGO_SECURE_SSL_REDIRECT=1` and `DJANGO_SECURE_HSTS_SECONDS` set appropriately
## Superuser and Auth
Create superuser:
```bash
docker compose exec web python manage.py migrate
docker compose exec web python manage.py createsuperuser
```
## Health Endpoints
- app health: `/health/`
- nginx healthcheck proxies `/health/` to `web`
## Player Search (v2)
Public player search is server-rendered (Django templates) with HTMX partial updates.
Supported filters:
- free text name search
- nominal position, inferred role
- competition, season, team
- nationality
- age, height, weight ranges
- stats thresholds: games, MPG, PPG, RPG, APG, SPG, BPG, TOV, FG%, 3P%, FT%
Search correctness:
- combined team/competition/season/stat filters are applied to the same `PlayerSeason` context (no cross-row false positives)
- filtering happens at database level with Django ORM
Search metric semantics:
- result columns are labeled as **Best Eligible**
- each displayed metric is `MAX` over eligible player-season rows for that metric in the current filter context
- different metric columns for one player may come from different eligible seasons
- when no eligible value exists for a metric in the current context, the UI shows `-`
### API Search Metric Transparency
`GET /api/players/` now exposes sortable metric fields directly in each list row:
- `ppg_value`
- `mpg_value`
These fields use the same **best eligible** semantics as UI search. They are computed from eligible player-season rows in the current filter context and may be `null` when no eligible data exists.
API list responses also include:
- `sort`: effective sort key applied
- `metric_sort_keys`: metric-based sort keys currently supported
- `metric_semantics`: plain-language metric contract used for sorting/interpretation
Pagination and sorting:
- querystring is preserved
- HTMX navigation keeps URL state in sync with current filters/page/sort
## Saved Searches and Watchlist (v2)
Authenticated users can:
- save current search filters from the player search page
- re-run saved searches from scouting pages
- rename/update/delete saved searches
- update saved search filters via structured JSON in the edit screen
- add/remove favorite players inline (HTMX-friendly) and browse watchlist
## GitFlow
Required branch model:
- `main`: production
- `develop`: integration
- `feature/*`, `release/*`, `hotfix/*`
This v2 work branch is:
- `feature/hoopscout-v2-static-architecture`
## Notes on Legacy Layers
Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation.
They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks.
By default:
- `apps.providers` is not installed
- `/providers/` routes are not mounted
- legacy provider-specific settings are not required
- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
The adapter follows the published balldontlie OpenAPI contract: server `https://api.balldontlie.io`, NBA endpoints under `/nba/v1/*`, cursor pagination via `meta.next_cursor`, and `stats` ingestion filtered by `seasons[]`.
Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when stats are unauthorized.
Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md).
## GitFlow Workflow
GitFlow is required in this repository:
- `main`: production branch
- `develop`: integration branch
- `feature/*`: new feature branches from `develop`
- `release/*`: release hardening branches from `develop`
- `hotfix/*`: urgent production fixes from `main`
Read full details in [CONTRIBUTING.md](CONTRIBUTING.md) and [docs/workflow.md](docs/workflow.md).
### Repository Bootstrap Commands
Run these from the current `main` branch to initialize local GitFlow usage:
```bash
git checkout main
git pull origin main
git checkout -b develop
git push -u origin develop
```
Start a feature branch:
```bash
git checkout develop
git pull origin develop
git checkout -b feature/player-search-tuning
```
Start a release branch:
```bash
git checkout develop
git pull origin develop
git checkout -b release/0.1.0
```
Start a hotfix branch:
```bash
git checkout main
git pull origin main
git checkout -b hotfix/fix-redis-persistence
```
## Release Notes / Changelog Convention
- Use [CHANGELOG.md](CHANGELOG.md) with an `Unreleased` section.
- For each merged PR, add short entries under:
- `Added`
- `Changed`
- `Fixed`
- On release, move `Unreleased` items to a dated version section (`[x.y.z] - YYYY-MM-DD`).

View File

@ -45,6 +45,8 @@ class PlayerListSerializer(serializers.ModelSerializer):
inferred_role = serializers.CharField(source="inferred_role.name", allow_null=True) inferred_role = serializers.CharField(source="inferred_role.name", allow_null=True)
origin_competition = serializers.CharField(source="origin_competition.name", allow_null=True) origin_competition = serializers.CharField(source="origin_competition.name", allow_null=True)
origin_team = serializers.CharField(source="origin_team.name", allow_null=True) origin_team = serializers.CharField(source="origin_team.name", allow_null=True)
ppg_value = serializers.SerializerMethodField()
mpg_value = serializers.SerializerMethodField()
class Meta: class Meta:
model = Player model = Player
@ -59,10 +61,20 @@ class PlayerListSerializer(serializers.ModelSerializer):
"origin_team", "origin_team",
"height_cm", "height_cm",
"weight_kg", "weight_kg",
"ppg_value",
"mpg_value",
"dominant_hand", "dominant_hand",
"is_active", "is_active",
] ]
def get_ppg_value(self, obj):
    """Return the best-eligible points-per-game for this row, or None.

    The value comes from the queryset annotation ``ppg_value`` and may be a
    Decimal; it is coerced to ``float`` so the API emits a JSON number.
    This matches ``get_mpg_value`` — the original returned ``str(value)``,
    which made one sortable metric a string while its sibling was numeric.
    """
    value = getattr(obj, "ppg_value", None)
    # Coerce Decimal/int to float for a consistent numeric JSON type.
    return float(value) if value is not None else None
def get_mpg_value(self, obj):
    """Return the best-eligible minutes-per-game for this row, or None."""
    raw = getattr(obj, "mpg_value", None)
    if raw is None:
        return None
    return float(raw)
class PlayerAliasSerializer(serializers.Serializer): class PlayerAliasSerializer(serializers.Serializer):
alias = serializers.CharField() alias = serializers.CharField()

View File

@ -9,6 +9,7 @@ from apps.players.forms import PlayerSearchForm
from apps.players.models import Player from apps.players.models import Player
from apps.players.services.search import ( from apps.players.services.search import (
METRIC_SORT_KEYS, METRIC_SORT_KEYS,
SEARCH_METRIC_SEMANTICS_TEXT,
annotate_player_metrics, annotate_player_metrics,
apply_sorting, apply_sorting,
base_player_queryset, base_player_queryset,
@ -67,15 +68,18 @@ class PlayerSearchApiView(ReadOnlyBaseAPIView, generics.ListAPIView):
form = self.get_search_form() form = self.get_search_form()
if form.is_bound and not form.is_valid(): if form.is_bound and not form.is_valid():
return self._validation_error_response() return self._validation_error_response()
return super().list(request, *args, **kwargs) response = super().list(request, *args, **kwargs)
response.data["sort"] = form.cleaned_data.get("sort", "name_asc")
response.data["metric_semantics"] = SEARCH_METRIC_SEMANTICS_TEXT
response.data["metric_sort_keys"] = sorted(METRIC_SORT_KEYS)
return response
def get_queryset(self): def get_queryset(self):
form = self.get_search_form() form = self.get_search_form()
queryset = base_player_queryset() queryset = base_player_queryset()
queryset = filter_players(queryset, form.cleaned_data) queryset = filter_players(queryset, form.cleaned_data)
sort_key = form.cleaned_data.get("sort", "name_asc") sort_key = form.cleaned_data.get("sort", "name_asc")
if sort_key in METRIC_SORT_KEYS: queryset = annotate_player_metrics(queryset, form.cleaned_data)
queryset = annotate_player_metrics(queryset, form.cleaned_data)
queryset = apply_sorting(queryset, sort_key) queryset = apply_sorting(queryset, sort_key)
return queryset return queryset

View File

@ -5,16 +5,16 @@ from .models import Competition, Season, TeamSeason
@admin.register(Competition) @admin.register(Competition)
class CompetitionAdmin(admin.ModelAdmin): class CompetitionAdmin(admin.ModelAdmin):
list_display = ("name", "competition_type", "gender", "country", "is_active") list_display = ("name", "source_name", "source_uid", "competition_type", "gender", "country", "is_active")
list_filter = ("competition_type", "gender", "country", "is_active") list_filter = ("competition_type", "gender", "country", "is_active")
search_fields = ("name", "slug") search_fields = ("name", "slug", "source_name", "source_uid")
@admin.register(Season) @admin.register(Season)
class SeasonAdmin(admin.ModelAdmin): class SeasonAdmin(admin.ModelAdmin):
list_display = ("label", "start_date", "end_date", "is_current") list_display = ("label", "source_uid", "start_date", "end_date", "is_current")
list_filter = ("is_current",) list_filter = ("is_current",)
search_fields = ("label",) search_fields = ("label", "source_uid")
@admin.register(TeamSeason) @admin.register(TeamSeason)

View File

@ -0,0 +1,32 @@
# Generated by Django 5.2.12 on 2026-03-13 12:44
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a nullable, globally-unique ``source_uid`` to Competition and
    Season (plus lookup indexes) so imported entities can be matched back
    to their external-source identifiers.

    NOTE(review): the global ``unique=True`` on ``source_uid`` is relaxed
    to a per-source constraint by a later migration in this series.
    """

    dependencies = [
        ('competitions', '0002_initial'),
        ('players', '0005_player_weight_index'),
    ]

    operations = [
        # Nullable: rows created without an external source stay unconstrained.
        migrations.AddField(
            model_name='competition',
            name='source_uid',
            field=models.CharField(blank=True, max_length=120, null=True, unique=True),
        ),
        migrations.AddField(
            model_name='season',
            name='source_uid',
            field=models.CharField(blank=True, max_length=120, null=True, unique=True),
        ),
        # Indexes support import-time lookups by external identifier.
        migrations.AddIndex(
            model_name='competition',
            index=models.Index(fields=['source_uid'], name='competition_source__1c043a_idx'),
        ),
        migrations.AddIndex(
            model_name='season',
            index=models.Index(fields=['source_uid'], name='competition_source__41e6a6_idx'),
        ),
    ]

View File

@ -0,0 +1,35 @@
# Generated by Django 5.2.12 on 2026-03-13 15:08
from django.db import migrations, models
class Migration(migrations.Migration):
    """Namespace competition source identity.

    Adds ``source_name`` and relaxes ``source_uid`` from globally unique to
    unique per ``(source_name, source_uid)`` pair (empty/null UIDs excluded),
    so raw external IDs from different sources (e.g. ``lba`` and ``bcl``)
    may overlap without clashing.
    """

    dependencies = [
        ("competitions", "0003_competition_source_uid_season_source_uid_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="competition",
            name="source_name",
            field=models.CharField(blank=True, default="", max_length=120),
        ),
        # Drop the global unique=True added in 0003; uniqueness is now
        # enforced by the namespaced constraint below.
        migrations.AlterField(
            model_name="competition",
            name="source_uid",
            field=models.CharField(blank=True, max_length=120, null=True),
        ),
        # Conditional so rows without a real source identity are ignored.
        migrations.AddConstraint(
            model_name="competition",
            constraint=models.UniqueConstraint(
                condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
                fields=("source_name", "source_uid"),
                name="uq_competition_source_namespace_uid",
            ),
        ),
        migrations.AddIndex(
            model_name="competition",
            index=models.Index(fields=["source_name", "source_uid"], name="competition_source__4c5f3d_idx"),
        ),
    ]

View File

@ -14,6 +14,8 @@ class Competition(models.Model):
name = models.CharField(max_length=220) name = models.CharField(max_length=220)
slug = models.SlugField(max_length=240, unique=True) slug = models.SlugField(max_length=240, unique=True)
source_name = models.CharField(max_length=120, blank=True, default="")
source_uid = models.CharField(max_length=120, blank=True, null=True)
competition_type = models.CharField(max_length=24, choices=CompetitionType.choices) competition_type = models.CharField(max_length=24, choices=CompetitionType.choices)
gender = models.CharField(max_length=16, choices=Gender.choices, default=Gender.MEN) gender = models.CharField(max_length=16, choices=Gender.choices, default=Gender.MEN)
level = models.PositiveSmallIntegerField(default=1) level = models.PositiveSmallIntegerField(default=1)
@ -31,10 +33,17 @@ class Competition(models.Model):
class Meta: class Meta:
ordering = ["name"] ordering = ["name"]
constraints = [ constraints = [
models.UniqueConstraint(fields=["name", "country"], name="uq_competition_name_country") models.UniqueConstraint(fields=["name", "country"], name="uq_competition_name_country"),
models.UniqueConstraint(
fields=["source_name", "source_uid"],
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
name="uq_competition_source_namespace_uid",
),
] ]
indexes = [ indexes = [
models.Index(fields=["name"]), models.Index(fields=["name"]),
models.Index(fields=["source_name", "source_uid"]),
models.Index(fields=["source_uid"]),
models.Index(fields=["country"]), models.Index(fields=["country"]),
models.Index(fields=["competition_type"]), models.Index(fields=["competition_type"]),
models.Index(fields=["gender"]), models.Index(fields=["gender"]),
@ -46,6 +55,7 @@ class Competition(models.Model):
class Season(models.Model): class Season(models.Model):
source_uid = models.CharField(max_length=120, blank=True, null=True, unique=True)
label = models.CharField(max_length=40, unique=True) label = models.CharField(max_length=40, unique=True)
start_date = models.DateField() start_date = models.DateField()
end_date = models.DateField() end_date = models.DateField()
@ -57,6 +67,7 @@ class Season(models.Model):
models.CheckConstraint(condition=models.Q(end_date__gte=models.F("start_date")), name="ck_season_dates") models.CheckConstraint(condition=models.Q(end_date__gte=models.F("start_date")), name="ck_season_dates")
] ]
indexes = [ indexes = [
models.Index(fields=["source_uid"]),
models.Index(fields=["is_current"]), models.Index(fields=["is_current"]),
models.Index(fields=["start_date"]), models.Index(fields=["start_date"]),
models.Index(fields=["end_date"]), models.Index(fields=["end_date"]),

View File

@ -1,117 +1,109 @@
from django.contrib import admin from django.contrib import admin
from django.contrib import messages from django.conf import settings
from django.db.models import Count
from apps.providers.registry import get_default_provider_namespace from .models import ImportFile, ImportRun, IngestionError, IngestionRun
from .models import IngestionError, IngestionRun
from .tasks import trigger_full_sync, trigger_incremental_sync
class IngestionErrorInline(admin.TabularInline): class ImportFileInline(admin.TabularInline):
model = IngestionError model = ImportFile
extra = 0 extra = 0
readonly_fields = ("provider_namespace", "entity_type", "external_id", "severity", "message", "occurred_at") readonly_fields = (
"relative_path",
"source_name",
@admin.register(IngestionRun) "snapshot_date",
class IngestionRunAdmin(admin.ModelAdmin):
list_display = (
"provider_namespace",
"job_type",
"status", "status",
"records_processed", "checksum",
"records_created", "file_size_bytes",
"records_updated", "rows_total",
"records_failed", "rows_upserted",
"error_count", "rows_failed",
"short_error_summary", "error_message",
"processed_at",
"created_at",
)
@admin.register(ImportRun)
class ImportRunAdmin(admin.ModelAdmin):
list_display = (
"id",
"source",
"status",
"files_total",
"files_processed",
"rows_total",
"rows_upserted",
"rows_failed",
"started_at", "started_at",
"finished_at", "finished_at",
"created_at",
) )
list_filter = ("provider_namespace", "job_type", "status") list_filter = ("source", "status")
search_fields = ("provider_namespace",) search_fields = ("source", "error_summary")
inlines = (IngestionErrorInline,)
readonly_fields = ( readonly_fields = (
"provider_namespace", "source",
"job_type",
"status", "status",
"triggered_by", "triggered_by",
"started_at", "started_at",
"finished_at", "finished_at",
"records_processed", "files_total",
"records_created", "files_processed",
"records_updated", "rows_total",
"records_failed", "rows_upserted",
"rows_failed",
"error_summary", "error_summary",
"context", "context",
"raw_payload",
"created_at", "created_at",
) )
actions = ( inlines = (ImportFileInline,)
"enqueue_full_sync_default_provider",
"enqueue_incremental_sync_default_provider",
"retry_selected_runs", @admin.register(ImportFile)
class ImportFileAdmin(admin.ModelAdmin):
list_display = (
"id",
"import_run",
"relative_path",
"source_name",
"snapshot_date",
"status",
"rows_total",
"rows_upserted",
"rows_failed",
"processed_at",
)
list_filter = ("status",)
search_fields = ("relative_path", "source_name", "checksum", "error_message")
readonly_fields = (
"import_run",
"relative_path",
"source_name",
"snapshot_date",
"status",
"checksum",
"file_size_bytes",
"rows_total",
"rows_upserted",
"rows_failed",
"error_message",
"payload_preview",
"processed_at",
"created_at",
) )
@admin.action(description="Queue full sync (default provider)")
def enqueue_full_sync_default_provider(self, request, queryset):
provider_namespace = get_default_provider_namespace()
trigger_full_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
self.message_user(request, f"Queued full sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Queue incremental sync (default provider)") class LegacyIngestionRunAdmin(admin.ModelAdmin):
def enqueue_incremental_sync_default_provider(self, request, queryset): list_display = ("provider_namespace", "job_type", "status", "started_at", "finished_at")
provider_namespace = get_default_provider_namespace() list_filter = ("provider_namespace", "job_type", "status")
trigger_incremental_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id) search_fields = ("provider_namespace", "error_summary")
self.message_user(request, f"Queued incremental sync task for {provider_namespace}.", level=messages.SUCCESS)
@admin.action(description="Retry selected ingestion runs")
def retry_selected_runs(self, request, queryset):
count = 0
for run in queryset:
if run.job_type == IngestionRun.JobType.INCREMENTAL:
trigger_incremental_sync.delay(
provider_namespace=run.provider_namespace,
triggered_by_id=request.user.id,
context={"retry_of": run.id},
)
else:
trigger_full_sync.delay(
provider_namespace=run.provider_namespace,
triggered_by_id=request.user.id,
context={"retry_of": run.id},
)
count += 1
self.message_user(request, f"Queued {count} retry task(s).", level=messages.SUCCESS)
def get_queryset(self, request):
queryset = super().get_queryset(request)
return queryset.annotate(_error_count=Count("errors"))
@admin.display(ordering="_error_count", description="Errors")
def error_count(self, obj):
return getattr(obj, "_error_count", 0)
@admin.display(description="Error summary")
def short_error_summary(self, obj):
if not obj.error_summary:
return "-"
return (obj.error_summary[:90] + "...") if len(obj.error_summary) > 90 else obj.error_summary
@admin.register(IngestionError) class LegacyIngestionErrorAdmin(admin.ModelAdmin):
class IngestionErrorAdmin(admin.ModelAdmin):
list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at") list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at")
list_filter = ("severity", "provider_namespace") list_filter = ("severity", "provider_namespace")
search_fields = ("entity_type", "external_id", "message") search_fields = ("entity_type", "external_id", "message")
readonly_fields = (
"ingestion_run",
"provider_namespace", if settings.LEGACY_PROVIDER_STACK_ENABLED:
"entity_type", admin.site.register(IngestionRun, LegacyIngestionRunAdmin)
"external_id", admin.site.register(IngestionError, LegacyIngestionErrorAdmin)
"severity",
"message",
"raw_payload",
"occurred_at",
)

View File

@ -0,0 +1,26 @@
from .bcl import BCLSnapshotExtractor
from .base import (
BaseSnapshotExtractor,
ExtractionResult,
ExtractorConfigError,
ExtractorError,
ExtractorFetchError,
ExtractorNormalizationError,
ExtractorParseError,
)
from .lba import LBASnapshotExtractor
from .registry import available_extractors, create_extractor
__all__ = [
"BaseSnapshotExtractor",
"BCLSnapshotExtractor",
"LBASnapshotExtractor",
"ExtractionResult",
"ExtractorError",
"ExtractorConfigError",
"ExtractorFetchError",
"ExtractorParseError",
"ExtractorNormalizationError",
"available_extractors",
"create_extractor",
]

View File

@ -0,0 +1,150 @@
from __future__ import annotations
import json
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import date
from pathlib import Path
from typing import Any
from django.conf import settings
from apps.ingestion.snapshots import SnapshotSchemaValidator
logger = logging.getLogger(__name__)
class ExtractorError(RuntimeError):
    """Base exception for all snapshot-extractor failures."""
class ExtractorConfigError(ExtractorError):
    """Raised when extractor configuration is invalid or incomplete."""
class ExtractorFetchError(ExtractorError):
    """Raised when fetching the remote/source payload fails."""
class ExtractorParseError(ExtractorError):
    """Raised when fetched content cannot be parsed into source records."""
class ExtractorNormalizationError(ExtractorError):
    """Raised when source rows cannot be normalized to the snapshot schema."""
@dataclass
class ExtractionResult:
    """Summary of one completed extractor run."""

    extractor_name: str  # registry name of the extractor that ran
    source_name: str  # namespaced source identity (e.g. "lba", "bcl")
    snapshot_date: date  # date the emitted snapshot represents
    records_count: int  # number of validated records in the snapshot
    output_path: Path | None  # written JSON path; None when write was skipped
class BaseSnapshotExtractor(ABC):
    """Template for source extractors that emit normalized JSON snapshots.

    Subclasses implement ``fetch`` -> ``parse`` -> ``normalize_record``;
    this base class assembles the snapshot envelope, validates it with the
    shared ``SnapshotSchemaValidator`` contract, and optionally writes the
    JSON file for the importer to pick up.
    """

    # Registry name used in snapshot filenames and logs; set by subclasses.
    extractor_name = "base"
    # Namespaced source identity stamped into every snapshot envelope.
    source_name = "unknown_source"

    @abstractmethod
    def fetch(self) -> Any:
        """Fetch source payload from a source endpoint/resource."""

    @abstractmethod
    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Parse fetched payload into source-specific record dictionaries."""

    @abstractmethod
    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Normalize a source record into HoopScout snapshot record shape."""

    def resolve_snapshot_date(self) -> date:
        """Default snapshot date: today. Subclasses may override."""
        return date.today()

    def normalize_records(self, source_records: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Normalize every parsed row; reject non-dict rows with their index."""
        normalized: list[dict[str, Any]] = []
        for idx, row in enumerate(source_records):
            if not isinstance(row, dict):
                raise ExtractorNormalizationError(f"Parsed record at index {idx} must be an object.")
            normalized.append(self.normalize_record(row))
        return normalized

    def build_snapshot(self, records: list[dict[str, Any]], snapshot_date: date) -> dict[str, Any]:
        """Wrap normalized records in the envelope consumed by the importer."""
        return {
            "source_name": self.source_name,
            "snapshot_date": snapshot_date.isoformat(),
            "records": records,
        }

    def default_output_dir(self) -> Path:
        """Incoming-snapshots directory configured in Django settings."""
        return Path(settings.STATIC_DATASET_INCOMING_DIR)

    def snapshot_filename(self, snapshot_date: date) -> str:
        """Deterministic per-day filename, e.g. ``lba-2026-03-20.json``."""
        return f"{self.extractor_name}-{snapshot_date.isoformat()}.json"

    def emit_snapshot(
        self,
        snapshot: dict[str, Any],
        *,
        output_path: str | Path | None = None,
        indent: int = 2,
    ) -> Path:
        """Write *snapshot* as JSON and return the written file path.

        ``output_path`` may be omitted (default incoming dir + generated
        filename), point at a ``.json`` file (used verbatim), or point at a
        directory (generated filename placed inside it). Parent directories
        are created as needed.
        """
        if output_path is None:
            destination = self.default_output_dir()
            destination.mkdir(parents=True, exist_ok=True)
            file_path = destination / self.snapshot_filename(date.fromisoformat(snapshot["snapshot_date"]))
        else:
            target = Path(output_path)
            # A ".json" suffix means an explicit file target; anything else
            # is treated as a directory to hold the generated filename.
            if target.suffix.lower() == ".json":
                target.parent.mkdir(parents=True, exist_ok=True)
                file_path = target
            else:
                target.mkdir(parents=True, exist_ok=True)
                file_path = target / self.snapshot_filename(date.fromisoformat(snapshot["snapshot_date"]))
        file_path.write_text(json.dumps(snapshot, indent=indent, ensure_ascii=True), encoding="utf-8")
        return file_path

    def run(
        self,
        *,
        output_path: str | Path | None = None,
        snapshot_date: date | None = None,
        write_output: bool = True,
        indent: int = 2,
    ) -> ExtractionResult:
        """Execute fetch -> parse -> normalize -> validate (-> emit).

        Returns an ``ExtractionResult``; its ``output_path`` is None when
        ``write_output`` is False (dry-run validation, no file written).
        """
        payload = self.fetch()
        source_rows = self.parse(payload)
        normalized_rows = self.normalize_records(source_rows)
        resolved_snapshot_date = snapshot_date or self.resolve_snapshot_date()
        snapshot = self.build_snapshot(normalized_rows, resolved_snapshot_date)
        # Validate against the same schema contract the importer uses and
        # keep the validator's (possibly coerced) records in the snapshot.
        validated = SnapshotSchemaValidator.validate(snapshot)
        snapshot["records"] = validated.records
        output_file: Path | None = None
        if write_output:
            output_file = self.emit_snapshot(snapshot, output_path=output_path, indent=indent)
            logger.info(
                "extractor_snapshot_written extractor=%s source=%s records=%s path=%s",
                self.extractor_name,
                validated.source_name,
                len(validated.records),
                output_file,
            )
        else:
            logger.info(
                "extractor_snapshot_validated extractor=%s source=%s records=%s write_output=0",
                self.extractor_name,
                validated.source_name,
                len(validated.records),
            )
        return ExtractionResult(
            extractor_name=self.extractor_name,
            source_name=validated.source_name,
            snapshot_date=validated.snapshot_date,
            records_count=len(validated.records),
            output_path=output_file,
        )

View File

@ -0,0 +1,171 @@
from __future__ import annotations
from typing import Any
from django.conf import settings
from .base import BaseSnapshotExtractor, ExtractorConfigError, ExtractorNormalizationError, ExtractorParseError
from .http import ResponsibleHttpClient
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
for key in keys:
value = record.get(key)
if value not in (None, ""):
return value
return None
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
for key in keys:
value = record.get(key)
if isinstance(value, str):
stripped = value.strip()
if stripped:
return stripped
return None
# Normalized keys that every row must populate; normalize_record raises
# ExtractorNormalizationError when any of these ends up missing or empty.
ESSENTIAL_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
class BCLSnapshotExtractor(BaseSnapshotExtractor):
    """
    Basketball Champions League MVP extractor.

    Scope is intentionally conservative:
    - one configured public stats endpoint
    - one configured season label
    - normalized player-season rows only
    """

    extractor_name = "bcl"
    source_name = "bcl"

    def __init__(self, *, http_client: ResponsibleHttpClient | None = None):
        # Source identity comes entirely from Django settings; blank values
        # are rejected below so misconfiguration fails at construction time.
        self.url = settings.EXTRACTOR_BCL_STATS_URL.strip()
        self.season_label = settings.EXTRACTOR_BCL_SEASON_LABEL.strip()
        self.competition_external_id = settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID.strip()
        self.competition_name = settings.EXTRACTOR_BCL_COMPETITION_NAME.strip()
        self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD
        # Injectable http_client keeps the extractor testable without network.
        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )
        if not self.url:
            raise ExtractorConfigError("EXTRACTOR_BCL_STATS_URL is required for bcl extractor.")
        if not self.season_label:
            raise ExtractorConfigError("EXTRACTOR_BCL_SEASON_LABEL is required for bcl extractor.")
        if not self.competition_external_id:
            raise ExtractorConfigError("EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID is required.")
        if not self.competition_name:
            raise ExtractorConfigError("EXTRACTOR_BCL_COMPETITION_NAME is required.")

    def fetch(self) -> Any:
        """Fetch the raw JSON payload from the configured stats endpoint."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Extract the row list from the fetched payload.

        Accepts a bare JSON array, or an object wrapping the rows under one
        of the conventional collection keys.
        """
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("BCL payload must be a JSON object or array.")
        for key in ("records", "data", "players", "items"):
            rows = payload.get(key)
            if isinstance(rows, list):
                return rows
        raise ExtractorParseError("BCL payload must contain one of: records, data, players, items.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one upstream row into the HoopScout snapshot record shape.

        Values are resolved through ordered key fallbacks: flat row keys
        first, then nested "player"/"team" objects when present. Raises
        ExtractorNormalizationError when any ESSENTIAL_FIELDS entry is
        missing or empty.
        """
        # Nested objects are optional; substitute empty dicts so the fallback
        # lookups below stay uniform.
        player_obj = source_record.get("player") if isinstance(source_record.get("player"), dict) else {}
        team_obj = source_record.get("team") if isinstance(source_record.get("team"), dict) else {}
        full_name = _first_non_empty(
            source_record,
            "full_name",
            "player_name",
            "name",
        ) or _first_non_empty(player_obj, "full_name", "name")
        first_name = _first_non_empty(source_record, "first_name") or _first_non_empty(player_obj, "first_name")
        last_name = _first_non_empty(source_record, "last_name") or _first_non_empty(player_obj, "last_name")
        player_external_id = _first_non_empty(
            source_record, "player_external_id", "player_id", "athlete_id"
        ) or _first_non_empty(player_obj, "id", "player_id")
        team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty(
            team_obj, "id", "team_id"
        )
        # "team" may hold a plain string team name, so the text-only helper is
        # used here (it skips non-string values such as a nested team dict).
        team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text(
            team_obj, "name"
        )
        normalized = {
            "competition_external_id": self.competition_external_id,
            "competition_name": self.competition_name,
            "season": self.season_label,
            "team_external_id": team_external_id,
            "team_name": team_name,
            "player_external_id": player_external_id,
            "full_name": full_name,
            "first_name": first_name,
            "last_name": last_name,
            "birth_date": _first_non_empty(source_record, "birth_date") or _first_non_empty(
                player_obj, "birth_date", "dob"
            ),
            "nationality": _first_non_empty(source_record, "nationality")
            or _first_non_empty(player_obj, "nationality", "country"),
            "height_cm": _first_non_empty(source_record, "height_cm") or _first_non_empty(player_obj, "height_cm"),
            "weight_kg": _first_non_empty(source_record, "weight_kg") or _first_non_empty(player_obj, "weight_kg"),
            "position": _first_non_empty(source_record, "position") or _first_non_empty(player_obj, "position"),
            "role": _first_non_empty(source_record, "role"),
            "games_played": _first_non_empty(source_record, "games_played", "gp"),
            "minutes_per_game": _first_non_empty(source_record, "minutes_per_game", "mpg"),
            "points_per_game": _first_non_empty(source_record, "points_per_game", "ppg"),
            "rebounds_per_game": _first_non_empty(source_record, "rebounds_per_game", "rpg"),
            "assists_per_game": _first_non_empty(source_record, "assists_per_game", "apg"),
            "steals_per_game": _first_non_empty(source_record, "steals_per_game", "spg"),
            "blocks_per_game": _first_non_empty(source_record, "blocks_per_game", "bpg"),
            "turnovers_per_game": _first_non_empty(source_record, "turnovers_per_game", "tov"),
            "fg_pct": _first_non_empty(source_record, "fg_pct", "fg_percentage"),
            "three_pt_pct": _first_non_empty(
                source_record, "three_pt_pct", "three_point_pct", "3p_pct", "three_pct"
            ),
            "ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"),
        }
        missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(f"bcl row missing required fields: {', '.join(sorted(missing))}")
        # Identity fields are always emitted as stripped strings.
        normalized["team_external_id"] = str(normalized["team_external_id"]).strip()
        normalized["player_external_id"] = str(normalized["player_external_id"]).strip()
        normalized["competition_external_id"] = str(normalized["competition_external_id"]).strip()
        normalized["season"] = str(normalized["season"]).strip()
        if self.include_raw_payload:
            # Keep the untouched upstream row for debugging/auditing.
            normalized["raw_payload"] = source_record
        return normalized

View File

@ -0,0 +1,109 @@
from __future__ import annotations
import logging
import time
from typing import Any
import requests
from .base import ExtractorFetchError
logger = logging.getLogger(__name__)
class ResponsibleHttpClient:
    """
    Small HTTP helper for public-source extraction:
    - explicit User-Agent
    - request timeout
    - conservative retries
    - low-frequency pacing (fixed delay between requests)

    Retries cover transport errors, retryable status codes, and HTTP error
    statuses from ``raise_for_status``. Malformed JSON bodies are NOT
    retried: in requests>=2.27 ``Response.json()`` raises
    ``requests.JSONDecodeError`` (a ``RequestException`` subclass), which the
    previous implementation retried and then reported as a generic request
    failure; decoding now happens outside the retry block so a decode error
    surfaces immediately with a precise message.
    """

    # Throttling and transient server errors are worth retrying.
    RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}

    def __init__(
        self,
        *,
        user_agent: str,
        timeout_seconds: float,
        retries: int,
        retry_sleep_seconds: float,
        request_delay_seconds: float,
        session: requests.Session | None = None,
    ):
        self.user_agent = user_agent
        self.timeout_seconds = timeout_seconds
        self.retries = retries
        self.retry_sleep_seconds = retry_sleep_seconds
        self.request_delay_seconds = request_delay_seconds
        # Injectable session keeps the client testable without a network.
        self.session = session or requests.Session()
        # Monotonic timestamp of the previous request, used for pacing.
        self._last_request_at: float | None = None

    def _pace(self) -> None:
        """Sleep so consecutive requests are at least request_delay_seconds apart.

        The first request is never delayed; it only records a timestamp.
        """
        if self.request_delay_seconds <= 0:
            return
        now = time.monotonic()
        if self._last_request_at is None:
            self._last_request_at = now
            return
        elapsed = now - self._last_request_at
        remaining = self.request_delay_seconds - elapsed
        if remaining > 0:
            time.sleep(remaining)
        self._last_request_at = time.monotonic()

    def get_json(
        self,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
    ) -> Any:
        """GET *url* and return the decoded JSON body.

        Args:
            url: Absolute URL to fetch.
            params: Optional query parameters.
            headers: Optional extra headers; User-Agent is always set and may
                be overridden by the caller.

        Raises:
            ExtractorFetchError: when retries are exhausted, the final status
                is an error, or the body is not valid JSON.
        """
        merged_headers = {"User-Agent": self.user_agent}
        if headers:
            merged_headers.update(headers)
        attempts = self.retries + 1
        for attempt in range(1, attempts + 1):
            try:
                self._pace()
                response = self.session.get(
                    url,
                    params=params,
                    headers=merged_headers,
                    timeout=self.timeout_seconds,
                )
                if response.status_code in self.RETRYABLE_STATUS_CODES:
                    if attempt < attempts:
                        logger.warning(
                            "extractor_http_retryable_status status=%s url=%s attempt=%s/%s",
                            response.status_code,
                            url,
                            attempt,
                            attempts,
                        )
                        time.sleep(self.retry_sleep_seconds)
                        continue
                    raise ExtractorFetchError(
                        f"Retryable status exhausted: status={response.status_code} url={url}"
                    )
                response.raise_for_status()
            except requests.RequestException as exc:
                if attempt < attempts:
                    logger.warning(
                        "extractor_http_request_retry error=%s url=%s attempt=%s/%s",
                        exc,
                        url,
                        attempt,
                        attempts,
                    )
                    time.sleep(self.retry_sleep_seconds)
                    continue
                raise ExtractorFetchError(f"Request failed after retries: {exc}") from exc
            # Decode OUTSIDE the retry try-block: re-fetching a syntactically
            # broken payload is unlikely to help, and JSONDecodeError would
            # otherwise be swallowed by the RequestException retry path.
            try:
                return response.json()
            except ValueError as exc:
                raise ExtractorFetchError(f"Invalid JSON response from {url}: {exc}") from exc
        raise ExtractorFetchError(f"Unexpected retry loop exit for {url}")

View File

@ -0,0 +1,171 @@
from __future__ import annotations
from typing import Any
from django.conf import settings
from .base import BaseSnapshotExtractor, ExtractorConfigError, ExtractorNormalizationError, ExtractorParseError
from .http import ResponsibleHttpClient
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
for key in keys:
value = record.get(key)
if value not in (None, ""):
return value
return None
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
for key in keys:
value = record.get(key)
if isinstance(value, str):
stripped = value.strip()
if stripped:
return stripped
return None
# Normalized keys that every row must populate; normalize_record raises
# ExtractorNormalizationError when any of these ends up missing or empty.
ESSENTIAL_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
class LBASnapshotExtractor(BaseSnapshotExtractor):
    """
    LBA (Lega Basket Serie A) MVP extractor.

    Scope is intentionally conservative:
    - one configured public stats endpoint
    - one configured season label
    - normalized player-season rows only
    """

    extractor_name = "lba"
    source_name = "lba"

    def __init__(self, *, http_client: ResponsibleHttpClient | None = None):
        # Source identity comes entirely from Django settings; blank values
        # are rejected below so misconfiguration fails at construction time.
        self.url = settings.EXTRACTOR_LBA_STATS_URL.strip()
        self.season_label = settings.EXTRACTOR_LBA_SEASON_LABEL.strip()
        self.competition_external_id = settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID.strip()
        self.competition_name = settings.EXTRACTOR_LBA_COMPETITION_NAME.strip()
        self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD
        # Injectable http_client keeps the extractor testable without network.
        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )
        if not self.url:
            raise ExtractorConfigError("EXTRACTOR_LBA_STATS_URL is required for lba extractor.")
        if not self.season_label:
            raise ExtractorConfigError("EXTRACTOR_LBA_SEASON_LABEL is required for lba extractor.")
        if not self.competition_external_id:
            raise ExtractorConfigError("EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID is required.")
        if not self.competition_name:
            raise ExtractorConfigError("EXTRACTOR_LBA_COMPETITION_NAME is required.")

    def fetch(self) -> Any:
        """Fetch the raw JSON payload from the configured stats endpoint."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Extract the row list from the fetched payload.

        Accepts a bare JSON array, or an object wrapping the rows under one
        of the conventional collection keys.
        """
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("LBA payload must be a JSON object or array.")
        for key in ("records", "data", "players", "items"):
            rows = payload.get(key)
            if isinstance(rows, list):
                return rows
        raise ExtractorParseError("LBA payload must contain one of: records, data, players, items.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one upstream row into the HoopScout snapshot record shape.

        Values are resolved through ordered key fallbacks: flat row keys
        first, then nested "player"/"team" objects when present. Raises
        ExtractorNormalizationError when any ESSENTIAL_FIELDS entry is
        missing or empty.
        """
        # Nested objects are optional; substitute empty dicts so the fallback
        # lookups below stay uniform.
        player_obj = source_record.get("player") if isinstance(source_record.get("player"), dict) else {}
        team_obj = source_record.get("team") if isinstance(source_record.get("team"), dict) else {}
        full_name = _first_non_empty(
            source_record,
            "full_name",
            "player_name",
            "name",
        ) or _first_non_empty(player_obj, "full_name", "name")
        first_name = _first_non_empty(source_record, "first_name") or _first_non_empty(player_obj, "first_name")
        last_name = _first_non_empty(source_record, "last_name") or _first_non_empty(player_obj, "last_name")
        player_external_id = _first_non_empty(
            source_record, "player_external_id", "player_id", "athlete_id"
        ) or _first_non_empty(player_obj, "id", "player_id")
        team_external_id = _first_non_empty(source_record, "team_external_id", "team_id") or _first_non_empty(
            team_obj, "id", "team_id"
        )
        # "team" may hold a plain string team name, so the text-only helper is
        # used here (it skips non-string values such as a nested team dict).
        team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text(
            team_obj, "name"
        )
        normalized = {
            "competition_external_id": self.competition_external_id,
            "competition_name": self.competition_name,
            "season": self.season_label,
            "team_external_id": team_external_id,
            "team_name": team_name,
            "player_external_id": player_external_id,
            "full_name": full_name,
            "first_name": first_name,
            "last_name": last_name,
            "birth_date": _first_non_empty(source_record, "birth_date") or _first_non_empty(
                player_obj, "birth_date", "dob"
            ),
            "nationality": _first_non_empty(source_record, "nationality")
            or _first_non_empty(player_obj, "nationality", "country"),
            "height_cm": _first_non_empty(source_record, "height_cm") or _first_non_empty(player_obj, "height_cm"),
            "weight_kg": _first_non_empty(source_record, "weight_kg") or _first_non_empty(player_obj, "weight_kg"),
            "position": _first_non_empty(source_record, "position") or _first_non_empty(player_obj, "position"),
            "role": _first_non_empty(source_record, "role"),
            "games_played": _first_non_empty(source_record, "games_played", "gp"),
            "minutes_per_game": _first_non_empty(source_record, "minutes_per_game", "mpg"),
            "points_per_game": _first_non_empty(source_record, "points_per_game", "ppg"),
            "rebounds_per_game": _first_non_empty(source_record, "rebounds_per_game", "rpg"),
            "assists_per_game": _first_non_empty(source_record, "assists_per_game", "apg"),
            "steals_per_game": _first_non_empty(source_record, "steals_per_game", "spg"),
            "blocks_per_game": _first_non_empty(source_record, "blocks_per_game", "bpg"),
            "turnovers_per_game": _first_non_empty(source_record, "turnovers_per_game", "tov"),
            "fg_pct": _first_non_empty(source_record, "fg_pct", "fg_percentage"),
            "three_pt_pct": _first_non_empty(
                source_record, "three_pt_pct", "three_point_pct", "3p_pct", "three_pct"
            ),
            "ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"),
        }
        missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(f"lba row missing required fields: {', '.join(sorted(missing))}")
        # Identity fields are always emitted as stripped strings.
        normalized["team_external_id"] = str(normalized["team_external_id"]).strip()
        normalized["player_external_id"] = str(normalized["player_external_id"]).strip()
        normalized["competition_external_id"] = str(normalized["competition_external_id"]).strip()
        normalized["season"] = str(normalized["season"]).strip()
        if self.include_raw_payload:
            # Keep the untouched upstream row for debugging/auditing.
            normalized["raw_payload"] = source_record
        return normalized

View File

@ -0,0 +1,132 @@
from __future__ import annotations
from typing import Any
from django.conf import settings
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
from .base import (
BaseSnapshotExtractor,
ExtractorConfigError,
ExtractorNormalizationError,
ExtractorParseError,
)
from .http import ResponsibleHttpClient
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
for key in keys:
if key in record and record[key] not in (None, ""):
return record[key]
return None
class PublicJsonSnapshotExtractor(BaseSnapshotExtractor):
    """
    Generic public JSON extractor for MVP v2.

    This extractor is intentionally generic and lightweight:
    - fetch from one public JSON endpoint
    - parse list-like payloads
    - normalize into HoopScout snapshot schema
    """

    extractor_name = "public_json_snapshot"

    def __init__(
        self,
        *,
        url: str | None = None,
        source_name: str | None = None,
        include_raw_payload: bool | None = None,
        http_client: ResponsibleHttpClient | None = None,
    ):
        # Constructor arguments override settings; values are stripped so
        # whitespace-only configuration counts as missing.
        self.url = (url or settings.EXTRACTOR_PUBLIC_JSON_URL).strip()
        self.source_name = (source_name or settings.EXTRACTOR_PUBLIC_SOURCE_NAME).strip()
        self.include_raw_payload = (
            settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD if include_raw_payload is None else include_raw_payload
        )
        if not self.url:
            raise ExtractorConfigError("EXTRACTOR_PUBLIC_JSON_URL is required for public_json_snapshot extractor.")
        if not self.source_name:
            raise ExtractorConfigError("EXTRACTOR_PUBLIC_SOURCE_NAME must not be empty.")
        # Injectable http_client keeps the extractor testable without network.
        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )

    def fetch(self) -> Any:
        """Fetch the raw JSON payload from the configured endpoint."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Return the row list from a bare array or a records/data wrapper."""
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("Fetched payload must be a JSON object or array.")
        rows = payload.get("records")
        if isinstance(rows, list):
            return rows
        data_rows = payload.get("data")
        if isinstance(data_rows, list):
            return data_rows
        raise ExtractorParseError("Payload must contain 'records' or 'data' list.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one upstream row into the HoopScout snapshot record shape.

        Unlike the bcl/lba extractors, only flat row keys are consulted (no
        nested player/team objects). Raises ExtractorNormalizationError when
        any REQUIRED_RECORD_FIELDS entry is missing or empty.
        """
        normalized = {
            "competition_external_id": _first_non_empty(
                source_record, "competition_external_id", "competition_id", "league_id"
            ),
            "competition_name": _first_non_empty(
                source_record, "competition_name", "competition", "league_name"
            ),
            "season": _first_non_empty(source_record, "season", "season_label", "season_name"),
            "team_external_id": _first_non_empty(source_record, "team_external_id", "team_id"),
            "team_name": _first_non_empty(source_record, "team_name", "team"),
            "player_external_id": _first_non_empty(source_record, "player_external_id", "player_id"),
            "full_name": _first_non_empty(source_record, "full_name", "player_name", "name"),
            "first_name": _first_non_empty(source_record, "first_name"),
            "last_name": _first_non_empty(source_record, "last_name"),
            "birth_date": _first_non_empty(source_record, "birth_date"),
            "nationality": _first_non_empty(source_record, "nationality", "nationality_code"),
            "height_cm": _first_non_empty(source_record, "height_cm"),
            "weight_kg": _first_non_empty(source_record, "weight_kg"),
            "position": _first_non_empty(source_record, "position"),
            "role": _first_non_empty(source_record, "role"),
            "games_played": _first_non_empty(source_record, "games_played", "gp"),
            "minutes_per_game": _first_non_empty(source_record, "minutes_per_game", "mpg"),
            "points_per_game": _first_non_empty(source_record, "points_per_game", "ppg"),
            "rebounds_per_game": _first_non_empty(source_record, "rebounds_per_game", "rpg"),
            "assists_per_game": _first_non_empty(source_record, "assists_per_game", "apg"),
            "steals_per_game": _first_non_empty(source_record, "steals_per_game", "spg"),
            "blocks_per_game": _first_non_empty(source_record, "blocks_per_game", "bpg"),
            "turnovers_per_game": _first_non_empty(source_record, "turnovers_per_game", "tov"),
            "fg_pct": _first_non_empty(source_record, "fg_pct"),
            "three_pt_pct": _first_non_empty(
                source_record, "three_pt_pct", "three_point_pct", "three_pct", "3p_pct"
            ),
            "ft_pct": _first_non_empty(source_record, "ft_pct"),
        }
        missing = [key for key in REQUIRED_RECORD_FIELDS if normalized.get(key) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(
                f"public_json_snapshot row missing required fields: {', '.join(sorted(missing))}"
            )
        # Identity fields are always emitted as stripped strings.
        normalized["season"] = str(normalized["season"]).strip()
        normalized["competition_external_id"] = str(normalized["competition_external_id"]).strip()
        normalized["team_external_id"] = str(normalized["team_external_id"]).strip()
        normalized["player_external_id"] = str(normalized["player_external_id"]).strip()
        if self.include_raw_payload:
            # Keep the untouched upstream row for debugging/auditing.
            normalized["raw_payload"] = source_record
        return normalized

View File

@ -0,0 +1,26 @@
from __future__ import annotations
from .bcl import BCLSnapshotExtractor
from .base import BaseSnapshotExtractor, ExtractorConfigError
from .lba import LBASnapshotExtractor
from .public_json import PublicJsonSnapshotExtractor
# Maps the CLI-facing extractor name (each class's extractor_name attribute)
# to its implementing class; consumed by available_extractors()/create_extractor().
EXTRACTOR_REGISTRY: dict[str, type[BaseSnapshotExtractor]] = {
    BCLSnapshotExtractor.extractor_name: BCLSnapshotExtractor,
    LBASnapshotExtractor.extractor_name: LBASnapshotExtractor,
    PublicJsonSnapshotExtractor.extractor_name: PublicJsonSnapshotExtractor,
}
def available_extractors() -> list[str]:
    """Return the registered extractor names in alphabetical order."""
    return sorted(EXTRACTOR_REGISTRY)
def create_extractor(extractor_name: str) -> BaseSnapshotExtractor:
    """Instantiate the extractor registered under *extractor_name*.

    Raises:
        ExtractorConfigError: when the name is not in EXTRACTOR_REGISTRY.
    """
    try:
        registered_cls = EXTRACTOR_REGISTRY[extractor_name]
    except KeyError as missing:
        raise ExtractorConfigError(
            f"Unknown extractor '{extractor_name}'. Available: {', '.join(available_extractors())}"
        ) from missing
    return registered_cls()

View File

View File

@ -0,0 +1,23 @@
from django.conf import settings
from django.core.management.base import BaseCommand
from apps.ingestion.services.snapshot_import import SnapshotImporter
class Command(BaseCommand):
    """Management command wrapping SnapshotImporter over the configured
    incoming/archive/failed directories and printing a run summary."""

    help = "Import static JSON snapshots from incoming directory into PostgreSQL."

    def handle(self, *args, **options):
        # Directory layout comes entirely from settings; the importer owns
        # all file movement and database writes.
        importer = SnapshotImporter(
            incoming_dir=settings.STATIC_DATASET_INCOMING_DIR,
            archive_dir=settings.STATIC_DATASET_ARCHIVE_DIR,
            failed_dir=settings.STATIC_DATASET_FAILED_DIR,
        )
        run = importer.run()
        self.stdout.write(
            self.style.SUCCESS(
                f"Import run {run.id} completed: status={run.status} "
                f"files={run.files_processed}/{run.files_total} "
                f"rows_upserted={run.rows_upserted} rows_failed={run.rows_failed}"
            )
        )

View File

@ -0,0 +1,61 @@
from __future__ import annotations
from django.core.management.base import BaseCommand, CommandError
from django.utils.dateparse import parse_date
from apps.ingestion.extractors import ExtractorError, create_extractor
class Command(BaseCommand):
    """Convenience wrapper that runs only the "bcl" extractor.

    NOTE(review): duplicates run_extractor except for the hard-coded name;
    could delegate to it in a later cleanup.
    """

    help = "Run only the BCL extractor and emit an import-ready snapshot JSON."

    def add_arguments(self, parser):
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json path to write output (default incoming dir).",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate without writing output.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted file.",
        )

    def handle(self, *args, **options):
        snapshot_date = None
        if options["snapshot_date"]:
            snapshot_date = parse_date(options["snapshot_date"])
            if snapshot_date is None:
                raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        try:
            extractor = create_extractor("bcl")
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            # All extractor failures surface as CLI errors with their message.
            raise CommandError(str(exc)) from exc
        output = str(result.output_path) if result.output_path else "<dry-run>"
        self.stdout.write(
            self.style.SUCCESS(
                f"BCL extractor completed: source={result.source_name} "
                f"date={result.snapshot_date} records={result.records_count} output={output}"
            )
        )

View File

@ -0,0 +1,45 @@
from __future__ import annotations
from django.core.management.base import BaseCommand, CommandError
from django.utils.dateparse import parse_date
from apps.ingestion.services.daily_orchestration import run_daily_orchestration
class Command(BaseCommand):
    """Run the daily extract-then-import orchestration and print a summary."""

    help = "Run daily HoopScout v2 workflow: extract snapshots, then import snapshots."

    def add_arguments(self, parser):
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date for all extractor outputs (YYYY-MM-DD).",
        )

    def handle(self, *args, **options):
        snapshot_date = None
        if options["snapshot_date"]:
            snapshot_date = parse_date(options["snapshot_date"])
            if snapshot_date is None:
                raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        try:
            result = run_daily_orchestration(snapshot_date=snapshot_date)
        except Exception as exc:  # noqa: BLE001
            # Deliberate broad boundary catch: any orchestration failure is
            # converted into a CLI error with its message.
            raise CommandError(str(exc)) from exc
        extractor_summary = ", ".join(
            f"{row.extractor_name}:{row.records_count}" for row in result.extractors_run
        )
        self.stdout.write(
            self.style.SUCCESS(
                "Daily orchestration completed: "
                f"extractors=[{extractor_summary}] "
                f"import_run={result.import_run_id} "
                f"import_status={result.import_status} "
                f"files_processed={result.files_processed} "
                f"rows_upserted={result.rows_upserted} "
                f"rows_failed={result.rows_failed}"
            )
        )

View File

@ -0,0 +1,63 @@
from __future__ import annotations
from django.core.management.base import BaseCommand, CommandError
from django.utils.dateparse import parse_date
from apps.ingestion.extractors import ExtractorError, available_extractors, create_extractor
class Command(BaseCommand):
    """Run any registered snapshot extractor by name and report the result."""

    help = "Run a snapshot extractor and emit importable JSON snapshots."

    def add_arguments(self, parser):
        parser.add_argument("extractor_name", choices=available_extractors())
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json file path where snapshot should be written. Defaults to incoming dir.",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate extraction/normalization without writing snapshot file.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted snapshots.",
        )

    def _resolve_snapshot_date(self, options):
        """Parse --snapshot-date into a date, or None when not supplied."""
        raw_value = options["snapshot_date"]
        if not raw_value:
            return None
        parsed = parse_date(raw_value)
        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        resolved_date = self._resolve_snapshot_date(options)
        try:
            result = create_extractor(options["extractor_name"]).run(
                output_path=options["output_path"],
                snapshot_date=resolved_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            raise CommandError(str(exc)) from exc
        rendered_output = str(result.output_path) if result.output_path else "<dry-run>"
        self.stdout.write(
            self.style.SUCCESS(
                f"Extractor {result.extractor_name} completed: "
                f"source={result.source_name} date={result.snapshot_date} "
                f"records={result.records_count} output={rendered_output}"
            )
        )

View File

@ -0,0 +1,61 @@
from __future__ import annotations
from django.core.management.base import BaseCommand, CommandError
from django.utils.dateparse import parse_date
from apps.ingestion.extractors import ExtractorError, create_extractor
class Command(BaseCommand):
    """Convenience wrapper that runs only the "lba" extractor.

    NOTE(review): duplicates run_extractor except for the hard-coded name;
    could delegate to it in a later cleanup.
    """

    help = "Run only the LBA extractor and emit an import-ready snapshot JSON."

    def add_arguments(self, parser):
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json path to write output (default incoming dir).",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate without writing output.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted file.",
        )

    def handle(self, *args, **options):
        snapshot_date = None
        if options["snapshot_date"]:
            snapshot_date = parse_date(options["snapshot_date"])
            if snapshot_date is None:
                raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        try:
            extractor = create_extractor("lba")
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            # All extractor failures surface as CLI errors with their message.
            raise CommandError(str(exc)) from exc
        output = str(result.output_path) if result.output_path else "<dry-run>"
        self.stdout.write(
            self.style.SUCCESS(
                f"LBA extractor completed: source={result.source_name} "
                f"date={result.snapshot_date} records={result.records_count} output={output}"
            )
        )

View File

@ -0,0 +1,91 @@
# Generated by Django 5.2.12 on 2026-03-13 12:44
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create ImportRun/ImportFile tracking models for snapshot imports.

    Auto-generated by makemigrations; do not hand-edit once applied.
    """

    dependencies = [
        ('ingestion', '0002_ingestionrun_error_summary'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='ImportRun',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.CharField(default='snapshot', max_length=80)),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('running', 'Running'), ('success', 'Success'), ('failed', 'Failed'), ('canceled', 'Canceled')], default='pending', max_length=24)),
                ('started_at', models.DateTimeField(blank=True, null=True)),
                ('finished_at', models.DateTimeField(blank=True, null=True)),
                ('files_total', models.PositiveIntegerField(default=0)),
                ('files_processed', models.PositiveIntegerField(default=0)),
                ('rows_total', models.PositiveIntegerField(default=0)),
                ('rows_upserted', models.PositiveIntegerField(default=0)),
                ('rows_failed', models.PositiveIntegerField(default=0)),
                ('error_summary', models.TextField(blank=True, default='')),
                ('context', models.JSONField(blank=True, default=dict)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('triggered_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='import_runs', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.CreateModel(
            name='ImportFile',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('relative_path', models.CharField(max_length=260)),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('success', 'Success'), ('failed', 'Failed'), ('skipped', 'Skipped')], default='pending', max_length=24)),
                ('checksum', models.CharField(blank=True, max_length=128)),
                ('file_size_bytes', models.PositiveBigIntegerField(blank=True, null=True)),
                ('rows_total', models.PositiveIntegerField(default=0)),
                ('rows_upserted', models.PositiveIntegerField(default=0)),
                ('rows_failed', models.PositiveIntegerField(default=0)),
                ('error_message', models.TextField(blank=True)),
                ('payload_preview', models.JSONField(blank=True, default=dict)),
                ('processed_at', models.DateTimeField(blank=True, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('import_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='files', to='ingestion.importrun')),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['source', 'status'], name='ingestion_i_source_61db63_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['created_at'], name='ingestion_i_created_93c115_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['started_at'], name='ingestion_i_started_bf1d94_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['finished_at'], name='ingestion_i_finishe_73cbed_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['import_run', 'status'], name='ingestion_i_import__075f75_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['relative_path'], name='ingestion_i_relativ_183e34_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['processed_at'], name='ingestion_i_process_dfc080_idx'),
        ),
        migrations.AddConstraint(
            model_name='importfile',
            constraint=models.UniqueConstraint(fields=('import_run', 'relative_path'), name='uq_import_file_per_run_path'),
        ),
    ]

View File

@ -0,0 +1,27 @@
# Generated by Django 5.2.12 on 2026-03-13 12:59
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add per-file snapshot metadata (source name + date) to ImportFile."""

    dependencies = [
        ('ingestion', '0003_importrun_importfile_and_more'),
    ]

    operations = [
        # Nullable/blank so rows created before this migration stay valid.
        migrations.AddField(
            model_name='importfile',
            name='snapshot_date',
            field=models.DateField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='importfile',
            name='source_name',
            field=models.CharField(blank=True, max_length=120),
        ),
        # Composite index supporting lookups by source and snapshot day.
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['source_name', 'snapshot_date'], name='ingestion_i_source__de6843_idx'),
        ),
    ]

View File

@ -2,6 +2,93 @@ from django.conf import settings
from django.db import models from django.db import models
class ImportRun(models.Model):
    """One execution of the snapshot import pipeline (a batch of files)."""

    class RunStatus(models.TextChoices):
        # Runs start PENDING; the importer sets RUNNING and finalizes the rest.
        PENDING = "pending", "Pending"
        RUNNING = "running", "Running"
        SUCCESS = "success", "Success"
        FAILED = "failed", "Failed"
        CANCELED = "canceled", "Canceled"

    # Logical origin of the run, e.g. "static_snapshot_json" for SnapshotImporter.
    source = models.CharField(max_length=80, default="snapshot")
    status = models.CharField(max_length=24, choices=RunStatus.choices, default=RunStatus.PENDING)
    # Optional initiating user; SET_NULL keeps run history if the user is deleted.
    triggered_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        related_name="import_runs",
    )
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)
    # Aggregate counters rolled up from this run's ImportFile rows.
    files_total = models.PositiveIntegerField(default=0)
    files_processed = models.PositiveIntegerField(default=0)
    rows_total = models.PositiveIntegerField(default=0)
    rows_upserted = models.PositiveIntegerField(default=0)
    rows_failed = models.PositiveIntegerField(default=0)
    error_summary = models.TextField(blank=True, default="")
    # Free-form run metadata (the importer records its incoming/archive/failed dirs).
    context = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-created_at"]
        indexes = [
            models.Index(fields=["source", "status"]),
            models.Index(fields=["created_at"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["finished_at"]),
        ]

    def __str__(self) -> str:
        return f"{self.source} | {self.status} | {self.created_at:%Y-%m-%d %H:%M}"
class ImportFile(models.Model):
    """One snapshot file handled (imported, skipped, or failed) within an ImportRun."""

    class FileStatus(models.TextChoices):
        PENDING = "pending", "Pending"
        PROCESSING = "processing", "Processing"
        SUCCESS = "success", "Success"
        FAILED = "failed", "Failed"
        SKIPPED = "skipped", "Skipped"

    import_run = models.ForeignKey(
        "ingestion.ImportRun",
        on_delete=models.CASCADE,
        related_name="files",
    )
    # File name within the incoming directory; unique per run (see Meta).
    relative_path = models.CharField(max_length=260)
    # Snapshot identity read from the validated payload (blank until parsed).
    source_name = models.CharField(max_length=120, blank=True)
    snapshot_date = models.DateField(blank=True, null=True)
    status = models.CharField(max_length=24, choices=FileStatus.choices, default=FileStatus.PENDING)
    # SHA-256 of the file content; used to skip duplicate files across runs.
    checksum = models.CharField(max_length=128, blank=True)
    file_size_bytes = models.PositiveBigIntegerField(blank=True, null=True)
    rows_total = models.PositiveIntegerField(default=0)
    rows_upserted = models.PositiveIntegerField(default=0)
    rows_failed = models.PositiveIntegerField(default=0)
    error_message = models.TextField(blank=True)
    # Small debugging sample of the parsed payload (source, date, first record).
    payload_preview = models.JSONField(default=dict, blank=True)
    processed_at = models.DateTimeField(blank=True, null=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-created_at"]
        constraints = [
            models.UniqueConstraint(
                fields=["import_run", "relative_path"],
                name="uq_import_file_per_run_path",
            ),
        ]
        indexes = [
            models.Index(fields=["import_run", "status"]),
            models.Index(fields=["relative_path"]),
            models.Index(fields=["source_name", "snapshot_date"]),
            models.Index(fields=["processed_at"]),
        ]

    def __str__(self) -> str:
        return f"{self.relative_path} [{self.status}]"
class IngestionRun(models.Model): class IngestionRun(models.Model):
class RunStatus(models.TextChoices): class RunStatus(models.TextChoices):
PENDING = "pending", "Pending" PENDING = "pending", "Pending"

View File

@ -1,9 +1,14 @@
from django.conf import settings
from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run
from .sync import run_sync_job
__all__ = [ __all__ = [
"start_ingestion_run", "start_ingestion_run",
"finish_ingestion_run", "finish_ingestion_run",
"log_ingestion_error", "log_ingestion_error",
"run_sync_job",
] ]
if settings.LEGACY_PROVIDER_STACK_ENABLED:
from .sync import run_sync_job # pragma: no cover - legacy provider stack only.
__all__.append("run_sync_job")

View File

@ -0,0 +1,84 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import date
from pathlib import Path
from django.conf import settings
from apps.ingestion.extractors import create_extractor
from apps.ingestion.services.snapshot_import import SnapshotImporter
logger = logging.getLogger(__name__)
@dataclass
class ExtractorRunSummary:
    """Outcome of one extractor's run within the daily orchestration."""

    extractor_name: str
    records_count: int
    # Path of the file the extractor wrote; type allows None — confirm extractor contract.
    output_path: Path | None
@dataclass
class DailyOrchestrationResult:
    """Aggregate result of one daily extraction + import cycle."""

    extractors_run: list[ExtractorRunSummary]
    import_run_id: int
    import_status: str
    files_processed: int
    rows_upserted: int
    rows_failed: int
def parse_enabled_extractors(raw_value: str) -> list[str]:
    """Split a comma-separated setting into trimmed, non-empty extractor names."""
    names: list[str] = []
    for piece in raw_value.split(","):
        name = piece.strip()
        if name:
            names.append(name)
    return names
def run_daily_orchestration(*, snapshot_date: date | None = None) -> DailyOrchestrationResult:
    """Run every configured extractor, then import the produced snapshots.

    Extractor names come from ``settings.DAILY_ORCHESTRATION_EXTRACTORS``
    (comma-separated).  Raises ValueError when that setting resolves to an
    empty list.  Returns a summary of each extractor's output plus the
    resulting import run's counters.
    """
    enabled = parse_enabled_extractors(settings.DAILY_ORCHESTRATION_EXTRACTORS)
    if not enabled:
        raise ValueError("DAILY_ORCHESTRATION_EXTRACTORS cannot be empty.")

    run_summaries: list[ExtractorRunSummary] = []
    for name in enabled:
        logger.info("daily_orchestration_extractor_start extractor=%s", name)
        extraction = create_extractor(name).run(snapshot_date=snapshot_date)
        run_summaries.append(
            ExtractorRunSummary(
                extractor_name=name,
                records_count=extraction.records_count,
                output_path=extraction.output_path,
            )
        )
        logger.info(
            "daily_orchestration_extractor_done extractor=%s records=%s output=%s",
            name,
            extraction.records_count,
            extraction.output_path,
        )

    # Import everything the extractors dropped into the incoming directory.
    snapshot_importer = SnapshotImporter(
        incoming_dir=settings.STATIC_DATASET_INCOMING_DIR,
        archive_dir=settings.STATIC_DATASET_ARCHIVE_DIR,
        failed_dir=settings.STATIC_DATASET_FAILED_DIR,
    )
    finished_run = snapshot_importer.run()
    logger.info(
        "daily_orchestration_import_done run_id=%s status=%s files=%s/%s upserted=%s failed=%s",
        finished_run.id,
        finished_run.status,
        finished_run.files_processed,
        finished_run.files_total,
        finished_run.rows_upserted,
        finished_run.rows_failed,
    )
    return DailyOrchestrationResult(
        extractors_run=run_summaries,
        import_run_id=finished_run.id,
        import_status=finished_run.status,
        files_processed=finished_run.files_processed,
        rows_upserted=finished_run.rows_upserted,
        rows_failed=finished_run.rows_failed,
    )

View File

@ -0,0 +1,357 @@
from __future__ import annotations
import hashlib
import json
import shutil
from dataclasses import dataclass
from datetime import date, datetime
from decimal import Decimal
from pathlib import Path
from typing import Any
from django.db import transaction
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.dateparse import parse_date
from apps.competitions.models import Competition, Season
from apps.ingestion.models import ImportFile, ImportRun
from apps.ingestion.snapshots import SnapshotSchemaValidator, SnapshotValidationError
from apps.players.models import Nationality, Player, Position, Role
from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team
@dataclass
class ImportSummary:
    """Mutable tally accumulated across one SnapshotImporter.run() invocation."""

    # Number of JSON files discovered in the incoming directory.
    files_total: int = 0
    # Files handled in any way (success, skipped duplicate, or failed).
    files_processed: int = 0
    rows_total: int = 0
    rows_upserted: int = 0
    rows_failed: int = 0
def _safe_move(src: Path, destination_dir: Path) -> Path:
destination_dir.mkdir(parents=True, exist_ok=True)
candidate = destination_dir / src.name
if candidate.exists():
ts = datetime.utcnow().strftime("%Y%m%d%H%M%S")
candidate = destination_dir / f"{src.stem}-{ts}{src.suffix}"
shutil.move(str(src), str(candidate))
return candidate
def _file_checksum(path: Path) -> str:
digest = hashlib.sha256()
with path.open("rb") as handle:
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
digest.update(chunk)
return digest.hexdigest()
def _normalize_decimal(value: float | int | str) -> Decimal:
return Decimal(str(value)).quantize(Decimal("0.01"))
def _parse_season_dates(label: str) -> tuple[date, date]:
if "-" in label:
first = label.split("-", 1)[0]
else:
first = label
year = int(first)
return date(year, 9, 1), date(year + 1, 7, 31)
def _parse_optional_birth_date(value: str | None) -> date | None:
    """Parse an ISO birth-date string; None/"" mean the date is absent."""
    if value is None or value == "":
        return None
    return parse_date(value)
def _split_name_parts(full_name: str) -> tuple[str, str]:
parts = full_name.strip().split(maxsplit=1)
if not parts:
return "", ""
if len(parts) == 1:
return parts[0], ""
return parts[0], parts[1]
def _resolve_nationality(value: str | None) -> Nationality | None:
    """Map a raw nationality token to a Nationality row, or None.

    Two-character tokens are treated as ISO alpha-2 codes and created on
    demand (name defaults to the code itself).  Longer tokens are matched
    against existing rows by case-insensitive name; no match yields None
    rather than creating a row.
    """
    if not value:
        return None
    token = value.strip()
    if not token:
        return None
    if len(token) == 2:
        code = token.upper()
        obj, _ = Nationality.objects.get_or_create(
            iso2_code=code,
            defaults={"name": code},
        )
        return obj
    return Nationality.objects.filter(name__iexact=token).first()
def _position_code(position_value: str) -> str:
token = position_value.strip().upper().replace(" ", "_")
return (token[:10] or "UNK")
def _role_code(role_value: str) -> str:
    """Slug-derived role code with underscores, capped at 32 chars ("unknown" fallback)."""
    code = slugify(role_value).replace("-", "_")[:32]
    return code or "unknown"
def _player_season_source_uid(record: dict[str, Any], source_name: str, snapshot_date: date) -> str:
return (
f"{source_name}:{snapshot_date.isoformat()}:"
f"{record['competition_external_id']}:{record['season']}:"
f"{record['team_external_id']}:{record['player_external_id']}"
)
def _source_slug(*, source_name: str, base_name: str, fallback_prefix: str, fallback_external_id: str) -> str:
    """Build a "<source>-<base>" slug with deterministic fallbacks.

    An unsluggable base name falls back to "<prefix>-<external id>"; an
    unsluggable source name falls back to "snapshot".
    """
    name_part = slugify(base_name) or f"{fallback_prefix}-{fallback_external_id}"
    prefix_part = slugify(source_name) or "snapshot"
    return f"{prefix_part}-{name_part}"
def _normalized_source_name(source_name: str) -> str:
return source_name.strip().lower()
def _upsert_record(record: dict[str, Any], *, source_name: str, snapshot_date: date) -> None:
    """Upsert one validated snapshot record into the domain models.

    Creates/updates, in dependency order: Competition, Season, Team, the
    optional Position/Role lookups, Player, then PlayerSeason and its
    PlayerSeasonStats.  Competition/Team/Player identity is namespaced by
    the normalized source name plus each entity's external id, so the same
    external id from different providers cannot collide.
    """
    source_key = _normalized_source_name(source_name)
    competition_slug = _source_slug(
        source_name=source_key,
        base_name=record["competition_name"],
        fallback_prefix="competition",
        fallback_external_id=record["competition_external_id"],
    )
    competition, _ = Competition.objects.update_or_create(
        source_name=source_key,
        source_uid=record["competition_external_id"],
        defaults={
            "name": record["competition_name"],
            "slug": competition_slug,
            "competition_type": Competition.CompetitionType.LEAGUE,
            "is_active": True,
        },
    )
    # Seasons are global (not source-namespaced): one row per season label.
    start_date, end_date = _parse_season_dates(record["season"])
    season, _ = Season.objects.update_or_create(
        source_uid=f"season:{record['season']}",
        defaults={
            "label": record["season"],
            "start_date": start_date,
            "end_date": end_date,
            "is_current": False,
        },
    )
    team_slug = _source_slug(
        source_name=source_key,
        base_name=record["team_name"],
        fallback_prefix="team",
        fallback_external_id=record["team_external_id"],
    )
    team, _ = Team.objects.update_or_create(
        source_name=source_key,
        source_uid=record["team_external_id"],
        defaults={
            "name": record["team_name"],
            "slug": team_slug,
            "short_name": "",
        },
    )
    # Optional lookup rows, created on first sight of a new code.
    position_value = record.get("position")
    position = None
    if position_value:
        position, _ = Position.objects.get_or_create(
            code=_position_code(position_value),
            defaults={"name": position_value},
        )
    role = None
    if record.get("role"):
        role, _ = Role.objects.get_or_create(
            code=_role_code(record["role"]),
            defaults={"name": record["role"]},
        )
    # Prefer explicit first/last names; otherwise derive both from full_name.
    first_name = record.get("first_name") or ""
    last_name = record.get("last_name") or ""
    if not first_name and not last_name:
        first_name, last_name = _split_name_parts(record["full_name"])
    player, _ = Player.objects.update_or_create(
        source_name=source_key,
        source_uid=record["player_external_id"],
        defaults={
            "first_name": first_name,
            "last_name": last_name,
            "full_name": record["full_name"],
            "birth_date": _parse_optional_birth_date(record.get("birth_date")),
            "nationality": _resolve_nationality(record.get("nationality")),
            "nominal_position": position,
            "inferred_role": role,
            "height_cm": record.get("height_cm"),
            "weight_kg": record.get("weight_kg"),
            "is_active": True,
        },
    )
    player_season, _ = PlayerSeason.objects.update_or_create(
        source_uid=_player_season_source_uid(record, source_name=source_key, snapshot_date=snapshot_date),
        defaults={
            "player": player,
            "season": season,
            "team": team,
            "competition": competition,
            "games_played": int(record["games_played"]),
            "games_started": 0,
            # Total minutes reconstructed from per-game average x games played.
            "minutes_played": int(round(float(record["minutes_per_game"]) * int(record["games_played"]))),
        },
    )
    # Per-game averages are stored quantized to two decimal places.
    PlayerSeasonStats.objects.update_or_create(
        player_season=player_season,
        defaults={
            "points": _normalize_decimal(record["points_per_game"]),
            "rebounds": _normalize_decimal(record["rebounds_per_game"]),
            "assists": _normalize_decimal(record["assists_per_game"]),
            "steals": _normalize_decimal(record["steals_per_game"]),
            "blocks": _normalize_decimal(record["blocks_per_game"]),
            "turnovers": _normalize_decimal(record["turnovers_per_game"]),
            "fg_pct": _normalize_decimal(record["fg_pct"]),
            "three_pct": _normalize_decimal(record["three_pt_pct"]),
            "ft_pct": _normalize_decimal(record["ft_pct"]),
        },
    )
class SnapshotImporter:
    """File-based importer for validated player-season snapshot JSON files.

    Scans ``incoming_dir`` for top-level ``*.json`` files, validates each
    with SnapshotSchemaValidator, upserts its records inside one database
    transaction per file, then moves the file to ``archive_dir`` (success or
    duplicate skip) or ``failed_dir`` (any failure).  Progress and outcomes
    are recorded on ImportRun/ImportFile rows.
    """

    def __init__(self, *, incoming_dir: str, archive_dir: str, failed_dir: str):
        self.incoming_dir = Path(incoming_dir)
        self.archive_dir = Path(archive_dir)
        self.failed_dir = Path(failed_dir)

    def _list_input_files(self) -> list[Path]:
        # Sorted for deterministic processing order; a missing incoming
        # directory simply means there is nothing to import.
        if not self.incoming_dir.exists():
            return []
        return sorted(path for path in self.incoming_dir.iterdir() if path.is_file() and path.suffix.lower() == ".json")

    def run(self, *, triggered_by=None) -> ImportRun:
        """Process every pending snapshot file and return the finished ImportRun."""
        run = ImportRun.objects.create(
            source="static_snapshot_json",
            status=ImportRun.RunStatus.RUNNING,
            triggered_by=triggered_by,
            started_at=timezone.now(),
            context={
                "incoming_dir": str(self.incoming_dir),
                "archive_dir": str(self.archive_dir),
                "failed_dir": str(self.failed_dir),
            },
        )
        summary = ImportSummary()
        files = self._list_input_files()
        summary.files_total = len(files)
        for path in files:
            checksum = _file_checksum(path)
            file_row = ImportFile.objects.create(
                import_run=run,
                relative_path=path.name,
                status=ImportFile.FileStatus.PROCESSING,
                checksum=checksum,
                file_size_bytes=path.stat().st_size,
            )
            # Duplicate file content previously imported successfully.
            already_imported = ImportFile.objects.filter(
                checksum=checksum,
                status=ImportFile.FileStatus.SUCCESS,
            ).exclude(pk=file_row.pk).exists()
            if already_imported:
                # Archive without re-importing; counts as processed, not failed.
                file_row.status = ImportFile.FileStatus.SKIPPED
                file_row.error_message = "Skipped duplicate checksum already imported successfully."
                file_row.processed_at = timezone.now()
                file_row.save(update_fields=["status", "error_message", "processed_at"])
                _safe_move(path, self.archive_dir)
                summary.files_processed += 1
                continue
            try:
                payload = json.loads(path.read_text(encoding="utf-8"))
                validated = SnapshotSchemaValidator.validate(payload)
                file_row.source_name = validated.source_name
                file_row.snapshot_date = validated.snapshot_date
                file_row.rows_total = len(validated.records)
                # All-or-nothing per file: one failed record rolls back the file.
                with transaction.atomic():
                    for record in validated.records:
                        _upsert_record(record, source_name=validated.source_name, snapshot_date=validated.snapshot_date)
                file_row.status = ImportFile.FileStatus.SUCCESS
                file_row.rows_upserted = len(validated.records)
                file_row.payload_preview = {
                    "source_name": validated.source_name,
                    "snapshot_date": validated.snapshot_date.isoformat(),
                    "sample_record": validated.records[0],
                }
                _safe_move(path, self.archive_dir)
            except (json.JSONDecodeError, SnapshotValidationError, ValueError) as exc:
                # Expected parse/validation failures: quarantine the file.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = str(exc)
                _safe_move(path, self.failed_dir)
            except Exception as exc:  # noqa: BLE001
                # Unexpected failure: still quarantine so the run can continue.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = f"Unhandled import error: {exc}"
                _safe_move(path, self.failed_dir)
            file_row.processed_at = timezone.now()
            file_row.save(
                update_fields=[
                    "source_name",
                    "snapshot_date",
                    "status",
                    "rows_total",
                    "rows_upserted",
                    "rows_failed",
                    "error_message",
                    "payload_preview",
                    "processed_at",
                ]
            )
            summary.files_processed += 1
            summary.rows_total += file_row.rows_total
            summary.rows_upserted += file_row.rows_upserted
            # A failed file adds one failure even when no per-row count exists.
            summary.rows_failed += file_row.rows_failed + (1 if file_row.status == ImportFile.FileStatus.FAILED else 0)
        # Any failure marks the whole run FAILED; skipped duplicates do not.
        run.status = ImportRun.RunStatus.SUCCESS if summary.rows_failed == 0 else ImportRun.RunStatus.FAILED
        run.files_total = summary.files_total
        run.files_processed = summary.files_processed
        run.rows_total = summary.rows_total
        run.rows_upserted = summary.rows_upserted
        run.rows_failed = summary.rows_failed
        run.finished_at = timezone.now()
        if summary.rows_failed:
            run.error_summary = f"{summary.rows_failed} file/row import error(s)."
        run.save(
            update_fields=[
                "status",
                "files_total",
                "files_processed",
                "rows_total",
                "rows_upserted",
                "rows_failed",
                "error_summary",
                "finished_at",
            ]
        )
        return run

View File

@ -0,0 +1,3 @@
# Public surface of the snapshots package: the strict schema validator and
# its result/error types.
from .schema import SnapshotSchemaValidator, SnapshotValidationError, SnapshotValidationResult

__all__ = ["SnapshotSchemaValidator", "SnapshotValidationError", "SnapshotValidationResult"]

View File

@ -0,0 +1,203 @@
from __future__ import annotations

import math
from dataclasses import dataclass
from datetime import date
from typing import Any

from django.utils.dateparse import parse_date
# Per-record fields that every snapshot row must carry.
REQUIRED_RECORD_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
# Per-record fields that may be omitted, empty, or null.
OPTIONAL_RECORD_FIELDS = {
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
}
# Keys permitted on the snapshot's top-level object; anything else is rejected.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name",
    "snapshot_date",
    "records",
    "source_metadata",
    "raw_payload",
}
# Full record whitelist: required + optional plus pass-through extras.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | OPTIONAL_RECORD_FIELDS | {
    "role",
    "source_metadata",
    "raw_payload",
}
@dataclass
class SnapshotValidationResult:
    """Validated snapshot payload: source identity, date, and normalized records."""

    source_name: str
    snapshot_date: date
    records: list[dict[str, Any]]
class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload violates the snapshot schema."""

    pass
class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots."""

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        """Return the stripped string; reject non-strings and blank values."""
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _optional_string(value: Any, field: str) -> str | None:
        """Return a stripped string, or None for absent/blank values."""
        if value in (None, ""):
            return None
        if not isinstance(value, str):
            raise SnapshotValidationError(f"{field} must be a string when provided")
        stripped = value.strip()
        return stripped or None

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        """Coerce to int >= 0; booleans are rejected despite being int subclasses."""
        if isinstance(value, bool):
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        try:
            parsed = int(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be a non-negative integer") from exc
        if parsed < 0:
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        return parsed

    @classmethod
    def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
        """Like _require_non_negative_int, but absent/blank values become None."""
        if value in (None, ""):
            return None
        return cls._require_non_negative_int(value, field)

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        """Coerce to a finite float; booleans and NaN/Infinity are rejected.

        bool is an int subclass, so float(True) would otherwise silently pass
        as 1.0 — rejecting it keeps parity with _require_non_negative_int.
        json.loads also admits NaN/Infinity literals, which would only blow
        up later (Decimal quantization), so they are rejected here instead.
        """
        if isinstance(value, bool):
            raise SnapshotValidationError(f"{field} must be numeric")
        try:
            parsed = float(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be numeric") from exc
        if not math.isfinite(parsed):
            raise SnapshotValidationError(f"{field} must be a finite number")
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate and normalize one record; raises SnapshotValidationError."""
        unknown = set(record.keys()) - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )
        missing = REQUIRED_RECORD_FIELDS - set(record.keys())
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )
        normalized = dict(record)
        # Identity and naming fields must be non-empty strings.
        for field in (
            "competition_external_id",
            "competition_name",
            "season",
            "team_external_id",
            "team_name",
            "player_external_id",
            "full_name",
        ):
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")
        for field in ("first_name", "last_name", "nationality", "position"):
            normalized[field] = cls._optional_string(record.get(field), f"record[{index}].{field}")
        # "role" is optional but, when present, must be a non-empty string.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")
        birth_date_raw = record.get("birth_date")
        if birth_date_raw in (None, ""):
            normalized["birth_date"] = None
        else:
            birth_date = parse_date(str(birth_date_raw))
            if not birth_date:
                raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
            # Stored back as an ISO string so records stay JSON-serializable.
            normalized["birth_date"] = birth_date.isoformat()
        normalized["height_cm"] = cls._optional_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
        normalized["weight_kg"] = cls._optional_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
        normalized["games_played"] = cls._require_non_negative_int(record.get("games_played"), f"record[{index}].games_played")
        for field in (
            "minutes_per_game",
            "points_per_game",
            "rebounds_per_game",
            "assists_per_game",
            "steals_per_game",
            "blocks_per_game",
            "turnovers_per_game",
            "fg_pct",
            "three_pt_pct",
            "ft_pct",
        ):
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")
        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a full snapshot payload and return the normalized result.

        Raises SnapshotValidationError on any structural or field-level
        violation (unknown keys, missing fields, bad types, empty records).
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")
        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )
        source_name = cls._require_string(payload.get("source_name"), "source_name")
        snapshot_date_raw = payload.get("snapshot_date")
        snapshot_date = parse_date(str(snapshot_date_raw))
        if not snapshot_date:
            raise SnapshotValidationError("snapshot_date must be YYYY-MM-DD")
        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")
        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))
        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )

View File

@ -37,6 +37,8 @@ class PlayerCareerEntryInline(admin.TabularInline):
class PlayerAdmin(admin.ModelAdmin): class PlayerAdmin(admin.ModelAdmin):
list_display = ( list_display = (
"full_name", "full_name",
"source_name",
"source_uid",
"birth_date", "birth_date",
"nationality", "nationality",
"nominal_position", "nominal_position",
@ -53,7 +55,7 @@ class PlayerAdmin(admin.ModelAdmin):
"origin_competition", "origin_competition",
"origin_team", "origin_team",
) )
search_fields = ("full_name", "first_name", "last_name") search_fields = ("full_name", "first_name", "last_name", "source_name", "source_uid")
inlines = (PlayerAliasInline, PlayerCareerEntryInline) inlines = (PlayerAliasInline, PlayerCareerEntryInline)
actions = ("recompute_origin_fields",) actions = ("recompute_origin_fields",)

View File

@ -25,10 +25,8 @@ class PlayerSearchForm(forms.Form):
nominal_position = forms.ModelChoiceField(queryset=Position.objects.none(), required=False) nominal_position = forms.ModelChoiceField(queryset=Position.objects.none(), required=False)
inferred_role = forms.ModelChoiceField(queryset=Role.objects.none(), required=False) inferred_role = forms.ModelChoiceField(queryset=Role.objects.none(), required=False)
competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False) competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False)
origin_competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False)
nationality = forms.ModelChoiceField(queryset=Nationality.objects.none(), required=False) nationality = forms.ModelChoiceField(queryset=Nationality.objects.none(), required=False)
team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False) team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False)
origin_team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False)
season = forms.ModelChoiceField(queryset=Season.objects.none(), required=False) season = forms.ModelChoiceField(queryset=Season.objects.none(), required=False)
age_min = forms.IntegerField(required=False, min_value=0, max_value=60, label="Min age") age_min = forms.IntegerField(required=False, min_value=0, max_value=60, label="Min age")
@ -60,20 +58,6 @@ class PlayerSearchForm(forms.Form):
three_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="3P% max") three_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="3P% max")
ft_pct_min = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% min") ft_pct_min = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% min")
ft_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% max") ft_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% max")
efficiency_metric_min = forms.DecimalField(
required=False,
min_value=0,
decimal_places=2,
max_digits=6,
label="Impact metric min",
)
efficiency_metric_max = forms.DecimalField(
required=False,
min_value=0,
decimal_places=2,
max_digits=6,
label="Impact metric max",
)
sort = forms.ChoiceField(choices=SORT_CHOICES, required=False, initial="name_asc") sort = forms.ChoiceField(choices=SORT_CHOICES, required=False, initial="name_asc")
page_size = forms.TypedChoiceField( page_size = forms.TypedChoiceField(
@ -88,10 +72,8 @@ class PlayerSearchForm(forms.Form):
self.fields["nominal_position"].queryset = Position.objects.order_by("code") self.fields["nominal_position"].queryset = Position.objects.order_by("code")
self.fields["inferred_role"].queryset = Role.objects.order_by("name") self.fields["inferred_role"].queryset = Role.objects.order_by("name")
self.fields["competition"].queryset = Competition.objects.order_by("name") self.fields["competition"].queryset = Competition.objects.order_by("name")
self.fields["origin_competition"].queryset = Competition.objects.order_by("name")
self.fields["nationality"].queryset = Nationality.objects.order_by("name") self.fields["nationality"].queryset = Nationality.objects.order_by("name")
self.fields["team"].queryset = Team.objects.order_by("name") self.fields["team"].queryset = Team.objects.order_by("name")
self.fields["origin_team"].queryset = Team.objects.order_by("name")
self.fields["season"].queryset = Season.objects.order_by("-start_date") self.fields["season"].queryset = Season.objects.order_by("-start_date")
def clean(self): def clean(self):
@ -110,7 +92,6 @@ class PlayerSearchForm(forms.Form):
self._validate_min_max(cleaned_data, "fg_pct_min", "fg_pct_max") self._validate_min_max(cleaned_data, "fg_pct_min", "fg_pct_max")
self._validate_min_max(cleaned_data, "three_pct_min", "three_pct_max") self._validate_min_max(cleaned_data, "three_pct_min", "three_pct_max")
self._validate_min_max(cleaned_data, "ft_pct_min", "ft_pct_max") self._validate_min_max(cleaned_data, "ft_pct_min", "ft_pct_max")
self._validate_min_max(cleaned_data, "efficiency_metric_min", "efficiency_metric_max")
if not cleaned_data.get("sort"): if not cleaned_data.get("sort"):
cleaned_data["sort"] = "name_asc" cleaned_data["sort"] = "name_asc"

View File

@ -0,0 +1,24 @@
# Generated by Django 5.2.12 on 2026-03-13 12:44
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add an external source_uid to Player, plus a lookup index."""

    dependencies = [
        ('competitions', '0003_competition_source_uid_season_source_uid_and_more'),
        ('players', '0005_player_weight_index'),
        ('teams', '0001_initial'),
    ]

    operations = [
        # NOTE(review): unique=True here is a global uniqueness; migration 0007
        # relaxes it to a (source_name, source_uid) conditional constraint.
        migrations.AddField(
            model_name='player',
            name='source_uid',
            field=models.CharField(blank=True, max_length=120, null=True, unique=True),
        ),
        migrations.AddIndex(
            model_name='player',
            index=models.Index(fields=['source_uid'], name='players_pla_source__93bb47_idx'),
        ),
    ]

View File

@ -0,0 +1,39 @@
# Generated by Django 5.2.12 on 2026-03-13 15:08
from django.db import migrations, models


class Migration(migrations.Migration):
    """Namespace player source identity by (source_name, source_uid).

    Drops both the global source_uid uniqueness introduced in 0006 (via the
    AlterField below) and the old full_name/birth_date natural key, so the
    same external id coming from different providers no longer collides.
    """

    dependencies = [
        ("players", "0006_player_source_uid_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="player",
            name="source_name",
            field=models.CharField(blank=True, default="", max_length=120),
        ),
        # Removes unique=True added by 0006; uniqueness is re-added below as a
        # conditional composite constraint.
        migrations.AlterField(
            model_name="player",
            name="source_uid",
            field=models.CharField(blank=True, max_length=120, null=True),
        ),
        migrations.RemoveConstraint(
            model_name="player",
            name="uq_player_full_name_birth_date",
        ),
        # Only rows with a real (non-null, non-empty) source_uid participate.
        migrations.AddConstraint(
            model_name="player",
            constraint=models.UniqueConstraint(
                condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
                fields=("source_name", "source_uid"),
                name="uq_player_source_namespace_uid",
            ),
        ),
        migrations.AddIndex(
            model_name="player",
            index=models.Index(fields=["source_name", "source_uid"], name="players_pla_source__73848c_idx"),
        ),
    ]

View File

@ -58,6 +58,8 @@ class Player(TimeStampedModel):
first_name = models.CharField(max_length=120) first_name = models.CharField(max_length=120)
last_name = models.CharField(max_length=120) last_name = models.CharField(max_length=120)
full_name = models.CharField(max_length=260) full_name = models.CharField(max_length=260)
source_name = models.CharField(max_length=120, blank=True, default="")
source_uid = models.CharField(max_length=120, blank=True, null=True)
birth_date = models.DateField(blank=True, null=True) birth_date = models.DateField(blank=True, null=True)
nationality = models.ForeignKey( nationality = models.ForeignKey(
"players.Nationality", "players.Nationality",
@ -108,12 +110,15 @@ class Player(TimeStampedModel):
ordering = ["full_name", "id"] ordering = ["full_name", "id"]
constraints = [ constraints = [
models.UniqueConstraint( models.UniqueConstraint(
fields=["full_name", "birth_date"], fields=["source_name", "source_uid"],
name="uq_player_full_name_birth_date", condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
name="uq_player_source_namespace_uid",
) )
] ]
indexes = [ indexes = [
models.Index(fields=["full_name"]), models.Index(fields=["full_name"]),
models.Index(fields=["source_name", "source_uid"]),
models.Index(fields=["source_uid"]),
models.Index(fields=["last_name", "first_name"]), models.Index(fields=["last_name", "first_name"]),
models.Index(fields=["birth_date"]), models.Index(fields=["birth_date"]),
models.Index(fields=["nationality"]), models.Index(fields=["nationality"]),

View File

@ -14,7 +14,6 @@ from django.db.models import (
Value, Value,
When, When,
) )
from django.db.models.functions import Coalesce
from apps.players.models import Player from apps.players.models import Player
from apps.stats.models import PlayerSeason from apps.stats.models import PlayerSeason
@ -22,7 +21,8 @@ from apps.stats.models import PlayerSeason
METRIC_SORT_KEYS = {"ppg_desc", "ppg_asc", "mpg_desc", "mpg_asc"} METRIC_SORT_KEYS = {"ppg_desc", "ppg_asc", "mpg_desc", "mpg_asc"}
SEARCH_METRIC_SEMANTICS_TEXT = ( SEARCH_METRIC_SEMANTICS_TEXT = (
"Search metrics are best eligible values per player (max per metric across eligible player-season rows). " "Search metrics are best eligible values per player (max per metric across eligible player-season rows). "
"With season/team/competition/stat filters, eligibility is scoped by those filters." "With season/team/competition/stat filters, eligibility is scoped by those filters. "
"When no eligible stat exists in the current filter context, metric cells show '-'."
) )
@ -73,8 +73,6 @@ def _season_scope_filter_keys() -> tuple[str, ...]:
"three_pct_max", "three_pct_max",
"ft_pct_min", "ft_pct_min",
"ft_pct_max", "ft_pct_max",
"efficiency_metric_min",
"efficiency_metric_max",
) )
@ -121,7 +119,6 @@ def _apply_player_season_scope_filters(queryset, data: dict):
("fg_pct_min", "fg_pct_max", "stats__fg_pct"), ("fg_pct_min", "fg_pct_max", "stats__fg_pct"),
("three_pct_min", "three_pct_max", "stats__three_pct"), ("three_pct_min", "three_pct_max", "stats__three_pct"),
("ft_pct_min", "ft_pct_max", "stats__ft_pct"), ("ft_pct_min", "ft_pct_max", "stats__ft_pct"),
("efficiency_metric_min", "efficiency_metric_max", "stats__player_efficiency_rating"),
) )
for min_key, max_key, field_name in stat_pairs: for min_key, max_key, field_name in stat_pairs:
queryset = _apply_min_max_filter(queryset, min_key, max_key, field_name, data) queryset = _apply_min_max_filter(queryset, min_key, max_key, field_name, data)
@ -149,11 +146,6 @@ def _build_metric_context_filter(data: dict) -> Q:
("fg_pct_min", "fg_pct_max", "player_seasons__stats__fg_pct"), ("fg_pct_min", "fg_pct_max", "player_seasons__stats__fg_pct"),
("three_pct_min", "three_pct_max", "player_seasons__stats__three_pct"), ("three_pct_min", "three_pct_max", "player_seasons__stats__three_pct"),
("ft_pct_min", "ft_pct_max", "player_seasons__stats__ft_pct"), ("ft_pct_min", "ft_pct_max", "player_seasons__stats__ft_pct"),
(
"efficiency_metric_min",
"efficiency_metric_max",
"player_seasons__stats__player_efficiency_rating",
),
) )
for min_key, max_key, field_name in minmax_pairs: for min_key, max_key, field_name in minmax_pairs:
min_value = data.get(min_key) min_value = data.get(min_key)
@ -188,10 +180,6 @@ def filter_players(queryset, data: dict):
queryset = queryset.filter(inferred_role=data["inferred_role"]) queryset = queryset.filter(inferred_role=data["inferred_role"])
if data.get("nationality"): if data.get("nationality"):
queryset = queryset.filter(nationality=data["nationality"]) queryset = queryset.filter(nationality=data["nationality"])
if data.get("origin_competition"):
queryset = queryset.filter(origin_competition=data["origin_competition"])
if data.get("origin_team"):
queryset = queryset.filter(origin_team=data["origin_team"])
queryset = _apply_min_max_filter(queryset, "height_min", "height_max", "height_cm", data) queryset = _apply_min_max_filter(queryset, "height_min", "height_max", "height_cm", data)
queryset = _apply_min_max_filter(queryset, "weight_min", "weight_max", "weight_kg", data) queryset = _apply_min_max_filter(queryset, "weight_min", "weight_max", "weight_kg", data)
@ -235,47 +223,62 @@ def annotate_player_metrics(queryset, data: dict | None = None):
output_field=FloatField(), output_field=FloatField(),
), ),
), ),
default=Value(0.0), default=Value(None),
output_field=FloatField(), output_field=FloatField(),
) )
return queryset.annotate( return queryset.annotate(
games_played_value=Coalesce( games_played_value=Max(
Max("player_seasons__games_played", filter=context_filter), "player_seasons__games_played",
Value(0, output_field=IntegerField()), filter=context_filter,
output_field=IntegerField(), output_field=IntegerField(),
), ),
mpg_value=Coalesce(Max(mpg_expression, filter=context_filter), Value(0.0)), mpg_value=Max(mpg_expression, filter=context_filter),
ppg_value=Coalesce( ppg_value=Max(
Max("player_seasons__stats__points", filter=context_filter), "player_seasons__stats__points",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
rpg_value=Coalesce( rpg_value=Max(
Max("player_seasons__stats__rebounds", filter=context_filter), "player_seasons__stats__rebounds",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
apg_value=Coalesce( apg_value=Max(
Max("player_seasons__stats__assists", filter=context_filter), "player_seasons__stats__assists",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
spg_value=Coalesce( spg_value=Max(
Max("player_seasons__stats__steals", filter=context_filter), "player_seasons__stats__steals",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
bpg_value=Coalesce( bpg_value=Max(
Max("player_seasons__stats__blocks", filter=context_filter), "player_seasons__stats__blocks",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
top_efficiency=Coalesce( tov_value=Max(
Max("player_seasons__stats__player_efficiency_rating", filter=context_filter), "player_seasons__stats__turnovers",
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)), filter=context_filter,
output_field=DecimalField(max_digits=6, decimal_places=2), output_field=DecimalField(max_digits=6, decimal_places=2),
), ),
fg_pct_value=Max(
"player_seasons__stats__fg_pct",
filter=context_filter,
output_field=DecimalField(max_digits=5, decimal_places=2),
),
three_pct_value=Max(
"player_seasons__stats__three_pct",
filter=context_filter,
output_field=DecimalField(max_digits=5, decimal_places=2),
),
ft_pct_value=Max(
"player_seasons__stats__ft_pct",
filter=context_filter,
output_field=DecimalField(max_digits=5, decimal_places=2),
),
) )

View File

@ -7,7 +7,7 @@ from apps.scouting.models import FavoritePlayer
from apps.stats.models import PlayerSeason from apps.stats.models import PlayerSeason
from .forms import PlayerSearchForm from .forms import PlayerSearchForm
from .models import Player, PlayerCareerEntry from .models import Player
from .services.search import ( from .services.search import (
SEARCH_METRIC_SEMANTICS_TEXT, SEARCH_METRIC_SEMANTICS_TEXT,
annotate_player_metrics, annotate_player_metrics,
@ -92,12 +92,6 @@ class PlayerDetailView(DetailView):
"competition", "competition",
"stats", "stats",
).order_by("-season__start_date", "-id") ).order_by("-season__start_date", "-id")
career_queryset = PlayerCareerEntry.objects.select_related(
"team",
"competition",
"season",
"role_snapshot",
).order_by("-start_date", "-id")
return ( return (
Player.objects.select_related( Player.objects.select_related(
@ -108,9 +102,7 @@ class PlayerDetailView(DetailView):
"origin_team", "origin_team",
) )
.prefetch_related( .prefetch_related(
"aliases",
Prefetch("player_seasons", queryset=season_queryset), Prefetch("player_seasons", queryset=season_queryset),
Prefetch("career_entries", queryset=career_queryset),
) )
) )
@ -146,7 +138,6 @@ class PlayerDetailView(DetailView):
context["age"] = calculate_age(player.birth_date) context["age"] = calculate_age(player.birth_date)
context["current_assignment"] = current_assignment context["current_assignment"] = current_assignment
context["career_entries"] = player.career_entries.all()
context["season_rows"] = season_rows context["season_rows"] = season_rows
context["is_favorite"] = False context["is_favorite"] = False
if self.request.user.is_authenticated: if self.request.user.is_authenticated:

View File

@ -1,4 +1,8 @@
from django import forms from django import forms
import json
from decimal import Decimal
from apps.players.forms import PlayerSearchForm
from .models import SavedSearch from .models import SavedSearch
@ -10,3 +14,61 @@ class SavedSearchForm(forms.ModelForm):
widgets = { widgets = {
"name": forms.TextInput(attrs={"placeholder": "e.g. EuroLeague guards under 24"}), "name": forms.TextInput(attrs={"placeholder": "e.g. EuroLeague guards under 24"}),
} }
class SavedSearchUpdateForm(forms.ModelForm):
filters_json = forms.CharField(
required=False,
label="Filters (JSON)",
widget=forms.Textarea(attrs={"rows": 8, "class": "font-mono"}),
help_text="Structured search filters payload. Leave blank to keep current filters.",
)
class Meta:
model = SavedSearch
fields = ["name", "is_public", "filters_json"]
widgets = {
"name": forms.TextInput(attrs={"placeholder": "e.g. Italian wings - updated"}),
}
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.instance and self.instance.pk and not self.initial.get("filters_json"):
self.initial["filters_json"] = json.dumps(self.instance.filters, indent=2, sort_keys=True)
def clean_filters_json(self):
raw = self.cleaned_data.get("filters_json")
if not raw:
return self.instance.filters
try:
parsed = json.loads(raw)
except json.JSONDecodeError as exc:
raise forms.ValidationError("Invalid JSON format.") from exc
if not isinstance(parsed, dict):
raise forms.ValidationError("Filters JSON must be an object.")
form = PlayerSearchForm(parsed)
if not form.is_valid():
raise forms.ValidationError("Filters JSON contains invalid search parameters.")
validated = {}
for key, value in form.cleaned_data.items():
if value in (None, ""):
continue
if hasattr(value, "pk"):
validated[key] = value.pk
elif isinstance(value, Decimal):
validated[key] = str(value)
else:
validated[key] = value
if not validated:
raise forms.ValidationError("Filters JSON does not contain valid searchable filters.")
return validated
def save(self, commit=True):
instance = super().save(commit=False)
instance.filters = self.cleaned_data["filters_json"]
if commit:
instance.save()
return instance

View File

@ -7,7 +7,7 @@ from django.utils import timezone
from django.views import View from django.views import View
from django.views.generic import ListView, TemplateView, UpdateView from django.views.generic import ListView, TemplateView, UpdateView
from .forms import SavedSearchForm from .forms import SavedSearchForm, SavedSearchUpdateForm
from .models import FavoritePlayer, SavedSearch from .models import FavoritePlayer, SavedSearch
from .services.saved_searches import extract_filters_from_params, saved_search_to_querystring from .services.saved_searches import extract_filters_from_params, saved_search_to_querystring
@ -81,7 +81,7 @@ class SavedSearchCreateView(LoginRequiredMixin, View):
class SavedSearchUpdateView(LoginRequiredMixin, UpdateView): class SavedSearchUpdateView(LoginRequiredMixin, UpdateView):
model = SavedSearch model = SavedSearch
form_class = SavedSearchForm form_class = SavedSearchUpdateForm
template_name = "scouting/saved_search_edit.html" template_name = "scouting/saved_search_edit.html"
def get_queryset(self): def get_queryset(self):
@ -96,6 +96,13 @@ class SavedSearchDeleteView(LoginRequiredMixin, View):
def post(self, request, pk, *args, **kwargs): def post(self, request, pk, *args, **kwargs):
saved_search = get_object_or_404(SavedSearch, pk=pk, user=request.user) saved_search = get_object_or_404(SavedSearch, pk=pk, user=request.user)
saved_search.delete() saved_search.delete()
if request.headers.get("HX-Request") == "true":
saved_searches = SavedSearch.objects.filter(user=request.user).order_by("-updated_at")
return render(
request,
"scouting/partials/saved_search_table.html",
{"saved_searches": saved_searches},
)
messages.success(request, "Saved search deleted.") messages.success(request, "Saved search deleted.")
return redirect("scouting:index") return redirect("scouting:index")

View File

@ -5,9 +5,9 @@ from .models import PlayerSeason, PlayerSeasonStats
@admin.register(PlayerSeason) @admin.register(PlayerSeason)
class PlayerSeasonAdmin(admin.ModelAdmin): class PlayerSeasonAdmin(admin.ModelAdmin):
list_display = ("player", "season", "team", "competition", "games_played", "minutes_played") list_display = ("player", "season", "source_uid", "team", "competition", "games_played", "minutes_played")
list_filter = ("season", "competition") list_filter = ("season", "competition")
search_fields = ("player__full_name", "team__name", "competition__name", "season__label") search_fields = ("player__full_name", "team__name", "competition__name", "season__label", "source_uid")
@admin.register(PlayerSeasonStats) @admin.register(PlayerSeasonStats)

View File

@ -0,0 +1,25 @@
# Generated by Django 5.2.12 on 2026-03-13 12:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('competitions', '0003_competition_source_uid_season_source_uid_and_more'),
('players', '0006_player_source_uid_and_more'),
('stats', '0002_playerseasonstats_search_indexes'),
('teams', '0002_team_source_uid_team_teams_team_source__940258_idx'),
]
operations = [
migrations.AddField(
model_name='playerseason',
name='source_uid',
field=models.CharField(blank=True, max_length=160, null=True, unique=True),
),
migrations.AddIndex(
model_name='playerseason',
index=models.Index(fields=['source_uid'], name='stats_playe_source__57b701_idx'),
),
]

View File

@ -4,6 +4,7 @@ from django.db import models
class PlayerSeason(models.Model): class PlayerSeason(models.Model):
player = models.ForeignKey("players.Player", on_delete=models.CASCADE, related_name="player_seasons") player = models.ForeignKey("players.Player", on_delete=models.CASCADE, related_name="player_seasons")
season = models.ForeignKey("competitions.Season", on_delete=models.CASCADE, related_name="player_seasons") season = models.ForeignKey("competitions.Season", on_delete=models.CASCADE, related_name="player_seasons")
source_uid = models.CharField(max_length=160, blank=True, null=True, unique=True)
team = models.ForeignKey( team = models.ForeignKey(
"teams.Team", "teams.Team",
on_delete=models.SET_NULL, on_delete=models.SET_NULL,
@ -31,6 +32,7 @@ class PlayerSeason(models.Model):
) )
] ]
indexes = [ indexes = [
models.Index(fields=["source_uid"]),
models.Index(fields=["player", "season"]), models.Index(fields=["player", "season"]),
models.Index(fields=["season", "team"]), models.Index(fields=["season", "team"]),
models.Index(fields=["season", "competition"]), models.Index(fields=["season", "competition"]),

View File

@ -5,6 +5,6 @@ from .models import Team
@admin.register(Team) @admin.register(Team)
class TeamAdmin(admin.ModelAdmin): class TeamAdmin(admin.ModelAdmin):
list_display = ("name", "short_name", "country", "is_national_team") list_display = ("name", "source_name", "source_uid", "short_name", "country", "is_national_team")
list_filter = ("is_national_team", "country") list_filter = ("is_national_team", "country")
search_fields = ("name", "short_name", "slug") search_fields = ("name", "short_name", "slug", "source_name", "source_uid")

View File

@ -0,0 +1,23 @@
# Generated by Django 5.2.12 on 2026-03-13 12:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('players', '0006_player_source_uid_and_more'),
('teams', '0001_initial'),
]
operations = [
migrations.AddField(
model_name='team',
name='source_uid',
field=models.CharField(blank=True, max_length=120, null=True, unique=True),
),
migrations.AddIndex(
model_name='team',
index=models.Index(fields=['source_uid'], name='teams_team_source__940258_idx'),
),
]

View File

@ -0,0 +1,35 @@
# Generated by Django 5.2.12 on 2026-03-13 15:08
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("teams", "0002_team_source_uid_team_teams_team_source__940258_idx"),
]
operations = [
migrations.AddField(
model_name="team",
name="source_name",
field=models.CharField(blank=True, default="", max_length=120),
),
migrations.AlterField(
model_name="team",
name="source_uid",
field=models.CharField(blank=True, max_length=120, null=True),
),
migrations.AddConstraint(
model_name="team",
constraint=models.UniqueConstraint(
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
fields=("source_name", "source_uid"),
name="uq_team_source_namespace_uid",
),
),
migrations.AddIndex(
model_name="team",
index=models.Index(fields=["source_name", "source_uid"], name="teams_team_source__8035ae_idx"),
),
]

View File

@ -5,6 +5,8 @@ class Team(models.Model):
name = models.CharField(max_length=200) name = models.CharField(max_length=200)
short_name = models.CharField(max_length=80, blank=True) short_name = models.CharField(max_length=80, blank=True)
slug = models.SlugField(max_length=220, unique=True) slug = models.SlugField(max_length=220, unique=True)
source_name = models.CharField(max_length=120, blank=True, default="")
source_uid = models.CharField(max_length=120, blank=True, null=True)
country = models.ForeignKey( country = models.ForeignKey(
"players.Nationality", "players.Nationality",
on_delete=models.SET_NULL, on_delete=models.SET_NULL,
@ -20,11 +22,18 @@ class Team(models.Model):
class Meta: class Meta:
ordering = ["name"] ordering = ["name"]
constraints = [ constraints = [
models.UniqueConstraint(fields=["name", "country"], name="uq_team_name_country") models.UniqueConstraint(fields=["name", "country"], name="uq_team_name_country"),
models.UniqueConstraint(
fields=["source_name", "source_uid"],
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
name="uq_team_source_namespace_uid",
),
] ]
indexes = [ indexes = [
models.Index(fields=["name"]), models.Index(fields=["name"]),
models.Index(fields=["slug"]), models.Index(fields=["slug"]),
models.Index(fields=["source_name", "source_uid"]),
models.Index(fields=["source_uid"]),
models.Index(fields=["country"]), models.Index(fields=["country"]),
models.Index(fields=["is_national_team"]), models.Index(fields=["is_national_team"]),
] ]

View File

@ -1,3 +1,8 @@
from .celery import app as celery_app """
HoopScout v2 runtime package.
__all__ = ("celery_app",) Celery is intentionally not auto-loaded at import time in v2 foundation runtime.
Legacy task modules remain in-repo and can be loaded explicitly if needed.
"""
__all__ = ()

View File

@ -28,12 +28,12 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
def build_periodic_schedule() -> dict: def build_periodic_schedule() -> dict:
if not settings.INGESTION_SCHEDULE_ENABLED: if not getattr(settings, "INGESTION_SCHEDULE_ENABLED", False):
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.") logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
return {} return {}
try: try:
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON) schedule_kwargs = _parse_cron_expression(getattr(settings, "INGESTION_SCHEDULE_CRON", "*/30 * * * *"))
return { return {
"ingestion.scheduled_provider_sync": { "ingestion.scheduled_provider_sync": {
"task": "apps.ingestion.tasks.scheduled_provider_sync", "task": "apps.ingestion.tasks.scheduled_provider_sync",
@ -44,7 +44,7 @@ def build_periodic_schedule() -> dict:
logger.error( logger.error(
"Invalid periodic ingestion schedule config. Task disabled. " "Invalid periodic ingestion schedule config. Task disabled. "
"INGESTION_SCHEDULE_CRON=%r error=%s", "INGESTION_SCHEDULE_CRON=%r error=%s",
settings.INGESTION_SCHEDULE_CRON, getattr(settings, "INGESTION_SCHEDULE_CRON", ""),
exc, exc,
) )
return {} return {}

View File

@ -72,10 +72,14 @@ INSTALLED_APPS = [
"apps.teams", "apps.teams",
"apps.stats", "apps.stats",
"apps.scouting", "apps.scouting",
"apps.providers",
"apps.ingestion", "apps.ingestion",
] ]
# v2 default runtime is snapshot-first. Legacy provider stack is opt-in.
LEGACY_PROVIDER_STACK_ENABLED = env_bool("LEGACY_PROVIDER_STACK_ENABLED", False)
if LEGACY_PROVIDER_STACK_ENABLED:
INSTALLED_APPS.append("apps.providers")
MIDDLEWARE = [ MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware", "django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware", "django.contrib.sessions.middleware.SessionMiddleware",
@ -142,47 +146,83 @@ LOGIN_URL = "users:login"
LOGIN_REDIRECT_URL = "core:dashboard" LOGIN_REDIRECT_URL = "core:dashboard"
LOGOUT_REDIRECT_URL = "core:home" LOGOUT_REDIRECT_URL = "core:home"
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0") # HoopScout v2 static dataset storage (volume-backed directories).
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/0") STATIC_DATASET_INCOMING_DIR = os.getenv(
CELERY_ACCEPT_CONTENT = ["json"] "STATIC_DATASET_INCOMING_DIR",
CELERY_TASK_SERIALIZER = "json" os.getenv("SNAPSHOT_INCOMING_DIR", str(BASE_DIR / "snapshots" / "incoming")),
CELERY_RESULT_SERIALIZER = "json"
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
INGESTION_SCHEDULE_ENABLED = env_bool("INGESTION_SCHEDULE_ENABLED", False)
INGESTION_SCHEDULE_CRON = os.getenv("INGESTION_SCHEDULE_CRON", "*/30 * * * *").strip()
INGESTION_SCHEDULE_PROVIDER_NAMESPACE = os.getenv("INGESTION_SCHEDULE_PROVIDER_NAMESPACE", "").strip()
INGESTION_SCHEDULE_JOB_TYPE = os.getenv("INGESTION_SCHEDULE_JOB_TYPE", "incremental").strip().lower()
INGESTION_PREVENT_OVERLAP = env_bool("INGESTION_PREVENT_OVERLAP", True)
INGESTION_OVERLAP_WINDOW_MINUTES = int(os.getenv("INGESTION_OVERLAP_WINDOW_MINUTES", "180"))
if INGESTION_SCHEDULE_JOB_TYPE not in {"incremental", "full_sync"}:
raise ImproperlyConfigured("INGESTION_SCHEDULE_JOB_TYPE must be either 'incremental' or 'full_sync'.")
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
PROVIDER_MVP_DATA_FILE = os.getenv(
"PROVIDER_MVP_DATA_FILE",
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
) )
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3")) STATIC_DATASET_ARCHIVE_DIR = os.getenv(
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1")) "STATIC_DATASET_ARCHIVE_DIR",
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10")) os.getenv("SNAPSHOT_ARCHIVE_DIR", str(BASE_DIR / "snapshots" / "archive")),
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io") )
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "") STATIC_DATASET_FAILED_DIR = os.getenv(
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5")) "STATIC_DATASET_FAILED_DIR",
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100")) os.getenv("SNAPSHOT_FAILED_DIR", str(BASE_DIR / "snapshots" / "failed")),
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10")) )
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False) # v2 extractor framework runtime settings.
PROVIDER_BALLDONTLIE_SEASONS = [ EXTRACTOR_USER_AGENT = os.getenv("EXTRACTOR_USER_AGENT", "HoopScoutBot/2.0 (+https://younerd.org)")
int(value.strip()) EXTRACTOR_HTTP_TIMEOUT_SECONDS = float(os.getenv("EXTRACTOR_HTTP_TIMEOUT_SECONDS", "15"))
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",") EXTRACTOR_HTTP_RETRIES = int(os.getenv("EXTRACTOR_HTTP_RETRIES", "2"))
if value.strip().isdigit() EXTRACTOR_RETRY_SLEEP_SECONDS = float(os.getenv("EXTRACTOR_RETRY_SLEEP_SECONDS", "1.0"))
] EXTRACTOR_REQUEST_DELAY_SECONDS = float(os.getenv("EXTRACTOR_REQUEST_DELAY_SECONDS", "0.5"))
EXTRACTOR_PUBLIC_JSON_URL = os.getenv("EXTRACTOR_PUBLIC_JSON_URL", "").strip()
EXTRACTOR_PUBLIC_SOURCE_NAME = os.getenv("EXTRACTOR_PUBLIC_SOURCE_NAME", "public_json_source").strip()
EXTRACTOR_INCLUDE_RAW_PAYLOAD = env_bool("EXTRACTOR_INCLUDE_RAW_PAYLOAD", False)
EXTRACTOR_LBA_STATS_URL = os.getenv("EXTRACTOR_LBA_STATS_URL", "").strip()
EXTRACTOR_LBA_SEASON_LABEL = os.getenv("EXTRACTOR_LBA_SEASON_LABEL", "").strip()
EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = os.getenv("EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID", "lba-serie-a").strip()
EXTRACTOR_LBA_COMPETITION_NAME = os.getenv("EXTRACTOR_LBA_COMPETITION_NAME", "Lega Basket Serie A").strip()
EXTRACTOR_BCL_STATS_URL = os.getenv("EXTRACTOR_BCL_STATS_URL", "").strip()
EXTRACTOR_BCL_SEASON_LABEL = os.getenv("EXTRACTOR_BCL_SEASON_LABEL", "").strip()
EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = os.getenv("EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID", "bcl").strip()
EXTRACTOR_BCL_COMPETITION_NAME = os.getenv("EXTRACTOR_BCL_COMPETITION_NAME", "Basketball Champions League").strip()
# Simple daily orchestration settings (extract -> import).
DAILY_ORCHESTRATION_EXTRACTORS = os.getenv("DAILY_ORCHESTRATION_EXTRACTORS", "lba,bcl")
DAILY_ORCHESTRATION_INTERVAL_SECONDS = int(os.getenv("DAILY_ORCHESTRATION_INTERVAL_SECONDS", "86400"))
if EXTRACTOR_HTTP_TIMEOUT_SECONDS <= 0:
raise ImproperlyConfigured("EXTRACTOR_HTTP_TIMEOUT_SECONDS must be > 0.")
if EXTRACTOR_HTTP_RETRIES < 0:
raise ImproperlyConfigured("EXTRACTOR_HTTP_RETRIES must be >= 0.")
if EXTRACTOR_RETRY_SLEEP_SECONDS < 0:
raise ImproperlyConfigured("EXTRACTOR_RETRY_SLEEP_SECONDS must be >= 0.")
if EXTRACTOR_REQUEST_DELAY_SECONDS < 0:
raise ImproperlyConfigured("EXTRACTOR_REQUEST_DELAY_SECONDS must be >= 0.")
if DAILY_ORCHESTRATION_INTERVAL_SECONDS < 60:
raise ImproperlyConfigured("DAILY_ORCHESTRATION_INTERVAL_SECONDS must be >= 60.")
# Optional scheduler command settings for future v2 snapshot jobs.
SCHEDULER_ENABLED = env_bool("SCHEDULER_ENABLED", False)
SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))
if SCHEDULER_INTERVAL_SECONDS < 30:
raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.")
if LEGACY_PROVIDER_STACK_ENABLED:
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
PROVIDER_MVP_DATA_FILE = os.getenv(
"PROVIDER_MVP_DATA_FILE",
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
)
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
PROVIDER_BALLDONTLIE_SEASONS = [
int(value.strip())
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
if value.strip().isdigit()
]
LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper() LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper()
LOG_SQL = env_bool("DJANGO_LOG_SQL", False) LOG_SQL = env_bool("DJANGO_LOG_SQL", False)

View File

@ -1,4 +1,5 @@
from django.contrib import admin from django.contrib import admin
from django.conf import settings
from django.urls import include, path from django.urls import include, path
urlpatterns = [ urlpatterns = [
@ -11,6 +12,8 @@ urlpatterns = [
path("teams/", include("apps.teams.urls")), path("teams/", include("apps.teams.urls")),
path("stats/", include("apps.stats.urls")), path("stats/", include("apps.stats.urls")),
path("scouting/", include("apps.scouting.urls")), path("scouting/", include("apps.scouting.urls")),
path("providers/", include("apps.providers.urls")),
path("ingestion/", include("apps.ingestion.urls")), path("ingestion/", include("apps.ingestion.urls")),
] ]
if settings.LEGACY_PROVIDER_STACK_ENABLED:
urlpatterns.append(path("providers/", include("apps.providers.urls")))

View File

@ -3,25 +3,28 @@ services:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes: volumes:
- .:/app - .:/app
- node_modules_data:/app/node_modules - static_data_dev:/app/staticfiles
- static_data:/app/staticfiles - media_data_dev:/app/media
- media_data:/app/media - snapshots_incoming_dev:/app/snapshots/incoming
- runtime_data:/app/runtime - snapshots_archive_dev:/app/snapshots/archive
- snapshots_failed_dev:/app/snapshots/failed
celery_worker: nginx:
volumes:
- static_data_dev:/var/www/static:ro
- media_data_dev:/var/www/media:ro
scheduler:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes: volumes:
- .:/app - .:/app
- runtime_data:/app/runtime - snapshots_incoming_dev:/app/snapshots/incoming
- snapshots_archive_dev:/app/snapshots/archive
- snapshots_failed_dev:/app/snapshots/failed
celery_beat: volumes:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" static_data_dev:
volumes: media_data_dev:
- .:/app snapshots_incoming_dev:
- runtime_data:/app/runtime snapshots_archive_dev:
snapshots_failed_dev:
tailwind:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- node_modules_data:/app/node_modules

View File

@ -2,14 +2,5 @@ services:
web: web:
environment: environment:
DJANGO_SETTINGS_MODULE: config.settings.production DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_DEBUG: "0" DJANGO_ENV: production
celery_worker:
environment:
DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_DEBUG: "0"
celery_beat:
environment:
DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_DEBUG: "0" DJANGO_DEBUG: "0"

View File

@ -1,13 +1,43 @@
services: services:
web:
image: registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
depends_on:
postgres:
condition: service_healthy
user: "10001:10001"
volumes:
- static_data:/app/staticfiles
- media_data:/app/media
- snapshots_incoming:/app/snapshots/incoming
- snapshots_archive:/app/snapshots/archive
- snapshots_failed:/app/snapshots/failed
expose:
- "8000"
healthcheck:
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/health/ || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 25s
restart: unless-stopped
nginx: nginx:
image: nginx:1.27-alpine image: registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}
build:
context: .
dockerfile: nginx/Dockerfile
depends_on: depends_on:
web: web:
condition: service_healthy condition: service_healthy
ports: ports:
- "80:80" - "80:80"
volumes: volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- static_data:/var/www/static:ro - static_data:/var/www/static:ro
- media_data:/var/www/media:ro - media_data:/var/www/media:ro
read_only: true read_only: true
@ -22,91 +52,6 @@ services:
start_period: 10s start_period: 10s
restart: unless-stopped restart: unless-stopped
web:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
user: "10001:10001"
volumes:
- static_data:/app/staticfiles
- media_data:/app/media
- runtime_data:/app/runtime
expose:
- "8000"
healthcheck:
test: ["CMD-SHELL", "curl -f http://127.0.0.1:8000/health/ || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 20s
restart: unless-stopped
tailwind:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
command: npm run dev
user: "10001:10001"
profiles:
- dev
restart: unless-stopped
celery_worker:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: celery -A config worker -l info
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
healthcheck:
test: ["CMD-SHELL", "celery -A config inspect ping -d celery@$$HOSTNAME | grep -q pong || exit 1"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: unless-stopped
celery_beat:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
healthcheck:
test: ["CMD-SHELL", "test -f /app/runtime/celerybeat-schedule || exit 1"]
interval: 30s
timeout: 5s
retries: 10
start_period: 20s
restart: unless-stopped
postgres: postgres:
image: postgres:16-alpine image: postgres:16-alpine
environment: environment:
@ -122,22 +67,39 @@ services:
retries: 5 retries: 5
restart: unless-stopped restart: unless-stopped
redis: scheduler:
image: redis:7-alpine profiles: ["scheduler"]
command: redis-server --save 60 1 --loglevel warning image: registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
environment:
SCHEDULER_ENABLED: ${SCHEDULER_ENABLED:-0}
SCHEDULER_DISABLED_SLEEP_SECONDS: ${SCHEDULER_DISABLED_SLEEP_SECONDS:-300}
DAILY_ORCHESTRATION_INTERVAL_SECONDS: ${DAILY_ORCHESTRATION_INTERVAL_SECONDS:-86400}
command: /app/scripts/scheduler.sh
depends_on:
postgres:
condition: service_healthy
user: "10001:10001"
volumes: volumes:
- redis_data:/data - snapshots_incoming:/app/snapshots/incoming
- snapshots_archive:/app/snapshots/archive
- snapshots_failed:/app/snapshots/failed
healthcheck: healthcheck:
test: ["CMD", "redis-cli", "ping"] test: ["CMD-SHELL", "grep -qa 'scheduler.sh' /proc/1/cmdline || exit 1"]
interval: 10s interval: 30s
timeout: 5s timeout: 5s
retries: 5 retries: 3
start_period: 20s
restart: unless-stopped restart: unless-stopped
volumes: volumes:
postgres_data: postgres_data:
static_data: static_data:
media_data: media_data:
runtime_data: snapshots_incoming:
redis_data: snapshots_archive:
node_modules_data: snapshots_failed:

View File

@ -0,0 +1,58 @@
# Runtime Consistency Checklist (v2)
Use this checklist when runtime/docs changes are made.
## Compose and Runtime
- `docker-compose.yml` contains only v2 default runtime services:
- `web`, `nginx`, `postgres`
- optional `scheduler` profile service
- `docker-compose.dev.yml` is mutable (source bind mounts allowed for dev only).
- `docker-compose.release.yml` is settings-focused and keeps release runtime immutable.
## Image/Registry Strategy
- `web` image: `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}`
- `nginx` image: `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}`
- optional scheduler image: `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
## Entrypoints
- `entrypoint.sh`:
- waits for PostgreSQL
- creates snapshot directories
- optionally runs `migrate` and `collectstatic` when booting gunicorn
- `scripts/scheduler.sh`:
- runs `run_daily_orchestration` loop
- idle-sleeps when `SCHEDULER_ENABLED=0`
## Snapshot Lifecycle
1. Extractor writes snapshots to `incoming`.
2. `import_snapshots` validates + upserts into PostgreSQL.
3. Success => file moved to `archive`.
4. Failure => file moved to `failed`.
## Source Identity Rule
Raw IDs are not global. Imported identities are namespaced by source:
- `Competition`: `(source_name, source_uid)`
- `Team`: `(source_name, source_uid)`
- `Player`: `(source_name, source_uid)`
## Legacy Isolation
- `LEGACY_PROVIDER_STACK_ENABLED=0` by default.
- With default setting:
- `apps.providers` is not installed
- `/providers/` routes are not mounted
- legacy provider settings are not required
## Verification Commands
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml config
./scripts/verify_release_topology.sh
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
```

View File

@ -8,6 +8,10 @@ done
echo "PostgreSQL is available." echo "PostgreSQL is available."
mkdir -p "${STATIC_DATASET_INCOMING_DIR:-${SNAPSHOT_INCOMING_DIR:-/app/snapshots/incoming}}" \
"${STATIC_DATASET_ARCHIVE_DIR:-${SNAPSHOT_ARCHIVE_DIR:-/app/snapshots/archive}}" \
"${STATIC_DATASET_FAILED_DIR:-${SNAPSHOT_FAILED_DIR:-/app/snapshots/failed}}"
if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then
echo "Running Django deployment checks..." echo "Running Django deployment checks..."
python manage.py check --deploy --fail-level WARNING python manage.py check --deploy --fail-level WARNING
@ -19,15 +23,6 @@ if [ "${AUTO_APPLY_MIGRATIONS:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
fi fi
if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
if [ "${AUTO_BUILD_TAILWIND:-1}" = "1" ] && [ -f /app/package.json ]; then
if [ -x /app/node_modules/.bin/tailwindcss ]; then
echo "Building Tailwind assets..."
npm run build
else
echo "Tailwind dependencies missing; skipping AUTO_BUILD_TAILWIND."
fi
fi
echo "Collecting static files..." echo "Collecting static files..."
python manage.py collectstatic --noinput python manage.py collectstatic --noinput
fi fi

8
nginx/Dockerfile Normal file
View File

@ -0,0 +1,8 @@
FROM nginx:1.27-alpine
COPY nginx/nginx.conf /etc/nginx/nginx.conf
COPY nginx/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["nginx", "-g", "daemon off;"]

4
nginx/entrypoint.sh Normal file
View File

@ -0,0 +1,4 @@
#!/bin/sh
set -e
exec "$@"

35
scripts/scheduler.sh Normal file
View File

@ -0,0 +1,35 @@
#!/bin/sh
set -e
if [ "${SCHEDULER_ENABLED:-0}" != "1" ]; then
DISABLED_SLEEP="${SCHEDULER_DISABLED_SLEEP_SECONDS:-300}"
if [ "${DISABLED_SLEEP}" -lt 30 ]; then
echo "SCHEDULER_DISABLED_SLEEP_SECONDS must be >= 30"
exit 1
fi
echo "Scheduler disabled (SCHEDULER_ENABLED=${SCHEDULER_ENABLED:-0}). Entering idle mode with ${DISABLED_SLEEP}s sleep."
while true; do
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Scheduler disabled; sleeping for ${DISABLED_SLEEP}s."
sleep "${DISABLED_SLEEP}"
done
fi
INTERVAL="${DAILY_ORCHESTRATION_INTERVAL_SECONDS:-${SCHEDULER_INTERVAL_SECONDS:-86400}}"
if [ "${INTERVAL}" -lt 60 ]; then
echo "DAILY_ORCHESTRATION_INTERVAL_SECONDS/SCHEDULER_INTERVAL_SECONDS must be >= 60"
exit 1
fi
echo "Starting HoopScout scheduler loop interval=${INTERVAL}s"
while true; do
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Running daily orchestration..."
if python manage.py run_daily_orchestration; then
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Daily orchestration completed successfully."
else
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Daily orchestration failed."
fi
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Sleeping for ${INTERVAL}s."
sleep "${INTERVAL}"
done

View File

@ -30,7 +30,6 @@ check_service_bind_mount() {
} }
check_service_bind_mount "web" check_service_bind_mount "web"
check_service_bind_mount "celery_worker" check_service_bind_mount "scheduler"
check_service_bind_mount "celery_beat"
echo "Release topology verification passed." echo "Release topology verification passed."

View File

@ -22,8 +22,6 @@
<h2 class="text-base">Summary</h2> <h2 class="text-base">Summary</h2>
<dl class="mt-2 space-y-1 text-sm"> <dl class="mt-2 space-y-1 text-sm">
<div><dt class="inline font-semibold">Nationality:</dt> <dd class="inline">{{ player.nationality.name|default:"-" }}</dd></div> <div><dt class="inline font-semibold">Nationality:</dt> <dd class="inline">{{ player.nationality.name|default:"-" }}</dd></div>
<div><dt class="inline font-semibold">Origin competition:</dt> <dd class="inline">{{ player.origin_competition.name|default:"-" }}</dd></div>
<div><dt class="inline font-semibold">Origin team:</dt> <dd class="inline">{{ player.origin_team.name|default:"-" }}</dd></div>
<div><dt class="inline font-semibold">Birth date:</dt> <dd class="inline">{{ player.birth_date|date:"Y-m-d"|default:"-" }}</dd></div> <div><dt class="inline font-semibold">Birth date:</dt> <dd class="inline">{{ player.birth_date|date:"Y-m-d"|default:"-" }}</dd></div>
<div><dt class="inline font-semibold">Age:</dt> <dd class="inline">{{ age|default:"-" }}</dd></div> <div><dt class="inline font-semibold">Age:</dt> <dd class="inline">{{ age|default:"-" }}</dd></div>
<div><dt class="inline font-semibold">Height:</dt> <dd class="inline">{{ player.height_cm|default:"-" }} cm</dd></div> <div><dt class="inline font-semibold">Height:</dt> <dd class="inline">{{ player.height_cm|default:"-" }} cm</dd></div>
@ -47,14 +45,11 @@
</div> </div>
<div class="rounded-lg border border-slate-200 p-4"> <div class="rounded-lg border border-slate-200 p-4">
<h2 class="text-base">Aliases</h2> <h2 class="text-base">Snapshot Coverage</h2>
<ul class="mt-2 list-inside list-disc text-sm text-slate-700"> <dl class="mt-2 space-y-1 text-sm">
{% for alias in player.aliases.all %} <div><dt class="inline font-semibold">Seasons imported:</dt> <dd class="inline">{{ season_rows|length }}</dd></div>
<li>{{ alias.alias }}{% if alias.source %} ({{ alias.source }}){% endif %}</li> <div><dt class="inline font-semibold">Latest season:</dt> <dd class="inline">{% if season_rows %}{{ season_rows.0.season.label|default:"-" }}{% else %}-{% endif %}</dd></div>
{% empty %} </dl>
<li>No aliases recorded.</li>
{% endfor %}
</ul>
</div> </div>
</div> </div>
</section> </section>
@ -77,33 +72,6 @@
{% endif %} {% endif %}
</section> </section>
<section class="panel mt-4">
<h2>Career History</h2>
{% if career_entries %}
<div class="table-wrap mt-3">
<table class="data-table">
<thead>
<tr><th>Season</th><th>Team</th><th>Competition</th><th>Role</th><th>From</th><th>To</th></tr>
</thead>
<tbody class="divide-y divide-slate-100 bg-white">
{% for entry in career_entries %}
<tr>
<td>{{ entry.season.label|default:"-" }}</td>
<td>{{ entry.team.name|default:"-" }}</td>
<td>{{ entry.competition.name|default:"-" }}</td>
<td>{{ entry.role_snapshot.name|default:"-" }}</td>
<td>{{ entry.start_date|date:"Y-m-d"|default:"-" }}</td>
<td>{{ entry.end_date|date:"Y-m-d"|default:"-" }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="empty-state mt-3">No career entries available.</div>
{% endif %}
</section>
<section class="panel mt-4"> <section class="panel mt-4">
<h2>Season-by-Season Stats</h2> <h2>Season-by-Season Stats</h2>
{% if season_rows %} {% if season_rows %}

View File

@ -5,7 +5,7 @@
{% block content %} {% block content %}
<section class="panel"> <section class="panel">
<h1>Player Search</h1> <h1>Player Search</h1>
<p class="mt-1 text-sm text-slate-600">Filter players by profile, origin, context, and production metrics.</p> <p class="mt-1 text-sm text-slate-600">Filter players by profile, team-season context, and production metrics.</p>
{% if search_has_errors %} {% if search_has_errors %}
<div class="mt-4 rounded-md border border-rose-200 bg-rose-50 p-3 text-sm text-rose-800"> <div class="mt-4 rounded-md border border-rose-200 bg-rose-50 p-3 text-sm text-rose-800">
<p class="font-medium">Please correct the highlighted filters.</p> <p class="font-medium">Please correct the highlighted filters.</p>
@ -56,8 +56,6 @@
<div><label for="id_competition">Competition</label>{{ search_form.competition }}</div> <div><label for="id_competition">Competition</label>{{ search_form.competition }}</div>
<div><label for="id_team">Team</label>{{ search_form.team }}</div> <div><label for="id_team">Team</label>{{ search_form.team }}</div>
<div><label for="id_season">Season</label>{{ search_form.season }}</div> <div><label for="id_season">Season</label>{{ search_form.season }}</div>
<div><label for="id_origin_competition">Origin competition</label>{{ search_form.origin_competition }}</div>
<div><label for="id_origin_team">Origin team</label>{{ search_form.origin_team }}</div>
</div> </div>
<details class="rounded-lg border border-slate-200 bg-slate-50 p-3"> <details class="rounded-lg border border-slate-200 bg-slate-50 p-3">
@ -97,8 +95,6 @@
<div><label for="id_three_pct_max">3P% max</label>{{ search_form.three_pct_max }}</div> <div><label for="id_three_pct_max">3P% max</label>{{ search_form.three_pct_max }}</div>
<div><label for="id_ft_pct_min">FT% min</label>{{ search_form.ft_pct_min }}</div> <div><label for="id_ft_pct_min">FT% min</label>{{ search_form.ft_pct_min }}</div>
<div><label for="id_ft_pct_max">FT% max</label>{{ search_form.ft_pct_max }}</div> <div><label for="id_ft_pct_max">FT% max</label>{{ search_form.ft_pct_max }}</div>
<div><label for="id_efficiency_metric_min">Impact min</label>{{ search_form.efficiency_metric_min }}</div>
<div><label for="id_efficiency_metric_max">Impact max</label>{{ search_form.efficiency_metric_max }}</div>
</div> </div>
</details> </details>
</form> </form>

View File

@ -36,13 +36,18 @@
<th>Player</th> <th>Player</th>
<th>Nationality</th> <th>Nationality</th>
<th>Pos / Role</th> <th>Pos / Role</th>
<th>Origin</th>
<th>Height / Weight</th> <th>Height / Weight</th>
<th>Best Eligible Games</th> <th>Best Eligible Games</th>
<th>Best Eligible MPG</th> <th>Best Eligible MPG</th>
<th>Best Eligible PPG</th> <th>Best Eligible PPG</th>
<th>Best Eligible RPG</th> <th>Best Eligible RPG</th>
<th>Best Eligible APG</th> <th>Best Eligible APG</th>
<th>Best Eligible SPG</th>
<th>Best Eligible BPG</th>
<th>Best Eligible TOV</th>
<th>Best Eligible FG%</th>
<th>Best Eligible 3P%</th>
<th>Best Eligible FT%</th>
{% if request.user.is_authenticated %}<th>Watchlist</th>{% endif %} {% if request.user.is_authenticated %}<th>Watchlist</th>{% endif %}
</tr> </tr>
</thead> </thead>
@ -52,16 +57,18 @@
<td><a class="font-medium" href="{% url 'players:detail' player.pk %}">{{ player.full_name }}</a></td> <td><a class="font-medium" href="{% url 'players:detail' player.pk %}">{{ player.full_name }}</a></td>
<td>{{ player.nationality.name|default:"-" }}</td> <td>{{ player.nationality.name|default:"-" }}</td>
<td>{{ player.nominal_position.code|default:"-" }} / {{ player.inferred_role.name|default:"-" }}</td> <td>{{ player.nominal_position.code|default:"-" }} / {{ player.inferred_role.name|default:"-" }}</td>
<td>
{{ player.origin_competition.name|default:"-" }}
{% if player.origin_team %}<div class="text-xs text-slate-500">{{ player.origin_team.name }}</div>{% endif %}
</td>
<td>{{ player.height_cm|default:"-" }} / {{ player.weight_kg|default:"-" }}</td> <td>{{ player.height_cm|default:"-" }} / {{ player.weight_kg|default:"-" }}</td>
<td>{{ player.games_played_value|floatformat:0 }}</td> <td>{% if player.games_played_value is not None %}{{ player.games_played_value|floatformat:0 }}{% else %}-{% endif %}</td>
<td>{{ player.mpg_value|floatformat:1 }}</td> <td>{% if player.mpg_value is not None %}{{ player.mpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{{ player.ppg_value|floatformat:1 }}</td> <td>{% if player.ppg_value is not None %}{{ player.ppg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{{ player.rpg_value|floatformat:1 }}</td> <td>{% if player.rpg_value is not None %}{{ player.rpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{{ player.apg_value|floatformat:1 }}</td> <td>{% if player.apg_value is not None %}{{ player.apg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.spg_value is not None %}{{ player.spg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.bpg_value is not None %}{{ player.bpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.tov_value is not None %}{{ player.tov_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.fg_pct_value is not None %}{{ player.fg_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.three_pct_value is not None %}{{ player.three_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
<td>{% if player.ft_pct_value is not None %}{{ player.ft_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
{% if request.user.is_authenticated %} {% if request.user.is_authenticated %}
<td> <td>
{% if player.id in favorite_player_ids %} {% if player.id in favorite_player_ids %}

View File

@ -1,3 +1,4 @@
<div id="saved-search-table">
{% if saved_searches %} {% if saved_searches %}
<div class="table-wrap mt-4"> <div class="table-wrap mt-4">
<table class="data-table"> <table class="data-table">
@ -21,7 +22,14 @@
<div class="flex flex-wrap gap-2"> <div class="flex flex-wrap gap-2">
<a class="btn-secondary" href="{% url 'scouting:saved_search_run' saved_search.pk %}">Run</a> <a class="btn-secondary" href="{% url 'scouting:saved_search_run' saved_search.pk %}">Run</a>
<a class="btn-secondary" href="{% url 'scouting:saved_search_edit' saved_search.pk %}">Edit</a> <a class="btn-secondary" href="{% url 'scouting:saved_search_edit' saved_search.pk %}">Edit</a>
<form method="post" action="{% url 'scouting:saved_search_delete' saved_search.pk %}"> <form
method="post"
action="{% url 'scouting:saved_search_delete' saved_search.pk %}"
hx-post="{% url 'scouting:saved_search_delete' saved_search.pk %}"
hx-target="#saved-search-table"
hx-swap="outerHTML"
hx-indicator="#htmx-loading"
>
{% csrf_token %} {% csrf_token %}
<button class="btn-secondary" type="submit">Delete</button> <button class="btn-secondary" type="submit">Delete</button>
</form> </form>
@ -35,3 +43,4 @@
{% else %} {% else %}
<div class="empty-state mt-4">No saved searches yet.</div> <div class="empty-state mt-4">No saved searches yet.</div>
{% endif %} {% endif %}
</div>

View File

@ -7,7 +7,23 @@
<h1>Edit Saved Search</h1> <h1>Edit Saved Search</h1>
<form method="post" class="mt-4 space-y-4"> <form method="post" class="mt-4 space-y-4">
{% csrf_token %} {% csrf_token %}
{{ form.as_p }} <div>
<label for="{{ form.name.id_for_label }}">{{ form.name.label }}</label>
{{ form.name }}
{% for error in form.name.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
</div>
<div class="flex items-center gap-2">
{{ form.is_public }}
<label for="{{ form.is_public.id_for_label }}">{{ form.is_public.label }}</label>
{% for error in form.is_public.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
</div>
<div>
<label for="{{ form.filters_json.id_for_label }}">{{ form.filters_json.label }}</label>
{{ form.filters_json }}
<p class="mt-1 text-xs text-slate-500">{{ form.filters_json.help_text }}</p>
{% for error in form.filters_json.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
</div>
{% for error in form.non_field_errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
<div class="flex flex-wrap gap-2"> <div class="flex flex-wrap gap-2">
<button type="submit" class="btn">Update</button> <button type="submit" class="btn">Update</button>
<a class="btn-secondary" href="{% url 'scouting:index' %}">Cancel</a> <a class="btn-secondary" href="{% url 'scouting:index' %}">Cancel</a>

View File

@ -0,0 +1,32 @@
{
"data": [
{
"player": {
"id": "bcl-player-42",
"name": "John Carter",
"first_name": "John",
"last_name": "Carter",
"birth_date": "1999-07-14",
"nationality": "US",
"height_cm": 198,
"weight_kg": 95,
"position": "SF"
},
"team": {
"id": "bcl-team-murcia",
"name": "UCAM Murcia"
},
"gp": 12,
"mpg": 29.1,
"ppg": 16.4,
"rpg": 5.8,
"apg": 2.7,
"spg": 1.5,
"bpg": 0.6,
"tov": 2.3,
"fg_pct": 48.1,
"three_pct": 37.2,
"ft_pct": 81.4
}
]
}

View File

@ -0,0 +1,25 @@
{
"data": [
{
"player": {
"id": "bcl-player-99",
"name": "Alex Novak"
},
"team": {
"id": "bcl-team-tenerife",
"name": "Lenovo Tenerife"
},
"gp": 10,
"mpg": 27.2,
"ppg": 14.8,
"rpg": 4.1,
"apg": 3.3,
"spg": 1.2,
"bpg": 0.4,
"tov": 2.0,
"fg_pct": 47.3,
"three_pct": 38.0,
"ft_pct": 79.1
}
]
}

View File

@ -0,0 +1,32 @@
{
"data": [
{
"player": {
"id": "p-001",
"name": "Marco Rossi",
"first_name": "Marco",
"last_name": "Rossi",
"birth_date": "2000-01-05",
"nationality": "IT",
"height_cm": 190,
"weight_kg": 84,
"position": "PG"
},
"team": {
"id": "team-virtus-bologna",
"name": "Virtus Bologna"
},
"gp": 20,
"mpg": 28.3,
"ppg": 15.8,
"rpg": 3.4,
"apg": 5.9,
"spg": 1.4,
"bpg": 0.2,
"tov": 2.1,
"fg_pct": 47.6,
"three_pct": 36.5,
"ft_pct": 84.2
}
]
}

View File

@ -0,0 +1,25 @@
{
"data": [
{
"player": {
"id": "p-002",
"name": "Andrea Bianchi"
},
"team": {
"id": "team-olimpia-milano",
"name": "Olimpia Milano"
},
"gp": 18,
"mpg": 24.7,
"ppg": 12.3,
"rpg": 2.9,
"apg": 4.2,
"spg": 1.1,
"bpg": 0.1,
"tov": 1.8,
"fg_pct": 45.0,
"three_pct": 35.4,
"ft_pct": 82.7
}
]
}

View File

@ -30,6 +30,12 @@ def test_players_api_list_and_detail(client):
list_response = client.get(reverse("api:players"), data={"q": "rossi"}) list_response = client.get(reverse("api:players"), data={"q": "rossi"})
assert list_response.status_code == 200 assert list_response.status_code == 200
assert list_response.json()["count"] == 1 assert list_response.json()["count"] == 1
list_payload = list_response.json()
assert "sort" in list_payload
assert "metric_semantics" in list_payload
assert "metric_sort_keys" in list_payload
assert "ppg_value" in list_payload["results"][0]
assert "mpg_value" in list_payload["results"][0]
detail_response = client.get(reverse("api:player_detail", kwargs={"pk": player.pk})) detail_response = client.get(reverse("api:player_detail", kwargs={"pk": player.pk}))
assert detail_response.status_code == 200 assert detail_response.status_code == 200
@ -83,8 +89,6 @@ def test_players_api_search_consistent_with_ui_filters(client):
nationality=nationality, nationality=nationality,
nominal_position=position, nominal_position=position,
inferred_role=role, inferred_role=role,
origin_competition=competition,
origin_team=team,
) )
ps = PlayerSeason.objects.create( ps = PlayerSeason.objects.create(
player=matching, player=matching,
@ -113,7 +117,7 @@ def test_players_api_search_consistent_with_ui_filters(client):
) )
params = { params = {
"origin_competition": competition.id, "competition": competition.id,
"nominal_position": position.id, "nominal_position": position.id,
"points_per_game_min": "10", "points_per_game_min": "10",
"sort": "ppg_desc", "sort": "ppg_desc",
@ -175,8 +179,33 @@ def test_players_api_metric_sort_uses_best_eligible_values(client):
response = client.get(reverse("api:players"), data={"sort": "ppg_desc"}) response = client.get(reverse("api:players"), data={"sort": "ppg_desc"})
assert response.status_code == 200 assert response.status_code == 200
names = [row["full_name"] for row in response.json()["results"]] payload = response.json()
names = [row["full_name"] for row in payload["results"]]
assert names.index("Dan High") < names.index("Ion Low") assert names.index("Dan High") < names.index("Ion Low")
assert payload["sort"] == "ppg_desc"
assert "best eligible values per player" in payload["metric_semantics"]
dan = next(row for row in payload["results"] if row["full_name"] == "Dan High")
ion = next(row for row in payload["results"] if row["full_name"] == "Ion Low")
assert float(dan["ppg_value"]) > float(ion["ppg_value"])
@pytest.mark.django_db
def test_players_api_metric_fields_are_exposed_and_nullable(client):
nationality = Nationality.objects.create(name="Sweden", iso2_code="SE", iso3_code="SWE")
Player.objects.create(
first_name="No",
last_name="Stats",
full_name="No Stats",
birth_date=date(2002, 1, 1),
nationality=nationality,
)
response = client.get(reverse("api:players"), data={"sort": "name_asc"})
assert response.status_code == 200
payload = response.json()
row = next(item for item in payload["results"] if item["full_name"] == "No Stats")
assert row["ppg_value"] is None
assert row["mpg_value"] is None
@pytest.mark.django_db @pytest.mark.django_db

143
tests/test_bcl_extractor.py Normal file
View File

@ -0,0 +1,143 @@
from __future__ import annotations
import json
from datetime import date
from pathlib import Path
import pytest
from django.core.management import call_command
from apps.ingestion.extractors.bcl import BCLSnapshotExtractor
from apps.ingestion.extractors.base import ExtractorNormalizationError
from apps.ingestion.extractors.registry import create_extractor
def _load_fixture(path: str) -> dict:
fixture_path = Path(__file__).parent / "fixtures" / path
return json.loads(fixture_path.read_text(encoding="utf-8"))
@pytest.mark.django_db
def test_bcl_extractor_normalizes_fixture_payload(tmp_path, settings):
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
fixture_payload = _load_fixture("bcl/bcl_players_stats.json")
class FakeClient:
def get_json(self, *_args, **_kwargs):
return fixture_payload
extractor = BCLSnapshotExtractor(http_client=FakeClient())
output_path = tmp_path / "bcl.json"
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
assert result.extractor_name == "bcl"
assert result.source_name == "bcl"
assert result.records_count == 1
payload = json.loads(output_path.read_text(encoding="utf-8"))
assert payload["source_name"] == "bcl"
assert payload["snapshot_date"] == "2026-03-13"
row = payload["records"][0]
assert row["competition_external_id"] == "bcl"
assert row["competition_name"] == "Basketball Champions League"
assert row["team_external_id"] == "bcl-team-murcia"
assert row["team_name"] == "UCAM Murcia"
assert row["player_external_id"] == "bcl-player-42"
assert row["full_name"] == "John Carter"
assert row["minutes_per_game"] == 29.1
assert row["three_pt_pct"] == 37.2
@pytest.mark.django_db
def test_bcl_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
class FakeClient:
def get_json(self, *_args, **_kwargs):
return fixture_payload
extractor = BCLSnapshotExtractor(http_client=FakeClient())
output_path = tmp_path / "bcl-partial.json"
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
assert result.records_count == 1
payload = json.loads(output_path.read_text(encoding="utf-8"))
row = payload["records"][0]
assert row["full_name"] == "Alex Novak"
assert row["first_name"] is None
assert row["last_name"] is None
assert row["birth_date"] is None
assert row["nationality"] is None
assert row["height_cm"] is None
assert row["weight_kg"] is None
assert row["position"] is None
assert row["games_played"] == 10
@pytest.mark.django_db
def test_bcl_extractor_still_fails_when_required_stats_are_missing(settings):
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
fixture_payload["data"][0].pop("ppg")
class FakeClient:
def get_json(self, *_args, **_kwargs):
return fixture_payload
extractor = BCLSnapshotExtractor(http_client=FakeClient())
with pytest.raises(ExtractorNormalizationError):
extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
@pytest.mark.django_db
def test_bcl_extractor_registry_selection(settings):
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
extractor = create_extractor("bcl")
assert isinstance(extractor, BCLSnapshotExtractor)
@pytest.mark.django_db
def test_run_bcl_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
fixture_payload = _load_fixture("bcl/bcl_players_stats.json")
class FakeClient:
def get_json(self, *_args, **_kwargs):
return fixture_payload
monkeypatch.setattr(
"apps.ingestion.extractors.bcl.ResponsibleHttpClient",
lambda **_kwargs: FakeClient(),
)
call_command(
"run_bcl_extractor",
"--output-path",
str(tmp_path),
"--snapshot-date",
"2026-03-13",
)
files = list(tmp_path.glob("bcl-2026-03-13.json"))
assert len(files) == 1
payload = json.loads(files[0].read_text(encoding="utf-8"))
assert payload["source_name"] == "bcl"
assert len(payload["records"]) == 1

View File

@ -1,38 +0,0 @@
import os
import subprocess
import sys
import pytest
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
env = os.environ.copy()
env.update(env_overrides)
return subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
text=True,
env=env,
check=False,
)
@pytest.mark.django_db
def test_invalid_cron_does_not_crash_config_import_path():
result = _run_python_import(
(
"import config; "
"from config.celery import app; "
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
),
{
"DJANGO_SETTINGS_MODULE": "config.settings.development",
"DJANGO_ENV": "development",
"DJANGO_DEBUG": "1",
"INGESTION_SCHEDULE_ENABLED": "1",
"INGESTION_SCHEDULE_CRON": "bad cron value",
},
)
assert result.returncode == 0
assert "beat_schedule_size=0" in result.stdout

View File

@ -0,0 +1,95 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import date
import pytest
from django.core.management import call_command
from apps.ingestion.services.daily_orchestration import parse_enabled_extractors, run_daily_orchestration
@dataclass
class _FakeExtractorResult:
records_count: int
output_path: str
class _FakeExtractor:
def __init__(self, name: str):
self.name = name
def run(self, *, snapshot_date=None):
if snapshot_date:
return _FakeExtractorResult(records_count=3, output_path=f"/tmp/{self.name}-{snapshot_date}.json")
return _FakeExtractorResult(records_count=3, output_path=f"/tmp/{self.name}.json")
@dataclass
class _FakeImportRun:
id: int = 11
status: str = "success"
files_processed: int = 2
files_total: int = 2
rows_upserted: int = 20
rows_failed: int = 0
class _FakeImporter:
def __init__(self, **_kwargs):
pass
def run(self):
return _FakeImportRun()
def test_parse_enabled_extractors():
assert parse_enabled_extractors("lba,bcl") == ["lba", "bcl"]
assert parse_enabled_extractors(" lba , , bcl ") == ["lba", "bcl"]
assert parse_enabled_extractors("") == []
@pytest.mark.django_db
def test_daily_orchestration_runs_extractors_then_import(settings, monkeypatch):
    """Orchestration runs every configured extractor, then a single import."""
    settings.DAILY_ORCHESTRATION_EXTRACTORS = "lba,bcl"
    # Swap in fakes so no real extraction or database import happens.
    monkeypatch.setattr(
        "apps.ingestion.services.daily_orchestration.create_extractor",
        lambda name: _FakeExtractor(name),
    )
    monkeypatch.setattr(
        "apps.ingestion.services.daily_orchestration.SnapshotImporter",
        _FakeImporter,
    )
    result = run_daily_orchestration(snapshot_date=date(2026, 3, 13))
    # Extractors run in configured order; import stats come from the fake run.
    assert [row.extractor_name for row in result.extractors_run] == ["lba", "bcl"]
    assert result.import_run_id == 11
    assert result.import_status == "success"
    assert result.rows_upserted == 20
@pytest.mark.django_db
def test_daily_orchestration_raises_when_no_extractors_configured(settings):
    """An empty extractor list is a configuration error, not a silent no-op."""
    settings.DAILY_ORCHESTRATION_EXTRACTORS = ""
    with pytest.raises(ValueError, match="cannot be empty"):
        run_daily_orchestration()
@pytest.mark.django_db
def test_run_daily_orchestration_command(settings, monkeypatch, capsys):
    """Management command delegates to the orchestration service and reports."""
    settings.DAILY_ORCHESTRATION_EXTRACTORS = "lba,bcl"
    monkeypatch.setattr(
        "apps.ingestion.services.daily_orchestration.create_extractor",
        lambda name: _FakeExtractor(name),
    )
    monkeypatch.setattr(
        "apps.ingestion.services.daily_orchestration.SnapshotImporter",
        _FakeImporter,
    )
    call_command("run_daily_orchestration", "--snapshot-date", "2026-03-13")
    captured = capsys.readouterr()
    # Summary line lists each extractor with its record count.
    assert "Daily orchestration completed" in captured.out
    assert "extractors=[lba:3, bcl:3]" in captured.out

View File

@ -0,0 +1,312 @@
from __future__ import annotations
import json
from datetime import date
import pytest
from django.core.management import call_command
from apps.ingestion.extractors.base import BaseSnapshotExtractor
from apps.ingestion.extractors.base import ExtractorNormalizationError
from apps.ingestion.extractors.http import ResponsibleHttpClient
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
class DummyExtractor(BaseSnapshotExtractor):
    """Minimal concrete extractor used to exercise BaseSnapshotExtractor.run()."""
    extractor_name = "dummy"
    source_name = "dummy_source"
    def fetch(self):
        # Stand-in for a remote payload fetch.
        return {"rows": [{"name": "Jane Doe"}]}
    def parse(self, payload):
        return payload["rows"]
    def normalize_record(self, source_record):
        # Fully-populated record satisfying the snapshot record schema.
        return {
            "competition_external_id": "comp-1",
            "competition_name": "League One",
            "season": "2025-2026",
            "team_external_id": "team-1",
            "team_name": "Team One",
            "player_external_id": "player-1",
            "full_name": source_record["name"],
            "first_name": "Jane",
            "last_name": "Doe",
            "birth_date": "2000-01-01",
            "nationality": "US",
            "height_cm": 180,
            "weight_kg": 75,
            "position": "SG",
            "games_played": 10,
            "minutes_per_game": 30.0,
            "points_per_game": 15.0,
            "rebounds_per_game": 4.0,
            "assists_per_game": 3.0,
            "steals_per_game": 1.2,
            "blocks_per_game": 0.4,
            "turnovers_per_game": 2.0,
            "fg_pct": 45.0,
            "three_pt_pct": 35.0,
            "ft_pct": 82.0,
        }
class _FakeResponse:
def __init__(self, payload, status_code=200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self):
if self.status_code >= 400:
raise RuntimeError(f"status={self.status_code}")
def json(self):
return self._payload
def _minimal_public_json_record() -> dict:
return {
"competition_external_id": "comp-1",
"competition_name": "League One",
"season": "2025-2026",
"team_external_id": "team-1",
"team_name": "Team One",
"player_external_id": "player-1",
"full_name": "Jane Doe",
"games_played": 12,
"minutes_per_game": 27.2,
"points_per_game": 13.0,
"rebounds_per_game": 4.4,
"assists_per_game": 3.1,
"steals_per_game": 1.0,
"blocks_per_game": 0.3,
"turnovers_per_game": 1.8,
"fg_pct": 46.2,
"three_pt_pct": 35.5,
"ft_pct": 82.1,
}
@pytest.mark.django_db
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
    """run() writes a dated JSON snapshot envelope into the incoming dir."""
    settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
    extractor = DummyExtractor()
    result = extractor.run(snapshot_date=date(2026, 3, 13))
    assert result.records_count == 1
    assert result.source_name == "dummy_source"
    assert result.output_path is not None
    assert result.output_path.exists()
    # The envelope carries source identity, snapshot date, and normalized records.
    payload = json.loads(result.output_path.read_text(encoding="utf-8"))
    assert payload["source_name"] == "dummy_source"
    assert payload["snapshot_date"] == "2026-03-13"
    assert payload["records"][0]["full_name"] == "Jane Doe"
@pytest.mark.django_db
def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
    """Common public-feed aliases (gp/mpg/ppg, *_id ints) map to schema names."""
    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            # Feed-style record using abbreviated stat names and integer ids.
            return {
                "records": [
                    {
                        "competition_id": 99,
                        "competition_name": "National League",
                        "season": 2025,
                        "team_id": 10,
                        "team_name": "Blue Team",
                        "player_id": 123,
                        "player_name": "John Smith",
                        "first_name": "John",
                        "last_name": "Smith",
                        "birth_date": "2001-05-12",
                        "nationality": "US",
                        "height_cm": 198,
                        "weight_kg": 96,
                        "position": "SF",
                        "gp": 20,
                        "mpg": 28.5,
                        "ppg": 14.2,
                        "rpg": 5.1,
                        "apg": 3.2,
                        "spg": 1.1,
                        "bpg": 0.5,
                        "tov": 1.9,
                        "fg_pct": 47.3,
                        "three_pct": 36.1,
                        "ft_pct": 80.0,
                    }
                ]
            }
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    output_file = tmp_path / "public.json"
    result = extractor.run(output_path=output_file, snapshot_date=date(2026, 3, 13))
    assert result.records_count == 1
    payload = json.loads(output_file.read_text(encoding="utf-8"))
    row = payload["records"][0]
    # Integer ids are stringified; aliased names are canonicalized.
    assert row["competition_external_id"] == "99"
    assert row["team_external_id"] == "10"
    assert row["player_external_id"] == "123"
    assert row["full_name"] == "John Smith"
    assert row["three_pt_pct"] == 36.1
@pytest.mark.django_db
def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path):
    """Bio/physical fields are optional; missing ones normalize to None."""
    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [_minimal_public_json_record()]}
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    output_file = tmp_path / "public-optional.json"
    result = extractor.run(output_path=output_file, snapshot_date=date(2026, 3, 13))
    assert result.records_count == 1
    payload = json.loads(output_file.read_text(encoding="utf-8"))
    row = payload["records"][0]
    assert row["full_name"] == "Jane Doe"
    # Every optional bio/physical field is explicitly present as None.
    assert row["first_name"] is None
    assert row["last_name"] is None
    assert row["birth_date"] is None
    assert row["nationality"] is None
    assert row["height_cm"] is None
    assert row["weight_kg"] is None
    assert row["position"] is None
    assert row.get("role") is None
@pytest.mark.django_db
def test_public_json_extractor_fails_when_required_stat_missing():
    """Dropping a required stat makes normalization raise instead of emitting."""
    broken = _minimal_public_json_record()
    broken.pop("points_per_game")
    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [broken]}
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
@pytest.mark.django_db
@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS))
def test_public_json_required_fields_follow_snapshot_schema(required_field):
    """Each schema-required field, when absent, triggers a normalization error.

    Parametrized over REQUIRED_RECORD_FIELDS so the extractor's contract
    stays pinned to the shared snapshot schema definition.
    """
    broken = _minimal_public_json_record()
    broken.pop(required_field)
    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [broken]}
    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError, match="missing required fields"):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
@pytest.mark.django_db
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
    """The run_extractor command writes a dated snapshot for the configured source.

    Uses ``pytest.MonkeyPatch.context()`` instead of a manual
    ``MonkeyPatch()`` + ``try/finally`` so the patch is always undone,
    even if ``call_command`` raises.
    """
    settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"
    settings.EXTRACTOR_PUBLIC_SOURCE_NAME = "cmd_test_source"
    output_dir = tmp_path / "snapshots"
    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            # One fully-populated record so normalization succeeds end-to-end.
            return {
                "records": [
                    {
                        "competition_external_id": "comp-a",
                        "competition_name": "Alpha League",
                        "season": "2025-2026",
                        "team_external_id": "team-a",
                        "team_name": "Alpha Team",
                        "player_external_id": "player-a",
                        "full_name": "Alpha Player",
                        "first_name": "Alpha",
                        "last_name": "Player",
                        "birth_date": "2000-04-01",
                        "nationality": "US",
                        "height_cm": 190,
                        "weight_kg": 88,
                        "position": "PG",
                        "games_played": 12,
                        "minutes_per_game": 31.0,
                        "points_per_game": 17.0,
                        "rebounds_per_game": 4.0,
                        "assists_per_game": 6.0,
                        "steals_per_game": 1.3,
                        "blocks_per_game": 0.1,
                        "turnovers_per_game": 2.4,
                        "fg_pct": 44.0,
                        "three_pt_pct": 37.0,
                        "ft_pct": 79.0,
                    }
                ]
            }
    # Context manager guarantees monkeypatch.undo() on every exit path.
    with pytest.MonkeyPatch.context() as monkeypatch:
        monkeypatch.setattr(
            "apps.ingestion.extractors.public_json.ResponsibleHttpClient",
            lambda **_kwargs: FakeClient(),
        )
        call_command(
            "run_extractor",
            "public_json_snapshot",
            "--output-path",
            str(output_dir),
            "--snapshot-date",
            "2026-03-13",
        )
    # Exactly one dated snapshot file with the configured source identity.
    files = list(output_dir.glob("public_json_snapshot-2026-03-13.json"))
    assert len(files) == 1
    payload = json.loads(files[0].read_text(encoding="utf-8"))
    assert payload["source_name"] == "cmd_test_source"
    assert payload["records"][0]["full_name"] == "Alpha Player"
def test_http_client_retries_on_retryable_status():
    """A 429 first response is retried and the retry's JSON payload returned.

    The previous signature declared a ``monkeypatch`` fixture that was
    never used; it has been dropped.
    """
    class FakeSession:
        def __init__(self):
            self.calls = 0
        def get(self, *_args, **_kwargs):
            self.calls += 1
            # First call simulates a retryable 429; the retry succeeds.
            if self.calls == 1:
                return _FakeResponse({"error": "busy"}, status_code=429)
            return _FakeResponse({"records": []}, status_code=200)
    client = ResponsibleHttpClient(
        user_agent="test-agent",
        timeout_seconds=5,
        retries=1,
        retry_sleep_seconds=0,
        request_delay_seconds=0,
        session=FakeSession(),
    )
    payload = client.get_json("https://example.com/feed.json")
    assert payload == {"records": []}

View File

@ -0,0 +1,363 @@
from __future__ import annotations
import json
from datetime import date
from pathlib import Path
import pytest
from django.core.management import call_command
from apps.competitions.models import Competition, Season
from apps.ingestion.models import ImportFile, ImportRun
from apps.players.models import Player
from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team
def _valid_payload() -> dict:
return {
"source_name": "official_site_feed",
"snapshot_date": "2026-03-13",
"records": [
{
"competition_external_id": "comp-nba",
"competition_name": "NBA",
"season": "2025-2026",
"team_external_id": "team-lal",
"team_name": "Los Angeles Lakers",
"player_external_id": "player-23",
"full_name": "LeBron James",
"first_name": "LeBron",
"last_name": "James",
"birth_date": "1984-12-30",
"nationality": "US",
"height_cm": 206,
"weight_kg": 113,
"position": "SF",
"role": "Primary Creator",
"games_played": 60,
"minutes_per_game": 34.5,
"points_per_game": 25.4,
"rebounds_per_game": 7.2,
"assists_per_game": 8.1,
"steals_per_game": 1.3,
"blocks_per_game": 0.7,
"turnovers_per_game": 3.2,
"fg_pct": 51.1,
"three_pt_pct": 38.4,
"ft_pct": 79.8,
}
],
}
def _valid_payload_for_source(source_name: str, *, competition_name: str = "NBA", team_name: str = "Los Angeles Lakers") -> dict:
    """Variant of :func:`_valid_payload` rebranded for another source."""
    payload = _valid_payload()
    record = payload["records"][0]
    payload["source_name"] = source_name
    record["competition_name"] = competition_name
    record["team_name"] = team_name
    return payload
def _write_json(path: Path, payload: dict) -> None:
path.write_text(json.dumps(payload), encoding="utf-8")
@pytest.mark.django_db
def test_valid_snapshot_import(tmp_path, settings):
    """A valid snapshot creates all domain rows and archives the file."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    file_path = incoming / "nba-2026-03-13.json"
    _write_json(file_path, payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    assert run.files_processed == 1
    assert run.rows_upserted == 1
    import_file = ImportFile.objects.get(import_run=run)
    assert import_file.status == ImportFile.FileStatus.SUCCESS
    assert import_file.source_name == "official_site_feed"
    assert import_file.snapshot_date == date(2026, 3, 13)
    # Successful files move incoming -> archive.
    assert (archive / "nba-2026-03-13.json").exists()
    assert not (incoming / "nba-2026-03-13.json").exists()
    # Domain rows are keyed by (source_name, source_uid).
    assert Competition.objects.filter(source_name="official_site_feed", source_uid="comp-nba").exists()
    assert Team.objects.filter(source_name="official_site_feed", source_uid="team-lal").exists()
    assert Player.objects.filter(source_name="official_site_feed", source_uid="player-23").exists()
    assert Season.objects.filter(source_uid="season:2025-2026").exists()
    assert PlayerSeason.objects.count() == 1
    assert PlayerSeasonStats.objects.count() == 1
@pytest.mark.django_db
def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing(tmp_path, settings):
    """Records lacking optional bio/physical fields still import successfully."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    # Strip every optional field; only identity + stats remain.
    for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"):
        payload["records"][0].pop(optional_field, None)
    file_path = incoming / "optional-missing.json"
    _write_json(file_path, payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    player = Player.objects.get(source_uid="player-23")
    # Name parts are derived from full_name; the rest stay NULL.
    assert player.first_name == "LeBron"
    assert player.last_name == "James"
    assert player.birth_date is None
    assert player.nationality is None
    assert player.nominal_position is None
    assert player.height_cm is None
    assert player.weight_kg is None
    assert PlayerSeasonStats.objects.count() == 1
@pytest.mark.django_db
def test_snapshot_import_preserves_single_name_part_without_forced_split(tmp_path, settings):
    """A record with only first_name keeps it; last_name stays empty."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    row = payload["records"][0]
    row["first_name"] = "LeBron"
    row.pop("last_name")
    file_path = incoming / "single-name-part.json"
    _write_json(file_path, payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    player = Player.objects.get(source_uid="player-23")
    # The importer must not back-fill last_name by splitting full_name.
    assert player.first_name == "LeBron"
    assert player.last_name == ""
@pytest.mark.django_db
@pytest.mark.parametrize(
    ("source_name", "competition_id", "competition_name"),
    [
        ("lba", "lba-serie-a", "Lega Basket Serie A"),
        ("bcl", "bcl", "Basketball Champions League"),
    ],
)
def test_partial_public_source_snapshot_imports_for_lba_and_bcl(
    tmp_path,
    settings,
    source_name,
    competition_id,
    competition_name,
):
    """Stats-only public snapshots (LBA/BCL realism) import successfully."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    payload["source_name"] = source_name
    row = payload["records"][0]
    row["competition_external_id"] = competition_id
    row["competition_name"] = competition_name
    # Public sources typically omit bio/physical fields entirely.
    for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"):
        row.pop(optional_field, None)
    _write_json(incoming / f"{source_name}.json", payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    assert Competition.objects.filter(source_uid=competition_id, name=competition_name).exists()
    assert Player.objects.filter(source_uid="player-23").exists()
    assert PlayerSeasonStats.objects.count() == 1
@pytest.mark.django_db
def test_invalid_snapshot_rejected_and_moved_to_failed(tmp_path, settings):
    """A snapshot missing a required stat is rejected atomically."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    del payload["records"][0]["points_per_game"]
    file_path = incoming / "broken.json"
    _write_json(file_path, payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.FAILED
    import_file = ImportFile.objects.get(import_run=run)
    assert import_file.status == ImportFile.FileStatus.FAILED
    assert "missing required fields" in import_file.error_message
    # Rejected files move to failed, never archive; no partial rows persist.
    assert (failed / "broken.json").exists()
    assert not (archive / "broken.json").exists()
    assert not Competition.objects.exists()
@pytest.mark.django_db
def test_idempotent_reimport_uses_checksum_and_skips_duplicate(tmp_path, settings):
    """Identical content under a new filename is skipped via checksum match."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    _write_json(incoming / "first.json", payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    assert Competition.objects.count() == 1
    assert Player.objects.count() == 1
    # Re-drop same content with different filename.
    _write_json(incoming / "first-duplicate.json", payload)
    call_command("import_snapshots")
    # No new domain rows; the duplicate is recorded as SKIPPED and archived.
    assert Competition.objects.count() == 1
    assert Player.objects.count() == 1
    assert PlayerSeason.objects.count() == 1
    duplicate_file = ImportFile.objects.filter(relative_path="first-duplicate.json").order_by("-id").first()
    assert duplicate_file is not None
    assert duplicate_file.status == ImportFile.FileStatus.SKIPPED
    assert duplicate_file.checksum
    assert "duplicate checksum" in duplicate_file.error_message.lower()
    assert (archive / "first-duplicate.json").exists()
@pytest.mark.django_db
def test_same_run_second_file_same_checksum_is_skipped(tmp_path, settings):
    """Within one run, the second file with an identical checksum is skipped."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload()
    _write_json(incoming / "a.json", payload)
    _write_json(incoming / "b.json", payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    files = {row.relative_path: row for row in ImportFile.objects.order_by("relative_path")}
    # Files are processed alphabetically, so a.json wins and b.json is skipped.
    assert files["a.json"].status == ImportFile.FileStatus.SUCCESS
    assert files["b.json"].status == ImportFile.FileStatus.SKIPPED
    assert files["a.json"].checksum == files["b.json"].checksum
@pytest.mark.django_db
def test_same_raw_external_ids_from_different_sources_do_not_collide(tmp_path, settings):
    """Identity is namespaced by source: equal raw ids yield distinct rows."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    # Both payloads reuse the same raw external ids (comp-nba/team-lal/player-23).
    lba_payload = _valid_payload_for_source("lba", competition_name="Lega Basket Serie A", team_name="Virtus Bologna")
    bcl_payload = _valid_payload_for_source("bcl", competition_name="Basketball Champions League", team_name="AEK Athens")
    _write_json(incoming / "lba.json", lba_payload)
    _write_json(incoming / "bcl.json", bcl_payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    # One row per (source_name, source_uid) pair, never merged across sources.
    assert Competition.objects.filter(source_uid="comp-nba").count() == 2
    assert Team.objects.filter(source_uid="team-lal").count() == 2
    assert Player.objects.filter(source_uid="player-23").count() == 2
    assert Competition.objects.filter(source_name="lba", source_uid="comp-nba", name="Lega Basket Serie A").exists()
    assert Competition.objects.filter(source_name="bcl", source_uid="comp-nba", name="Basketball Champions League").exists()
    assert Team.objects.filter(source_name="lba", source_uid="team-lal", name="Virtus Bologna").exists()
    assert Team.objects.filter(source_name="bcl", source_uid="team-lal", name="AEK Athens").exists()
@pytest.mark.django_db
def test_reimport_same_source_payload_remains_idempotent(tmp_path, settings):
    """Re-importing the same source's payload upserts, never duplicates."""
    incoming = tmp_path / "incoming"
    archive = tmp_path / "archive"
    failed = tmp_path / "failed"
    incoming.mkdir()
    archive.mkdir()
    failed.mkdir()
    payload = _valid_payload_for_source("lba")
    _write_json(incoming / "lba-1.json", payload)
    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)
    call_command("import_snapshots")
    _write_json(incoming / "lba-2.json", payload)
    call_command("import_snapshots")
    assert Competition.objects.filter(source_name="lba", source_uid="comp-nba").count() == 1
    assert Team.objects.filter(source_name="lba", source_uid="team-lal").count() == 1
    assert Player.objects.filter(source_name="lba", source_uid="player-23").count() == 1

View File

@ -1,251 +0,0 @@
import os
import pytest
from apps.competitions.models import Competition, Season
from apps.ingestion.models import IngestionError, IngestionRun
from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Nationality, Player
from apps.providers.exceptions import ProviderRateLimitError
from apps.providers.models import ExternalMapping
from apps.stats.models import PlayerSeason, PlayerSeasonStats
from apps.teams.models import Team
@pytest.mark.django_db
def test_run_full_sync_creates_domain_objects(settings):
    """A full demo-provider sync populates every domain model and run context."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    assert run.status == IngestionRun.RunStatus.SUCCESS
    assert Competition.objects.count() >= 1
    assert Team.objects.count() >= 1
    assert Season.objects.count() >= 1
    assert Player.objects.count() >= 1
    assert PlayerSeason.objects.count() >= 1
    assert PlayerSeasonStats.objects.count() >= 1
    assert Player.objects.filter(origin_competition__isnull=False).exists()
    # Run context records the ordered sync steps and per-entity source counts.
    assert run.context.get("completed_steps") == [
        "competitions",
        "teams",
        "seasons",
        "players",
        "player_stats",
        "player_careers",
    ]
    assert run.context.get("source_counts", {}).get("players", 0) >= 1
@pytest.mark.django_db
def test_full_sync_is_idempotent(settings):
    """Running the full sync twice must not change any row counts."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_first = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
    }
    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_second = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
    }
    assert counts_after_first == counts_after_second
@pytest.mark.django_db
def test_incremental_sync_runs_successfully(settings):
    """An incremental sync with a cursor completes and records timing."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    run = run_sync_job(
        provider_namespace="mvp_demo",
        job_type=IngestionRun.JobType.INCREMENTAL,
        cursor="demo-cursor",
    )
    assert run.status == IngestionRun.RunStatus.SUCCESS
    assert run.records_processed > 0
    # Timestamps must be set and ordered; no error summary on success.
    assert run.started_at is not None
    assert run.finished_at is not None
    assert run.finished_at >= run.started_at
    assert run.error_summary == ""
@pytest.mark.django_db
def test_run_sync_handles_rate_limit(settings):
    """A forced provider rate limit fails the run and records an ingestion error.

    The environment flag that forces the rate limit is now removed in a
    ``finally`` block, so a failing assertion can no longer leak
    PROVIDER_MVP_FORCE_RATE_LIMIT into later tests.
    """
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1"
    try:
        with pytest.raises(ProviderRateLimitError):
            run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
        run = IngestionRun.objects.order_by("-id").first()
        assert run is not None
        assert run.status == IngestionRun.RunStatus.FAILED
        assert run.started_at is not None
        assert run.finished_at is not None
        assert "Rate limit" in run.error_summary
        assert IngestionError.objects.filter(ingestion_run=run).exists()
    finally:
        # Always restore the environment, even when an assertion fails.
        os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
    """With a stable provider payload, two syncs leave identical row counts.

    A fake provider returns the same fully-wired payload every call, so
    the second sync must upsert rather than duplicate. Also checks that a
    null nationality stays null (no placeholder 'ZZ' rows are invented).
    """
    class StableProvider:
        def sync_all(self):
            # One entity per domain table, all cross-referenced by external_id.
            return {
                "competitions": [
                    {
                        "external_id": "competition-nba",
                        "name": "NBA",
                        "slug": "nba",
                        "competition_type": "league",
                        "gender": "men",
                        "level": 1,
                        "country": None,
                        "is_active": True,
                    }
                ],
                "teams": [
                    {
                        "external_id": "team-14",
                        "name": "Los Angeles Lakers",
                        "short_name": "LAL",
                        "slug": "los-angeles-lakers",
                        "country": None,
                        "is_national_team": False,
                    }
                ],
                "seasons": [
                    {
                        "external_id": "season-2024",
                        "label": "2024-2025",
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "is_current": False,
                    }
                ],
                "players": [
                    {
                        "external_id": "player-237",
                        "first_name": "LeBron",
                        "last_name": "James",
                        "full_name": "LeBron James",
                        "birth_date": None,
                        "nationality": None,
                        "nominal_position": {"code": "SF", "name": "Small Forward"},
                        "inferred_role": {"code": "wing", "name": "Wing"},
                        "height_cm": None,
                        "weight_kg": None,
                        "dominant_hand": "unknown",
                        "is_active": True,
                        "aliases": [],
                    }
                ],
                "player_stats": [
                    {
                        "external_id": "ps-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "games_played": 2,
                        "games_started": 0,
                        "minutes_played": 68,
                        "points": 25,
                        "rebounds": 9,
                        "assists": 8,
                        "steals": 1.5,
                        "blocks": 0.5,
                        "turnovers": 3.5,
                        "fg_pct": 55.0,
                        "three_pct": 45.0,
                        "ft_pct": 95.0,
                        "usage_rate": None,
                        "true_shooting_pct": None,
                        "player_efficiency_rating": None,
                    }
                ],
                "player_careers": [
                    {
                        "external_id": "career-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "role_code": "",
                        "shirt_number": None,
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "notes": "Imported from balldontlie aggregated box scores",
                    }
                ],
            }
        def sync_incremental(self, *, cursor: str | None = None):
            # Incremental payload is the full payload plus the echoed cursor.
            payload = self.sync_all()
            payload["cursor"] = cursor
            return payload
    monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())
    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    lebron = Player.objects.get(full_name="LeBron James")
    # Null nationality must stay null - no fabricated placeholder country.
    assert lebron.nationality is None
    assert not Nationality.objects.filter(iso2_code="ZZ").exists()
    counts_first = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }
    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_second = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }
    assert counts_first == counts_second
@pytest.mark.django_db
def test_batch_transactions_preserve_prior_step_progress_on_failure(settings, monkeypatch):
    """A step failure keeps earlier steps' rows; only the failed step rolls back."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    def boom(*args, **kwargs):
        raise RuntimeError("teams-sync-failed")
    # Make the second step (teams) blow up after competitions succeeded.
    monkeypatch.setattr("apps.ingestion.services.sync._sync_teams", boom)
    with pytest.raises(RuntimeError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    run = IngestionRun.objects.order_by("-id").first()
    assert run is not None
    assert run.status == IngestionRun.RunStatus.FAILED
    # Competitions persisted; teams rolled back; context reflects progress.
    assert Competition.objects.exists()
    assert Team.objects.count() == 0
    assert run.context.get("completed_steps") == ["competitions"]
    assert "Unhandled ingestion error" in run.error_summary

View File

@ -1,112 +0,0 @@
import pytest
from contextlib import contextmanager
from celery.schedules import crontab
import psycopg
from django.conf import settings
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.runs import _build_ingestion_lock_key, release_ingestion_lock, try_acquire_ingestion_lock
from apps.ingestion.tasks import scheduled_provider_sync, trigger_incremental_sync
from config.celery import app as celery_app, build_periodic_schedule
@pytest.mark.django_db
def test_periodic_task_registered():
    """The scheduled sync task must be registered with the Celery app."""
    assert "apps.ingestion.tasks.scheduled_provider_sync" in celery_app.tasks
@pytest.mark.django_db
def test_build_periodic_schedule_enabled(settings):
    """With scheduling enabled, the cron string becomes a crontab beat entry."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "15 * * * *"
    schedule = build_periodic_schedule()
    assert "ingestion.scheduled_provider_sync" in schedule
    entry = schedule["ingestion.scheduled_provider_sync"]
    assert entry["task"] == "apps.ingestion.tasks.scheduled_provider_sync"
    assert isinstance(entry["schedule"], crontab)
    # crontab keeps the raw minute field; verifies the cron string was parsed.
    assert entry["schedule"]._orig_minute == "15"
@pytest.mark.django_db
def test_build_periodic_schedule_disabled(settings):
    """Disabled scheduling yields an empty beat schedule."""
    settings.INGESTION_SCHEDULE_ENABLED = False
    assert build_periodic_schedule() == {}
@pytest.mark.django_db
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
    """A malformed cron disables the task and logs an error, never raises."""
    settings.INGESTION_SCHEDULE_ENABLED = True
    settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
    with caplog.at_level("ERROR"):
        schedule = build_periodic_schedule()
    assert schedule == {}
    assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
@pytest.mark.django_db
def test_trigger_incremental_sync_skips_when_advisory_lock_not_acquired(settings, monkeypatch):
    """When the advisory lock is held, the task cancels instead of overlapping."""
    settings.INGESTION_PREVENT_OVERLAP = True
    @contextmanager
    def fake_lock(**kwargs):
        # Simulate a lock already held elsewhere.
        yield False
    monkeypatch.setattr("apps.ingestion.tasks.ingestion_advisory_lock", fake_lock)
    run_id = trigger_incremental_sync.apply(
        kwargs={"provider_namespace": "mvp_demo"},
    ).get()
    skipped_run = IngestionRun.objects.get(id=run_id)
    assert skipped_run.status == IngestionRun.RunStatus.CANCELED
    assert "advisory lock" in skipped_run.error_summary
@pytest.mark.django_db
def test_advisory_lock_prevents_concurrent_acquisition():
    """A Postgres advisory lock held on one connection blocks Django's attempt."""
    provider_namespace = "mvp_demo"
    job_type = IngestionRun.JobType.INCREMENTAL
    lock_key = _build_ingestion_lock_key(provider_namespace=provider_namespace, job_type=job_type)
    # Open a second, independent connection to hold the lock externally.
    conninfo = (
        f"dbname={settings.DATABASES['default']['NAME']} "
        f"user={settings.DATABASES['default']['USER']} "
        f"password={settings.DATABASES['default']['PASSWORD']} "
        f"host={settings.DATABASES['default']['HOST']} "
        f"port={settings.DATABASES['default']['PORT']}"
    )
    with psycopg.connect(conninfo) as external_conn:
        with external_conn.cursor() as cursor:
            cursor.execute("SELECT pg_advisory_lock(%s);", [lock_key])
            # While the external connection holds the lock, acquisition fails.
            acquired, _ = try_acquire_ingestion_lock(
                provider_namespace=provider_namespace,
                job_type=job_type,
            )
            assert acquired is False
            cursor.execute("SELECT pg_advisory_unlock(%s);", [lock_key])
            # After release, Django's connection can take and release it.
            acquired, django_key = try_acquire_ingestion_lock(
                provider_namespace=provider_namespace,
                job_type=job_type,
            )
            assert acquired is True
            release_ingestion_lock(lock_key=django_key)
@pytest.mark.django_db
def test_scheduled_provider_sync_uses_configured_job_type(settings, monkeypatch):
    """The scheduled task forwards the configured namespace and job type."""
    settings.INGESTION_SCHEDULE_JOB_TYPE = IngestionRun.JobType.FULL_SYNC
    settings.INGESTION_SCHEDULE_PROVIDER_NAMESPACE = "mvp_demo"
    captured = {}
    def fake_runner(**kwargs):
        # Record the kwargs the task passes to the overlap-guarded runner.
        captured.update(kwargs)
        return 99
    monkeypatch.setattr("apps.ingestion.tasks._run_sync_with_overlap_guard", fake_runner)
    result = scheduled_provider_sync.apply().get()
    assert result == 99
    assert captured["provider_namespace"] == "mvp_demo"
    assert captured["job_type"] == IngestionRun.JobType.FULL_SYNC

View File

@ -4,8 +4,6 @@ import pytest
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.urls import reverse from django.urls import reverse
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.sync import run_sync_job
from apps.players.models import Nationality, Player, Position, Role from apps.players.models import Nationality, Player, Position, Role
from apps.scouting.models import SavedSearch from apps.scouting.models import SavedSearch
@ -49,25 +47,3 @@ def test_saved_search_run_filters_player_results(client):
assert response.status_code == 200 assert response.status_code == 200
assert "Marco Rossi" in response.content.decode() assert "Marco Rossi" in response.content.decode()
assert "Luca Bianchi" not in response.content.decode() assert "Luca Bianchi" not in response.content.decode()
@pytest.mark.django_db
def test_ingestion_output_is_searchable_in_ui_and_api(settings, client):
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
assert run.status == IngestionRun.RunStatus.SUCCESS
player = Player.objects.filter(origin_competition__isnull=False).order_by("id").first()
assert player is not None
assert player.origin_competition_id is not None
params = {"origin_competition": player.origin_competition_id}
ui_response = client.get(reverse("players:index"), data=params)
api_response = client.get(reverse("api:players"), data=params)
assert ui_response.status_code == 200
assert api_response.status_code == 200
ui_ids = {item.id for item in ui_response.context["players"]}
api_ids = {item["id"] for item in api_response.json()["results"]}
assert player.id in ui_ids
assert player.id in api_ids

143
tests/test_lba_extractor.py Normal file
View File

@ -0,0 +1,143 @@
from __future__ import annotations
import json
from datetime import date
from pathlib import Path
import pytest
from django.core.management import call_command
from apps.ingestion.extractors.lba import LBASnapshotExtractor
from apps.ingestion.extractors.base import ExtractorNormalizationError
from apps.ingestion.extractors.registry import create_extractor
def _load_fixture(path: str) -> dict:
fixture_path = Path(__file__).parent / "fixtures" / path
return json.loads(fixture_path.read_text(encoding="utf-8"))
@pytest.mark.django_db
def test_lba_extractor_normalizes_fixture_payload(tmp_path, settings):
    """A complete public stats payload is normalized into the snapshot contract."""
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
    canned_payload = _load_fixture("lba/lba_players_stats.json")

    class StubClient:
        def get_json(self, *_args, **_kwargs):
            return canned_payload

    output_path = tmp_path / "lba.json"
    result = LBASnapshotExtractor(http_client=StubClient()).run(
        output_path=output_path, snapshot_date=date(2026, 3, 13)
    )
    assert result.extractor_name == "lba"
    assert result.source_name == "lba"
    assert result.records_count == 1
    snapshot = json.loads(output_path.read_text(encoding="utf-8"))
    assert snapshot["source_name"] == "lba"
    assert snapshot["snapshot_date"] == "2026-03-13"
    record = snapshot["records"][0]
    # Every normalized field the extractor derives from the fixture row.
    expected_fields = {
        "competition_external_id": "lba-serie-a",
        "competition_name": "Lega Basket Serie A",
        "team_external_id": "team-virtus-bologna",
        "team_name": "Virtus Bologna",
        "player_external_id": "p-001",
        "full_name": "Marco Rossi",
        "minutes_per_game": 28.3,
        "three_pt_pct": 36.5,
    }
    for field_name, expected_value in expected_fields.items():
        assert record[field_name] == expected_value
@pytest.mark.django_db
def test_lba_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
    """Bio fields missing from the public feed are normalized to None, not rejected."""
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
    partial_payload = _load_fixture("lba/lba_players_stats_partial_public.json")

    class StubClient:
        def get_json(self, *_args, **_kwargs):
            return partial_payload

    output_path = tmp_path / "lba-partial.json"
    result = LBASnapshotExtractor(http_client=StubClient()).run(
        output_path=output_path, snapshot_date=date(2026, 3, 13)
    )
    assert result.records_count == 1
    record = json.loads(output_path.read_text(encoding="utf-8"))["records"][0]
    assert record["full_name"] == "Andrea Bianchi"
    # All optional bio attributes absent from the public payload come back as None.
    for optional_field in (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
    ):
        assert record[optional_field] is None
    assert record["games_played"] == 18
@pytest.mark.django_db
def test_lba_extractor_still_fails_when_required_stats_are_missing(settings):
    """Removing a mandatory stat column makes normalization raise instead of degrading."""
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
    broken_payload = _load_fixture("lba/lba_players_stats_partial_public.json")
    # Strip a required stat ("ppg") so the schema contract is violated.
    broken_payload["data"][0].pop("ppg")

    class StubClient:
        def get_json(self, *_args, **_kwargs):
            return broken_payload

    extractor = LBASnapshotExtractor(http_client=StubClient())
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
@pytest.mark.django_db
def test_lba_extractor_registry_selection(settings):
    """The extractor registry maps the name "lba" to LBASnapshotExtractor."""
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    assert isinstance(create_extractor("lba"), LBASnapshotExtractor)
@pytest.mark.django_db
def test_run_lba_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
    """The management command writes one dated snapshot file via a stubbed HTTP client."""
    settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
    settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
    settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
    settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
    canned_payload = _load_fixture("lba/lba_players_stats.json")

    class StubClient:
        def get_json(self, *_args, **_kwargs):
            return canned_payload

    # Replace the real HTTP client factory so no network call is made.
    monkeypatch.setattr(
        "apps.ingestion.extractors.lba.ResponsibleHttpClient",
        lambda **_kwargs: StubClient(),
    )
    call_command(
        "run_lba_extractor",
        "--output-path",
        str(tmp_path),
        "--snapshot-date",
        "2026-03-13",
    )
    snapshot_files = list(tmp_path.glob("lba-2026-03-13.json"))
    assert len(snapshot_files) == 1
    snapshot = json.loads(snapshot_files[0].read_text(encoding="utf-8"))
    assert snapshot["source_name"] == "lba"
    assert len(snapshot["records"]) == 1

View File

@ -4,38 +4,71 @@ import pytest
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.db import IntegrityError from django.db import IntegrityError
from apps.competitions.models import Competition from apps.competitions.models import Competition, Season
from apps.ingestion.models import ImportFile, ImportRun
from apps.players.models import Nationality, Player, Position, Role from apps.players.models import Nationality, Player, Position, Role
from apps.providers.models import ExternalMapping
from apps.scouting.models import FavoritePlayer, SavedSearch from apps.scouting.models import FavoritePlayer, SavedSearch
from apps.teams.models import Team
@pytest.mark.django_db @pytest.mark.django_db
def test_player_unique_full_name_birth_date_constraint(): def test_source_uid_uniqueness_is_scoped_by_source_name():
nationality = Nationality.objects.create(name="Italy", iso2_code="IT", iso3_code="ITA") Season.objects.create(
position = Position.objects.create(code="PG", name="Point Guard") source_uid="season-2024",
role = Role.objects.create(code="playmaker", name="Playmaker") label="2024-2025",
start_date=date(2024, 10, 1),
end_date=date(2025, 6, 30),
)
Competition.objects.create(
source_name="lba",
source_uid="comp-001",
name="Serie A",
slug="serie-a",
competition_type=Competition.CompetitionType.LEAGUE,
)
Team.objects.create(source_name="lba", source_uid="team-001", name="Virtus Bologna", slug="virtus-bologna")
nationality = Nationality.objects.create(name="Spain", iso2_code="ES", iso3_code="ESP")
position = Position.objects.create(code="SF", name="Small Forward")
role = Role.objects.create(code="wing", name="Wing")
Player.objects.create( Player.objects.create(
first_name="Marco", source_name="lba",
last_name="Rossi", source_uid="player-001",
full_name="Marco Rossi", first_name="Juan",
birth_date=date(2001, 1, 1), last_name="Perez",
full_name="Juan Perez",
birth_date=date(2000, 5, 1),
nationality=nationality, nationality=nationality,
nominal_position=position, nominal_position=position,
inferred_role=role, inferred_role=role,
) )
Competition.objects.create(
source_name="bcl",
source_uid="comp-001",
name="BCL",
slug="bcl",
competition_type=Competition.CompetitionType.INTERNATIONAL,
)
Team.objects.create(source_name="bcl", source_uid="team-001", name="AEK", slug="aek")
Player.objects.create(
source_name="bcl",
source_uid="player-001",
first_name="Juan",
last_name="Perez",
full_name="Juan Perez",
birth_date=date(2000, 5, 1),
nationality=nationality,
nominal_position=position,
inferred_role=role,
)
assert Competition.objects.filter(source_uid="comp-001").count() == 2
assert Team.objects.filter(source_uid="team-001").count() == 2
assert Player.objects.filter(source_uid="player-001").count() == 2
with pytest.raises(IntegrityError): with pytest.raises(IntegrityError):
Player.objects.create( Team.objects.create(source_name="lba", source_uid="team-001", name="Another Team", slug="another-team")
first_name="Marco",
last_name="Rossi",
full_name="Marco Rossi",
birth_date=date(2001, 1, 1),
nationality=nationality,
nominal_position=position,
inferred_role=role,
)
@pytest.mark.django_db @pytest.mark.django_db
@ -50,14 +83,14 @@ def test_saved_search_unique_name_per_user_constraint():
@pytest.mark.django_db @pytest.mark.django_db
def test_favorite_unique_player_per_user_constraint(): def test_favorite_unique_player_per_user_constraint():
user = User.objects.create_user(username="u2", password="pass12345") user = User.objects.create_user(username="u2", password="pass12345")
nationality = Nationality.objects.create(name="Spain", iso2_code="ES", iso3_code="ESP") nationality = Nationality.objects.create(name="France", iso2_code="FR", iso3_code="FRA")
position = Position.objects.create(code="SF", name="Small Forward") position = Position.objects.create(code="PF", name="Power Forward")
role = Role.objects.create(code="wing", name="Wing") role = Role.objects.create(code="big", name="Big")
player = Player.objects.create( player = Player.objects.create(
first_name="Juan", first_name="Pierre",
last_name="Perez", last_name="Durand",
full_name="Juan Perez", full_name="Pierre Durand",
birth_date=date(2000, 5, 1), birth_date=date(2001, 3, 3),
nationality=nationality, nationality=nationality,
nominal_position=position, nominal_position=position,
inferred_role=role, inferred_role=role,
@ -69,24 +102,9 @@ def test_favorite_unique_player_per_user_constraint():
@pytest.mark.django_db @pytest.mark.django_db
def test_external_mapping_unique_provider_external_id_constraint(): def test_import_file_unique_path_within_import_run():
competition = Competition.objects.create( run = ImportRun.objects.create(source="daily_snapshot")
name="Liga ACB", ImportFile.objects.create(import_run=run, relative_path="players/2026-03-13.json")
slug="liga-acb",
competition_type=Competition.CompetitionType.LEAGUE,
gender=Competition.Gender.MEN,
level=1,
)
ExternalMapping.objects.create(
provider_namespace="mvp_demo",
external_id="comp-001",
content_object=competition,
)
with pytest.raises(IntegrityError): with pytest.raises(IntegrityError):
ExternalMapping.objects.create( ImportFile.objects.create(import_run=run, relative_path="players/2026-03-13.json")
provider_namespace="mvp_demo",
external_id="comp-001",
content_object=competition,
)

View File

@ -110,8 +110,6 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
nationality=nationality, nationality=nationality,
nominal_position=position, nominal_position=position,
inferred_role=role, inferred_role=role,
origin_competition=competition,
origin_team=team,
) )
player_season = PlayerSeason.objects.create( player_season = PlayerSeason.objects.create(
player=player, player=player,
@ -135,7 +133,7 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
response = client.get( response = client.get(
reverse("players:index"), reverse("players:index"),
data={ data={
"origin_competition": competition.id, "competition": competition.id,
"nominal_position": position.id, "nominal_position": position.id,
"sort": "ppg_desc", "sort": "ppg_desc",
"page_size": 20, "page_size": 20,
@ -152,7 +150,7 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
page2 = client.get( page2 = client.get(
reverse("players:index"), reverse("players:index"),
data={ data={
"origin_competition": competition.id, "competition": competition.id,
"nominal_position": position.id, "nominal_position": position.id,
"sort": "ppg_desc", "sort": "ppg_desc",
"page_size": 20, "page_size": 20,

View File

@ -127,14 +127,13 @@ def test_player_detail_page_loads(client):
height_cm=201, height_cm=201,
weight_kg=95, weight_kg=95,
) )
PlayerAlias.objects.create(player=player, alias="P. Martin")
response = client.get(reverse("players:detail", kwargs={"pk": player.pk})) response = client.get(reverse("players:detail", kwargs={"pk": player.pk}))
assert response.status_code == 200 assert response.status_code == 200
body = response.content.decode() body = response.content.decode()
assert "Paul Martin" in body assert "Paul Martin" in body
assert "P. Martin" in body assert "Summary" in body
assert "Season-by-Season Stats" in body
@pytest.mark.django_db @pytest.mark.django_db
@ -242,3 +241,44 @@ def test_player_search_results_render_best_eligible_metric_labels(client):
assert "Best Eligible PPG" in body assert "Best Eligible PPG" in body
assert "Best Eligible MPG" in body assert "Best Eligible MPG" in body
assert "best eligible values per player" in body.lower() assert "best eligible values per player" in body.lower()
@pytest.mark.django_db
def test_player_search_results_render_dash_for_missing_eligible_metrics(client):
    """A player with zero games shows '-' placeholders instead of misleading zeros."""
    nationality = Nationality.objects.create(name="Norway", iso2_code="NO", iso3_code="NOR")
    position = Position.objects.create(code="PF", name="Power Forward")
    role = Role.objects.create(code="big", name="Big")
    season = Season.objects.create(
        label="2025-2026",
        start_date=date(2025, 9, 1),
        end_date=date(2026, 6, 30),
    )
    competition = Competition.objects.create(
        name="BLNO",
        slug="blno",
        competition_type=Competition.CompetitionType.LEAGUE,
        gender=Competition.Gender.MEN,
        country=nationality,
    )
    team = Team.objects.create(name="Oslo", slug="oslo", country=nationality)
    statless_player = Player.objects.create(
        first_name="Ole",
        last_name="NoStats",
        full_name="Ole NoStats",
        birth_date=date(2001, 1, 1),
        nationality=nationality,
        nominal_position=position,
        inferred_role=role,
    )
    # A season row with no playing time: eligible metrics cannot be computed.
    PlayerSeason.objects.create(
        player=statless_player,
        season=season,
        team=team,
        competition=competition,
        games_played=0,
        minutes_played=0,
    )
    response = client.get(reverse("players:index"), data={"season": season.id})
    assert response.status_code == 200
    page_body = response.content.decode()
    assert "Ole NoStats" in page_body
    # Missing eligible values are rendered as '-' rather than misleading zeros.
    assert page_body.count(">-") > 0

View File

@ -1,77 +0,0 @@
import os
import pytest
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.exceptions import ProviderNotFoundError, ProviderRateLimitError
from apps.providers.registry import get_provider
@pytest.mark.django_db
def test_mvp_provider_fetch_and_search_players():
adapter = MvpDemoProviderAdapter()
players = adapter.fetch_players()
assert len(players) >= 2
results = adapter.search_players(query="luca")
assert any("Luca" in item["full_name"] for item in results)
detail = adapter.fetch_player(external_player_id="player-001")
assert detail is not None
assert detail["full_name"] == "Luca Rinaldi"
@pytest.mark.django_db
def test_mvp_provider_rate_limit_signal():
os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1"
adapter = MvpDemoProviderAdapter()
with pytest.raises(ProviderRateLimitError):
adapter.fetch_players()
os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
@pytest.mark.django_db
def test_provider_registry_resolution(settings):
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
provider = get_provider()
assert isinstance(provider, MvpDemoProviderAdapter)
with pytest.raises(ProviderNotFoundError):
get_provider("does-not-exist")
@pytest.mark.django_db
def test_demo_provider_sync_payload_uses_normalized_shape():
adapter = MvpDemoProviderAdapter()
payload = adapter.sync_all()
assert set(payload.keys()) == {
"players",
"competitions",
"teams",
"seasons",
"player_stats",
"player_careers",
"cursor",
}
assert payload["cursor"] is None
player = payload["players"][0]
assert set(player.keys()) == {
"external_id",
"first_name",
"last_name",
"full_name",
"birth_date",
"nationality",
"nominal_position",
"inferred_role",
"height_cm",
"weight_kg",
"dominant_hand",
"is_active",
"aliases",
}

View File

@ -1,263 +0,0 @@
from __future__ import annotations
import time
from typing import Any
import pytest
import requests
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
from apps.providers.clients.balldontlie import BalldontlieClient
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
from apps.providers.registry import get_default_provider_namespace, get_provider
from apps.providers.services.balldontlie_mappings import map_seasons
class _FakeResponse:
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
self.status_code = status_code
self._payload = payload or {}
self.headers = headers or {}
self.text = text
def json(self):
return self._payload
class _FakeSession:
def __init__(self, responses: list[Any]):
self._responses = responses
self.calls: list[dict[str, Any]] = []
def get(self, *args, **kwargs):
self.calls.append(kwargs)
item = self._responses.pop(0)
if isinstance(item, Exception):
raise item
return item
class _FakeBalldontlieClient:
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
if path == "/nba/v1/teams":
return {
"data": [
{
"id": 14,
"full_name": "Los Angeles Lakers",
"abbreviation": "LAL",
}
]
}
return {"data": []}
def list_paginated(
self,
path: str,
*,
params: dict[str, Any] | None = None,
per_page: int = 100,
page_limit: int = 1,
) -> list[dict[str, Any]]:
if path == "/nba/v1/players":
return [
{
"id": 237,
"first_name": "LeBron",
"last_name": "James",
"position": "F",
"team": {"id": 14},
}
]
if path == "/nba/v1/stats":
return [
{
"pts": 20,
"reb": 8,
"ast": 7,
"stl": 1,
"blk": 1,
"turnover": 3,
"fg_pct": 0.5,
"fg3_pct": 0.4,
"ft_pct": 0.9,
"min": "35:12",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
{
"pts": 30,
"reb": 10,
"ast": 9,
"stl": 2,
"blk": 0,
"turnover": 4,
"fg_pct": 0.6,
"fg3_pct": 0.5,
"ft_pct": 1.0,
"min": "33:00",
"player": {"id": 237},
"team": {"id": 14},
"game": {"season": 2024},
},
]
return []
@pytest.mark.django_db
def test_provider_registry_backend_selection(settings):
settings.PROVIDER_DEFAULT_NAMESPACE = ""
settings.PROVIDER_BACKEND = "demo"
assert get_default_provider_namespace() == "mvp_demo"
assert isinstance(get_provider(), MvpDemoProviderAdapter)
settings.PROVIDER_BACKEND = "balldontlie"
assert get_default_provider_namespace() == "balldontlie"
assert isinstance(get_provider(), BalldontlieProviderAdapter)
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
assert get_default_provider_namespace() == "mvp_demo"
@pytest.mark.django_db
def test_balldontlie_adapter_maps_payloads(settings):
settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient())
payload = adapter.sync_all()
assert payload["competitions"][0]["external_id"] == "competition-nba"
assert payload["teams"][0]["external_id"] == "team-14"
assert payload["players"][0]["external_id"] == "player-237"
assert payload["seasons"][0]["external_id"] == "season-2024"
assert payload["player_stats"][0]["games_played"] == 2
assert payload["player_stats"][0]["points"] == 25.0
assert payload["player_stats"][0]["fg_pct"] == 55.0
player = payload["players"][0]
assert player["nationality"] is None
assert "current_team_external_id" not in player
expected_keys = {
"external_id",
"first_name",
"last_name",
"full_name",
"birth_date",
"nationality",
"nominal_position",
"inferred_role",
"height_cm",
"weight_kg",
"dominant_hand",
"is_active",
"aliases",
}
assert set(player.keys()) == expected_keys
@pytest.mark.django_db
def test_balldontlie_map_seasons_marks_latest_as_current():
seasons = map_seasons([2022, 2024, 2023, 2024])
current_rows = [row for row in seasons if row["is_current"]]
assert len(current_rows) == 1
assert current_rows[0]["external_id"] == "season-2024"
assert [row["external_id"] for row in seasons] == ["season-2022", "season-2023", "season-2024"]
@pytest.mark.django_db
def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings):
class _UnauthorizedStatsClient(_FakeBalldontlieClient):
def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1):
if path == "/nba/v1/stats":
raise ProviderUnauthorizedError(
provider="balldontlie",
path="stats",
status_code=401,
detail="Unauthorized",
)
return super().list_paginated(path, params=params, per_page=per_page, page_limit=page_limit)
settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False
adapter = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient())
payload = adapter.sync_all()
assert payload["players"]
assert payload["teams"]
assert payload["player_stats"] == []
assert payload["player_careers"] == []
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(
responses=[
_FakeResponse(status_code=429, headers={"Retry-After": "0"}),
_FakeResponse(status_code=200, payload={"data": []}),
]
)
client = BalldontlieClient(session=session)
payload = client.get_json("players")
assert payload == {"data": []}
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(responses=[requests.Timeout("slow"), requests.Timeout("slow")])
client = BalldontlieClient(session=session)
with pytest.raises(ProviderTransientError):
client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
monkeypatch.setattr(time, "sleep", lambda _: None)
settings.PROVIDER_REQUEST_RETRIES = 2
settings.PROVIDER_REQUEST_RETRY_SLEEP = 0
session = _FakeSession(
responses=[
_FakeResponse(status_code=429, headers={"Retry-After": "1"}),
_FakeResponse(status_code=429, headers={"Retry-After": "1"}),
]
)
client = BalldontlieClient(session=session)
with pytest.raises(ProviderRateLimitError):
client.get_json("players")
@pytest.mark.django_db
def test_balldontlie_client_cursor_pagination(settings):
session = _FakeSession(
responses=[
_FakeResponse(
status_code=200,
payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}},
),
_FakeResponse(
status_code=200,
payload={"data": [{"id": 2}], "meta": {"next_cursor": None}},
),
]
)
client = BalldontlieClient(session=session)
rows = client.list_paginated("players", per_page=1, page_limit=5)
assert rows == [{"id": 1}, {"id": 2}]
assert "page" not in session.calls[0]["params"]
assert "cursor" not in session.calls[0]["params"]
assert session.calls[1]["params"]["cursor"] == 101

View File

@ -0,0 +1,43 @@
from __future__ import annotations
import os
import subprocess
import time
from pathlib import Path
def _repo_root() -> Path:
return Path(__file__).resolve().parent.parent
def test_scheduler_disabled_mode_stays_alive_without_exit_loop():
    """With scheduling disabled the script idles instead of exiting (no restart loop)."""
    env = dict(os.environ, SCHEDULER_ENABLED="0", SCHEDULER_DISABLED_SLEEP_SECONDS="30")
    process = subprocess.Popen(
        ["sh", "scripts/scheduler.sh"],
        cwd=_repo_root(),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    try:
        time.sleep(1.0)
        # Still running after a second: the disabled path did not exit.
        assert process.poll() is None
    finally:
        process.terminate()
        process.wait(timeout=5)
def test_scheduler_compose_service_is_profile_gated():
    """docker-compose gates the scheduler behind a profile and keeps its restart policy."""
    compose_text = (_repo_root() / "docker-compose.yml").read_text(encoding="utf-8")
    for expected_marker in ('profiles: ["scheduler"]', "restart: unless-stopped"):
        assert expected_marker in compose_text
def test_scheduler_script_declares_idle_disabled_behavior():
    """scheduler.sh documents its idle mode and honors the disabled-sleep knob."""
    script_text = (_repo_root() / "scripts/scheduler.sh").read_text(encoding="utf-8")
    assert "Entering idle mode" in script_text
    assert "SCHEDULER_DISABLED_SLEEP_SECONDS" in script_text

View File

@ -15,6 +15,20 @@ def test_scouting_index_requires_login(client):
assert reverse("users:login") in response.url assert reverse("users:login") in response.url
@pytest.mark.django_db
def test_saved_search_list_requires_login(client):
    """Anonymous visitors are redirected to login from the saved-search list."""
    response = client.get(reverse("scouting:saved_search_list"))
    assert response.status_code == 302
    assert reverse("users:login") in response.url
@pytest.mark.django_db
def test_watchlist_requires_login(client):
    """Anonymous visitors are redirected to login from the watchlist."""
    response = client.get(reverse("scouting:watchlist"))
    assert response.status_code == 302
    assert reverse("users:login") in response.url
@pytest.mark.django_db @pytest.mark.django_db
def test_create_saved_search_from_filters(client): def test_create_saved_search_from_filters(client):
user = User.objects.create_user(username="scout", password="pass12345") user = User.objects.create_user(username="scout", password="pass12345")
@ -60,6 +74,60 @@ def test_saved_search_run_redirects_to_players(client):
assert "q=rossi" in response.url assert "q=rossi" in response.url
@pytest.mark.django_db
def test_saved_search_update_renames_and_updates_filters(client):
    """Editing a saved search updates its name, visibility, and stored filters."""
    user = User.objects.create_user(username="scout-update", password="pass12345")
    client.force_login(user)
    nationality = Nationality.objects.create(name="Germany", iso2_code="DE", iso3_code="DEU")
    saved = SavedSearch.objects.create(
        user=user,
        name="Old Name",
        filters={"q": "old", "sort": "name_asc"},
        is_public=False,
    )
    new_filters_json = '{"q": "new", "nationality": %d, "sort": "ppg_desc"}' % nationality.id
    response = client.post(
        reverse("scouting:saved_search_edit", kwargs={"pk": saved.pk}),
        data={
            "name": "Updated Name",
            "is_public": "on",
            "filters_json": new_filters_json,
        },
    )
    assert response.status_code == 302
    saved.refresh_from_db()
    assert saved.name == "Updated Name"
    assert saved.is_public is True
    assert saved.filters["q"] == "new"
    assert saved.filters["nationality"] == nationality.id
    assert saved.filters["sort"] == "ppg_desc"
@pytest.mark.django_db
def test_saved_search_delete_removes_entry(client):
    """POSTing the delete endpoint removes the saved search and redirects."""
    user = User.objects.create_user(username="scout-delete", password="pass12345")
    client.force_login(user)
    doomed = SavedSearch.objects.create(user=user, name="Delete Me", filters={"q": "x"})
    response = client.post(reverse("scouting:saved_search_delete", kwargs={"pk": doomed.pk}))
    assert response.status_code == 302
    assert not SavedSearch.objects.filter(pk=doomed.pk).exists()
@pytest.mark.django_db
def test_saved_search_delete_htmx_renders_table(client):
    """HTMX deletes return the refreshed table fragment rather than a redirect."""
    user = User.objects.create_user(username="scout-delete-htmx", password="pass12345")
    client.force_login(user)
    saved = SavedSearch.objects.create(user=user, name="Delete HTMX", filters={"q": "x"})
    response = client.post(
        reverse("scouting:saved_search_delete", kwargs={"pk": saved.pk}),
        HTTP_HX_REQUEST="true",
    )
    assert response.status_code == 200
    assert "no saved searches yet" in response.content.decode().lower()
@pytest.mark.django_db @pytest.mark.django_db
def test_favorite_toggle_adds_and_removes(client): def test_favorite_toggle_adds_and_removes(client):
user = User.objects.create_user(username="scout3", password="pass12345") user = User.objects.create_user(username="scout3", password="pass12345")
@ -128,3 +196,26 @@ def test_save_search_htmx_feedback(client):
assert response.status_code == 200 assert response.status_code == 200
assert "created" in response.content.decode().lower() assert "created" in response.content.decode().lower()
@pytest.mark.django_db
def test_watchlist_page_renders_favorite_player(client):
    """The watchlist page lists the logged-in user's favorite players by name."""
    user = User.objects.create_user(username="watch-user", password="pass12345")
    client.force_login(user)
    nationality = Nationality.objects.create(name="Poland", iso2_code="PL", iso3_code="POL")
    position = Position.objects.create(code="C", name="Center")
    role = Role.objects.create(code="rim", name="Rim Protector")
    favorite_target = Player.objects.create(
        first_name="Adam",
        last_name="Big",
        full_name="Adam Big",
        birth_date=date(2001, 1, 1),
        nationality=nationality,
        nominal_position=position,
        inferred_role=role,
    )
    FavoritePlayer.objects.create(user=user, player=favorite_target)
    response = client.get(reverse("scouting:watchlist"))
    assert response.status_code == 200
    assert "Adam Big" in response.content.decode()

View File

@ -0,0 +1,15 @@
import pytest
from django.conf import settings
@pytest.mark.django_db
def test_legacy_provider_stack_disabled_by_default():
    """By default the legacy provider stack is off and its app is not installed."""
    assert settings.LEGACY_PROVIDER_STACK_ENABLED is False
    assert "apps.providers" not in settings.INSTALLED_APPS
@pytest.mark.django_db
def test_providers_route_not_mounted_by_default(client):
    """The legacy /providers/ URL prefix returns 404 when the stack is disabled."""
    assert client.get("/providers/").status_code == 404