Compare commits
21 Commits
main
...
feature/ho
| Author | SHA1 | Date | |
|---|---|---|---|
| 24aa827811 | |||
| 90f83091ce | |||
| f2d5e20701 | |||
| 887da3cd06 | |||
| eb6e0bf594 | |||
| b6b6753931 | |||
| 5a19587376 | |||
| 3f811827de | |||
| 48a82e812a | |||
| 6066d2a0bb | |||
| 1aad6945c7 | |||
| ad85e40688 | |||
| 20d3ee7dae | |||
| 0ed4fc57b8 | |||
| 5df973467d | |||
| 97913c4a79 | |||
| 850e4de71b | |||
| 6fc583c79f | |||
| eacff3d25e | |||
| 6aa66807e9 | |||
| bb033222e3 |
118
.env.example
118
.env.example
@ -1,87 +1,81 @@
|
|||||||
# Django
|
# HoopScout v2 runtime profile
|
||||||
DJANGO_SETTINGS_MODULE=config.settings.development
|
DJANGO_SETTINGS_MODULE=config.settings.development
|
||||||
DJANGO_ENV=development
|
DJANGO_ENV=development
|
||||||
# Required to be a strong, unique value outside development.
|
|
||||||
DJANGO_SECRET_KEY=change-me-in-production
|
|
||||||
DJANGO_DEBUG=1
|
DJANGO_DEBUG=1
|
||||||
|
DJANGO_SECRET_KEY=change-me-in-production
|
||||||
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
|
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
|
||||||
DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1
|
DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1
|
||||||
DJANGO_TIME_ZONE=UTC
|
DJANGO_TIME_ZONE=UTC
|
||||||
DJANGO_LOG_LEVEL=INFO
|
DJANGO_LOG_LEVEL=INFO
|
||||||
DJANGO_LOG_SQL=0
|
DJANGO_LOG_SQL=0
|
||||||
DJANGO_SUPERUSER_USERNAME=admin
|
|
||||||
DJANGO_SUPERUSER_EMAIL=admin@example.com
|
|
||||||
DJANGO_SUPERUSER_PASSWORD=adminpass
|
|
||||||
|
|
||||||
# Database (PostgreSQL only)
|
# Container image tags
|
||||||
|
APP_IMAGE_TAG=latest
|
||||||
|
NGINX_IMAGE_TAG=latest
|
||||||
|
# Reserved for future optional scheduler image:
|
||||||
|
# SCHEDULER_IMAGE_TAG=latest
|
||||||
|
|
||||||
|
# Web runtime behavior
|
||||||
|
GUNICORN_WORKERS=3
|
||||||
|
AUTO_APPLY_MIGRATIONS=1
|
||||||
|
AUTO_COLLECTSTATIC=1
|
||||||
|
|
||||||
|
# PostgreSQL (primary and only main database)
|
||||||
POSTGRES_DB=hoopscout
|
POSTGRES_DB=hoopscout
|
||||||
POSTGRES_USER=hoopscout
|
POSTGRES_USER=hoopscout
|
||||||
POSTGRES_PASSWORD=hoopscout
|
POSTGRES_PASSWORD=hoopscout
|
||||||
POSTGRES_HOST=postgres
|
POSTGRES_HOST=postgres
|
||||||
POSTGRES_PORT=5432
|
POSTGRES_PORT=5432
|
||||||
|
|
||||||
# Redis / Celery
|
# Development UID/GID for bind-mounted source write permissions
|
||||||
REDIS_HOST=redis
|
|
||||||
REDIS_PORT=6379
|
|
||||||
REDIS_DB=0
|
|
||||||
CELERY_BROKER_URL=redis://redis:6379/0
|
|
||||||
CELERY_RESULT_BACKEND=redis://redis:6379/0
|
|
||||||
|
|
||||||
# Runtime behavior
|
|
||||||
AUTO_APPLY_MIGRATIONS=1
|
|
||||||
AUTO_COLLECTSTATIC=1
|
|
||||||
AUTO_BUILD_TAILWIND=1
|
|
||||||
GUNICORN_WORKERS=3
|
|
||||||
# Development container UID/GID for bind-mounted source write permissions.
|
|
||||||
LOCAL_UID=1000
|
LOCAL_UID=1000
|
||||||
LOCAL_GID=1000
|
LOCAL_GID=1000
|
||||||
|
|
||||||
# Production-minded security toggles
|
# Static dataset storage (volume-backed directories)
|
||||||
DJANGO_SECURE_SSL_REDIRECT=1
|
STATIC_DATASET_INCOMING_DIR=/app/snapshots/incoming
|
||||||
DJANGO_SECURE_HSTS_SECONDS=31536000
|
STATIC_DATASET_ARCHIVE_DIR=/app/snapshots/archive
|
||||||
DJANGO_SESSION_COOKIE_SAMESITE=Lax
|
STATIC_DATASET_FAILED_DIR=/app/snapshots/failed
|
||||||
DJANGO_CSRF_COOKIE_SAMESITE=Lax
|
|
||||||
|
|
||||||
# Mandatory production variables (example values):
|
# Extractor framework (fetch -> parse -> normalize -> emit snapshot)
|
||||||
|
EXTRACTOR_USER_AGENT=HoopScoutBot/2.0 (+https://younerd.org)
|
||||||
|
EXTRACTOR_HTTP_TIMEOUT_SECONDS=15
|
||||||
|
EXTRACTOR_HTTP_RETRIES=2
|
||||||
|
EXTRACTOR_RETRY_SLEEP_SECONDS=1.0
|
||||||
|
EXTRACTOR_REQUEST_DELAY_SECONDS=0.5
|
||||||
|
EXTRACTOR_PUBLIC_JSON_URL=
|
||||||
|
EXTRACTOR_PUBLIC_SOURCE_NAME=public_json_source
|
||||||
|
EXTRACTOR_INCLUDE_RAW_PAYLOAD=0
|
||||||
|
EXTRACTOR_LBA_STATS_URL=
|
||||||
|
EXTRACTOR_LBA_SEASON_LABEL=2025-2026
|
||||||
|
EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID=lba-serie-a
|
||||||
|
EXTRACTOR_LBA_COMPETITION_NAME=Lega Basket Serie A
|
||||||
|
EXTRACTOR_BCL_STATS_URL=
|
||||||
|
EXTRACTOR_BCL_SEASON_LABEL=2025-2026
|
||||||
|
EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID=bcl
|
||||||
|
EXTRACTOR_BCL_COMPETITION_NAME=Basketball Champions League
|
||||||
|
DAILY_ORCHESTRATION_EXTRACTORS=lba,bcl
|
||||||
|
DAILY_ORCHESTRATION_INTERVAL_SECONDS=86400
|
||||||
|
|
||||||
|
# Future optional scheduler loop settings (not enabled in base v2 runtime)
|
||||||
|
SCHEDULER_ENABLED=0
|
||||||
|
SCHEDULER_INTERVAL_SECONDS=900
|
||||||
|
# When scheduler is disabled but container is started, keep it idle (avoid restart loops)
|
||||||
|
SCHEDULER_DISABLED_SLEEP_SECONDS=300
|
||||||
|
|
||||||
|
# Legacy provider-sync stack (v1-style) is disabled by default in v2.
|
||||||
|
LEGACY_PROVIDER_STACK_ENABLED=0
|
||||||
|
# Optional legacy provider settings (only when LEGACY_PROVIDER_STACK_ENABLED=1):
|
||||||
|
# PROVIDER_BACKEND=demo
|
||||||
|
# PROVIDER_DEFAULT_NAMESPACE=mvp_demo
|
||||||
|
|
||||||
|
# API safeguards (read-only API is optional)
|
||||||
|
API_THROTTLE_ANON=100/hour
|
||||||
|
API_THROTTLE_USER=1000/hour
|
||||||
|
|
||||||
|
# Production profile reminders:
|
||||||
# DJANGO_SETTINGS_MODULE=config.settings.production
|
# DJANGO_SETTINGS_MODULE=config.settings.production
|
||||||
# DJANGO_ENV=production
|
# DJANGO_ENV=production
|
||||||
# DJANGO_DEBUG=0
|
# DJANGO_DEBUG=0
|
||||||
# DJANGO_SECRET_KEY=<strong-unique-secret-at-least-32-chars>
|
# DJANGO_SECRET_KEY=<strong-unique-secret-at-least-32-chars>
|
||||||
# DJANGO_ALLOWED_HOSTS=app.example.com
|
# DJANGO_ALLOWED_HOSTS=app.example.com
|
||||||
# DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com
|
# DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com
|
||||||
|
|
||||||
# Providers / ingestion
|
|
||||||
PROVIDER_BACKEND=demo
|
|
||||||
PROVIDER_NAMESPACE_DEMO=mvp_demo
|
|
||||||
PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie
|
|
||||||
PROVIDER_DEFAULT_NAMESPACE=
|
|
||||||
PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
|
|
||||||
PROVIDER_REQUEST_RETRIES=3
|
|
||||||
PROVIDER_REQUEST_RETRY_SLEEP=1
|
|
||||||
PROVIDER_HTTP_TIMEOUT_SECONDS=10
|
|
||||||
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io
|
|
||||||
PROVIDER_BALLDONTLIE_API_KEY=
|
|
||||||
# NBA-centric MVP provider seasons to ingest (comma-separated years).
|
|
||||||
PROVIDER_BALLDONTLIE_SEASONS=2024
|
|
||||||
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
|
|
||||||
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
|
|
||||||
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
|
|
||||||
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
|
|
||||||
# When 0, a 401 on stats endpoint degrades to players/teams-only sync.
|
|
||||||
PROVIDER_BALLDONTLIE_STATS_STRICT=0
|
|
||||||
CELERY_TASK_TIME_LIMIT=1800
|
|
||||||
CELERY_TASK_SOFT_TIME_LIMIT=1500
|
|
||||||
INGESTION_SCHEDULE_ENABLED=0
|
|
||||||
# 5-field cron: minute hour day_of_month month day_of_week
|
|
||||||
# Example hourly: 0 * * * *
|
|
||||||
INGESTION_SCHEDULE_CRON=*/30 * * * *
|
|
||||||
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
|
|
||||||
INGESTION_SCHEDULE_JOB_TYPE=incremental
|
|
||||||
INGESTION_PREVENT_OVERLAP=1
|
|
||||||
INGESTION_OVERLAP_WINDOW_MINUTES=180
|
|
||||||
API_THROTTLE_ANON=100/hour
|
|
||||||
API_THROTTLE_USER=1000/hour
|
|
||||||
|
|
||||||
# Testing (used with pytest-django)
|
|
||||||
# Keep development settings for local tests unless explicitly validating production settings.
|
|
||||||
PYTEST_ADDOPTS=-q
|
|
||||||
|
|||||||
196
CONTRIBUTING.md
196
CONTRIBUTING.md
@ -1,137 +1,105 @@
|
|||||||
# Contributing to HoopScout
|
# Contributing to HoopScout v2
|
||||||
|
|
||||||
This repository follows a pragmatic GitFlow model.
|
HoopScout uses GitFlow and a pragmatic, production-minded workflow.
|
||||||
The goal is predictable releases with low process overhead.
|
|
||||||
|
|
||||||
## Branch Roles
|
## Branch Roles
|
||||||
|
|
||||||
- `main`: production-only, always releasable
|
- `main`: production-only, always releasable
|
||||||
- `develop`: integration branch for upcoming release
|
- `develop`: integration branch
|
||||||
- `feature/*`: feature work, branched from `develop`, merged into `develop`
|
- `feature/*`: feature branches from `develop`
|
||||||
- `release/*`: stabilization branch, branched from `develop`, merged into `main` and back into `develop`
|
- `release/*`: release hardening branches from `develop`
|
||||||
- `hotfix/*`: urgent production fixes, branched from `main`, merged into `main` and back into `develop`
|
- `hotfix/*`: urgent production fixes from `main`
|
||||||
|
|
||||||
## Branch Naming Convention
|
## Branch Naming
|
||||||
|
|
||||||
Use lowercase kebab-case.
|
|
||||||
|
|
||||||
|
Use lowercase kebab-case:
|
||||||
- `feature/<scope>-<short-description>`
|
- `feature/<scope>-<short-description>`
|
||||||
- `release/<major>.<minor>.<patch>`
|
- `release/<major>.<minor>.<patch>`
|
||||||
- `hotfix/<scope>-<short-description>`
|
- `hotfix/<scope>-<short-description>`
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
- `feature/hoopscout-v2-static-architecture`
|
||||||
|
- `feature/v2-snapshot-import-command`
|
||||||
|
- `release/2.0.0`
|
||||||
|
- `hotfix/nginx-proxy-timeout`
|
||||||
|
|
||||||
- `feature/search-age-height-filters`
|
## v2 Development Runtime
|
||||||
- `feature/providers-mvp-retry-logic`
|
|
||||||
- `release/0.2.0`
|
The v2 default runtime is intentionally simple:
|
||||||
- `hotfix/redis-volume-permissions`
|
- `web`
|
||||||
|
- `postgres`
|
||||||
|
- `nginx`
|
||||||
|
|
||||||
|
No Redis/Celery runtime services in the default v2 foundation.
|
||||||
|
|
||||||
|
### Start dev stack
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Start release-style stack
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verify release topology assumptions
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.release.yml config
|
||||||
|
./scripts/verify_release_topology.sh
|
||||||
|
```
|
||||||
|
|
||||||
## Day-to-Day Feature Workflow
|
## Day-to-Day Feature Workflow
|
||||||
|
|
||||||
1. Sync `develop`.
|
1. Sync `develop`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git checkout develop
|
git checkout develop
|
||||||
git pull origin develop
|
git pull origin develop
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Create branch.
|
2. Create feature branch
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git checkout -b feature/your-feature-name
|
git checkout -b feature/your-feature-name
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Implement, test, commit in small logical steps.
|
3. Implement with focused commits and tests.
|
||||||
|
4. Open PR: `feature/*` -> `develop`.
|
||||||
|
|
||||||
4. Rebase or merge latest `develop` before PR if needed.
|
## Running Tests (v2)
|
||||||
|
|
||||||
|
Runtime images are intentionally lean and may not ship `pytest`.
|
||||||
|
Use the development compose stack and install dev dependencies before running tests:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git checkout develop
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
|
||||||
git pull origin develop
|
|
||||||
git checkout feature/your-feature-name
|
|
||||||
git rebase develop
|
|
||||||
```
|
```
|
||||||
|
|
||||||
5. Open PR: `feature/*` -> `develop`.
|
## PR Checklist
|
||||||
|
|
||||||
## Recommended Release Workflow
|
- [ ] Target branch is correct
|
||||||
|
- [ ] Scope is focused (no unrelated refactor)
|
||||||
|
- [ ] Runtime still starts with docker compose
|
||||||
|
- [ ] Tests updated/passing for changed scope
|
||||||
|
- [ ] Docs updated (`README.md`, `.env.example`, this file) when config/runtime changes
|
||||||
|
- [ ] No secrets committed
|
||||||
|
|
||||||
1. Create release branch from `develop`.
|
## v2 Foundation Rules
|
||||||
|
|
||||||
```bash
|
- Prefer management commands over distributed orchestration unless clearly justified.
|
||||||
git checkout develop
|
- Keep PostgreSQL as source of truth.
|
||||||
git pull origin develop
|
- Keep snapshot storage file-based and volume-backed.
|
||||||
git checkout -b release/0.1.0
|
- Do not introduce MongoDB or Elasticsearch as source of truth.
|
||||||
```
|
- Keep legacy provider/Celery sync code isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
|
||||||
|
- Keep runtime/docs consistency aligned with `docs/runtime-consistency-checklist.md`.
|
||||||
2. On `release/*` allow only:
|
|
||||||
- bug fixes
|
|
||||||
- docs/changelog updates
|
|
||||||
- release metadata/version updates
|
|
||||||
|
|
||||||
3. Validate release candidate in Docker.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose up -d --build
|
|
||||||
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Merge `release/*` into `main`.
|
|
||||||
5. Tag release on `main` (`v0.1.0`).
|
|
||||||
6. Merge the same `release/*` back into `develop`.
|
|
||||||
7. Delete release branch after both merges.
|
|
||||||
|
|
||||||
## Recommended Hotfix Workflow
|
|
||||||
|
|
||||||
1. Create hotfix branch from `main`.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout main
|
|
||||||
git pull origin main
|
|
||||||
git checkout -b hotfix/your-hotfix-name
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Implement minimal fix and tests.
|
|
||||||
3. Open PR: `hotfix/*` -> `main`.
|
|
||||||
4. After merge to `main`, back-merge to `develop`.
|
|
||||||
5. Tag patch release (`vX.Y.Z`).
|
|
||||||
|
|
||||||
## Pull Request Checklist
|
|
||||||
|
|
||||||
Before requesting review, confirm:
|
|
||||||
|
|
||||||
- [ ] Branch target is correct (`develop`, `main`, or release back-merge)
|
|
||||||
- [ ] Scope is focused (no unrelated refactors)
|
|
||||||
- [ ] Docker stack still starts (`docker compose up -d`)
|
|
||||||
- [ ] Tests updated and passing
|
|
||||||
- [ ] Migrations included if models changed
|
|
||||||
- [ ] Docs updated (`README`, `CONTRIBUTING`, `.env.example`) when needed
|
|
||||||
- [ ] No secrets or credentials committed
|
|
||||||
- [ ] Changelog entry added under `Unreleased`
|
|
||||||
|
|
||||||
## Issue and Feature Templates
|
|
||||||
|
|
||||||
Use repository templates in `.github/ISSUE_TEMPLATE/`:
|
|
||||||
|
|
||||||
- `bug_report.md`
|
|
||||||
- `feature_request.md`
|
|
||||||
|
|
||||||
Use `.github/PULL_REQUEST_TEMPLATE.md` for PR descriptions.
|
|
||||||
|
|
||||||
## Changelog / Release Note Convention
|
|
||||||
|
|
||||||
- Single changelog file: `CHANGELOG.md`
|
|
||||||
- Keep `Unreleased` at top
|
|
||||||
- Categorize entries under:
|
|
||||||
- `Added`
|
|
||||||
- `Changed`
|
|
||||||
- `Fixed`
|
|
||||||
- Release format:
|
|
||||||
- `## [0.1.0] - 2026-03-10`
|
|
||||||
|
|
||||||
## Repository Bootstrap Commands
|
## Repository Bootstrap Commands
|
||||||
|
|
||||||
Maintainers should run these once to start GitFlow from current `main`:
|
If `develop` is missing in a clone:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git checkout main
|
git checkout main
|
||||||
@ -139,39 +107,3 @@ git pull origin main
|
|||||||
git checkout -b develop
|
git checkout -b develop
|
||||||
git push -u origin develop
|
git push -u origin develop
|
||||||
```
|
```
|
||||||
|
|
||||||
Then start regular feature work:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout develop
|
|
||||||
git pull origin develop
|
|
||||||
git checkout -b feature/first-team-task
|
|
||||||
```
|
|
||||||
|
|
||||||
## Local Development Setup
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cp .env.example .env
|
|
||||||
docker compose up --build
|
|
||||||
```
|
|
||||||
|
|
||||||
If needed:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose exec web python manage.py migrate
|
|
||||||
docker compose exec web python manage.py createsuperuser
|
|
||||||
```
|
|
||||||
|
|
||||||
## Testing Commands
|
|
||||||
|
|
||||||
Run full suite:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
|
|
||||||
```
|
|
||||||
|
|
||||||
Run targeted modules while developing:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_players_views.py'
|
|
||||||
```
|
|
||||||
|
|||||||
12
Dockerfile
12
Dockerfile
@ -32,23 +32,19 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends libpq5 postgresql-client curl nodejs npm \
|
&& apt-get install -y --no-install-recommends libpq5 postgresql-client curl \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN groupadd --gid "${APP_GID}" "${APP_USER}" \
|
RUN groupadd --gid "${APP_GID}" "${APP_USER}" \
|
||||||
&& useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}"
|
&& useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}"
|
||||||
RUN printf '%s\n' 'export PATH="/opt/venv/bin:/home/app/.local/bin:$PATH"' > /etc/profile.d/hoopscout-path.sh
|
|
||||||
|
|
||||||
COPY --from=builder /opt/venv /opt/venv
|
COPY --from=builder /opt/venv /opt/venv
|
||||||
COPY . /app
|
COPY . /app
|
||||||
|
|
||||||
RUN if [ -f package.json ]; then npm install --no-audit --no-fund; fi
|
RUN chmod +x /app/entrypoint.sh /app/scripts/scheduler.sh \
|
||||||
RUN if [ -f package.json ]; then npm run build; fi
|
&& mkdir -p /app/staticfiles /app/media /app/snapshots/incoming /app/snapshots/archive /app/snapshots/failed \
|
||||||
|
|
||||||
RUN chmod +x /app/entrypoint.sh
|
|
||||||
RUN mkdir -p /app/staticfiles /app/media /app/runtime /app/node_modules /app/static/vendor \
|
|
||||||
&& chown -R "${APP_UID}:${APP_GID}" /app /opt/venv
|
&& chown -R "${APP_UID}:${APP_GID}" /app /opt/venv
|
||||||
|
|
||||||
USER ${APP_UID}:${APP_GID}
|
USER ${APP_UID}:${APP_GID}
|
||||||
ENTRYPOINT ["/app/entrypoint.sh"]
|
ENTRYPOINT ["/app/entrypoint.sh"]
|
||||||
CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000"]
|
CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "3", "--access-logfile", "-", "--error-logfile", "-"]
|
||||||
|
|||||||
728
README.md
728
README.md
@ -1,422 +1,466 @@
|
|||||||
# HoopScout
|
# HoopScout v2 (Foundation Reset)
|
||||||
|
|
||||||
HoopScout is a production-minded basketball scouting and player search platform.
|
HoopScout v2 is a controlled greenfield rebuild inside the existing repository.
|
||||||
The main product experience is server-rendered Django Templates with HTMX enhancements.
|
|
||||||
A minimal read-only API is included as a secondary integration surface.
|
|
||||||
|
|
||||||
## Core Stack
|
Current v2 foundation scope in this branch:
|
||||||
|
- Django + HTMX server-rendered app
|
||||||
|
- PostgreSQL as the only primary database
|
||||||
|
- nginx reverse proxy
|
||||||
|
- management-command-driven runtime operations
|
||||||
|
- static snapshot directories persisted via Docker named volumes
|
||||||
|
- strict JSON snapshot schema + import management command
|
||||||
|
- extractor framework with LBA/BCL/public JSON adapters
|
||||||
|
- daily orchestration command and optional scheduler profile
|
||||||
|
|
||||||
- Python 3.12+
|
## Runtime Architecture (v2)
|
||||||
- Django
|
|
||||||
- Django Templates + HTMX
|
|
||||||
- Tailwind CSS (CLI build pipeline)
|
|
||||||
- PostgreSQL
|
|
||||||
- Redis
|
|
||||||
- Celery + Celery Beat
|
|
||||||
- Django REST Framework (read-only API)
|
|
||||||
- pytest
|
|
||||||
- Docker / Docker Compose
|
|
||||||
- nginx
|
|
||||||
|
|
||||||
## Architecture Summary
|
Runtime services are intentionally small:
|
||||||
|
- `web` (Django/Gunicorn)
|
||||||
|
- `postgres` (primary DB)
|
||||||
|
- `nginx` (reverse proxy + static/media serving)
|
||||||
|
- optional `scheduler` profile service (runs daily extractor/import loop)
|
||||||
|
|
||||||
- Main UI: Django + HTMX (not SPA)
|
No Redis/Celery services are part of the v2 default runtime topology.
|
||||||
- Data layer: normalized domain models for players, seasons, competitions, teams, stats, scouting state
|
Legacy Celery/provider code remains in-repo but is isolated behind `LEGACY_PROVIDER_STACK_ENABLED=1`.
|
||||||
- Provider integration: adapter-based abstraction in `apps/providers`
|
Default v2 runtime keeps that stack disabled.
|
||||||
- Ingestion orchestration: `apps/ingestion` with run/error logs and Celery task execution
|
|
||||||
- Optional API: read-only DRF endpoints under `/api/`
|
|
||||||
|
|
||||||
## Repository Structure
|
## Image Strategy
|
||||||
|
|
||||||
```text
|
Compose builds and tags images as:
|
||||||
.
|
- `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}`
|
||||||
├── apps/
|
- `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}`
|
||||||
│ ├── api/
|
|
||||||
│ ├── competitions/
|
|
||||||
│ ├── core/
|
|
||||||
│ ├── ingestion/
|
|
||||||
│ ├── players/
|
|
||||||
│ ├── providers/
|
|
||||||
│ ├── scouting/
|
|
||||||
│ ├── stats/
|
|
||||||
│ ├── teams/
|
|
||||||
│ └── users/
|
|
||||||
├── config/
|
|
||||||
│ └── settings/
|
|
||||||
├── docs/
|
|
||||||
├── nginx/
|
|
||||||
├── requirements/
|
|
||||||
├── package.json
|
|
||||||
├── tailwind.config.js
|
|
||||||
├── static/
|
|
||||||
├── templates/
|
|
||||||
├── tests/
|
|
||||||
├── .github/
|
|
||||||
├── CHANGELOG.md
|
|
||||||
├── docker-compose.yml
|
|
||||||
├── Dockerfile
|
|
||||||
└── entrypoint.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
## Quick Start
|
Reserved for future optional scheduler use:
|
||||||
|
- `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
|
||||||
|
|
||||||
1. Create local env file:
|
## Entrypoint Strategy
|
||||||
|
|
||||||
|
- `web`: `entrypoint.sh`
|
||||||
|
- waits for PostgreSQL
|
||||||
|
- optionally runs migrations/collectstatic
|
||||||
|
- ensures snapshot directories exist
|
||||||
|
- `nginx`: `nginx/entrypoint.sh`
|
||||||
|
- simple runtime entrypoint wrapper
|
||||||
|
|
||||||
|
## Compose Files
|
||||||
|
|
||||||
|
- `docker-compose.yml`: production-minded baseline runtime (immutable image filesystem)
|
||||||
|
- `docker-compose.dev.yml`: development override with source bind mount for `web`
|
||||||
|
- `docker-compose.release.yml`: production settings override (`DJANGO_SETTINGS_MODULE=config.settings.production`)
|
||||||
|
- `scripts/verify_release_topology.sh`: validates merged release compose has no source-code bind mounts for runtime services
|
||||||
|
|
||||||
|
### Start development runtime
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp .env.example .env
|
cp .env.example .env
|
||||||
```
|
|
||||||
|
|
||||||
2. Build and run services:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
|
|
||||||
```
|
|
||||||
|
|
||||||
This starts the development-oriented topology (source bind mounts enabled).
|
|
||||||
In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs).
|
|
||||||
|
|
||||||
3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose exec web python manage.py migrate
|
|
||||||
```
|
|
||||||
|
|
||||||
4. Create a superuser:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose exec web python manage.py createsuperuser
|
|
||||||
```
|
|
||||||
|
|
||||||
5. Open the app:
|
|
||||||
|
|
||||||
- Web: http://localhost
|
|
||||||
- Admin: http://localhost/admin/
|
|
||||||
- Health: http://localhost/health/
|
|
||||||
- API root endpoints: `/api/players/`, `/api/competitions/`, `/api/teams/`, `/api/seasons/`
|
|
||||||
|
|
||||||
## Development vs Release Compose
|
|
||||||
|
|
||||||
Base compose (`docker-compose.yml`) is release-oriented and immutable for runtime services.
|
|
||||||
Development mutability is enabled via `docker-compose.dev.yml`.
|
|
||||||
|
|
||||||
Development startup (mutable source bind mounts for `web`/`celery_*`):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
|
||||||
```
|
```
|
||||||
|
|
||||||
Development startup with Tailwind watch:
|
### Start release-style runtime
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
|
|
||||||
```
|
|
||||||
|
|
||||||
Release-style startup (immutable runtime services):
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
|
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
|
||||||
```
|
```
|
||||||
|
|
||||||
Optional release-style stop:
|
### Start scheduler profile (optional)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.yml -f docker-compose.release.yml down
|
docker compose --profile scheduler up -d scheduler
|
||||||
```
|
```
|
||||||
|
|
||||||
Notes:
|
For development override:
|
||||||
|
|
||||||
- In release-style mode, `web`, `celery_worker`, and `celery_beat` run from built image filesystem with no repository source bind mount.
|
```bash
|
||||||
- In development mode (with `docker-compose.dev.yml`), `web`, `celery_worker`, and `celery_beat` are mutable and bind-mount `.:/app`.
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile scheduler up -d scheduler
|
||||||
- `tailwind` is a dev-profile service and is not required for release runtime.
|
```
|
||||||
- `nginx`, `postgres`, and `redis` service naming remains unchanged.
|
|
||||||
- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`.
|
|
||||||
|
|
||||||
## Release Topology Verification
|
### Runtime Modes At A Glance
|
||||||
|
|
||||||
Inspect merged release config:
|
- development (`docker-compose.yml` + `docker-compose.dev.yml`):
|
||||||
|
- mutable source bind mounts for `web` and `scheduler`
|
||||||
|
- optimized for local iteration
|
||||||
|
- release-style (`docker-compose.yml` + `docker-compose.release.yml`):
|
||||||
|
- immutable app filesystem for runtime services
|
||||||
|
- production settings enabled for Django
|
||||||
|
- scheduler profile:
|
||||||
|
- only starts when `--profile scheduler` is used
|
||||||
|
- if started with `SCHEDULER_ENABLED=0`, scheduler stays in idle sleep mode (no restart loop exit behavior)
|
||||||
|
|
||||||
|
### Release Topology Verification
|
||||||
|
|
||||||
|
Verify merged release config and immutability:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.yml -f docker-compose.release.yml config
|
docker compose -f docker-compose.yml -f docker-compose.release.yml config
|
||||||
```
|
|
||||||
|
|
||||||
What to verify:
|
|
||||||
|
|
||||||
- `services.web.volumes` does not include a bind mount from repository path to `/app`
|
|
||||||
- `services.celery_worker.volumes` does not include a bind mount from repository path to `/app`
|
|
||||||
- `services.celery_beat.volumes` does not include a bind mount from repository path to `/app`
|
|
||||||
- persistent named volumes still exist for `postgres_data`, `static_data`, `media_data`, `runtime_data`, and `redis_data`
|
|
||||||
|
|
||||||
Automated local/CI-friendly check:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./scripts/verify_release_topology.sh
|
./scripts/verify_release_topology.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
## Setup and Run Notes
|
Verification expectation:
|
||||||
|
- `web` and `scheduler` must not bind-mount repository source code in release mode.
|
||||||
|
- named volumes for DB/static/media/snapshots remain mounted.
|
||||||
|
|
||||||
- `web` service starts through `entrypoint.sh` and waits for PostgreSQL readiness.
|
## Named Volumes
|
||||||
- `web` service also builds Tailwind CSS before `collectstatic` when `AUTO_BUILD_TAILWIND=1`.
|
|
||||||
- `web`, `celery_worker`, `celery_beat`, and `tailwind` run as a non-root user inside the image.
|
|
||||||
- `celery_worker` executes background sync work.
|
|
||||||
- `celery_beat` triggers periodic provider sync (`apps.ingestion.tasks.scheduled_provider_sync`).
|
|
||||||
- `tailwind` service runs watch mode for development (`npm run dev`).
|
|
||||||
- nginx proxies web traffic and serves static/media volume mounts.
|
|
||||||
|
|
||||||
## Search Consistency Notes
|
v2 runtime uses named volumes for persistence:
|
||||||
|
- `postgres_data`
|
||||||
|
- `static_data`
|
||||||
|
- `media_data`
|
||||||
|
- `snapshots_incoming`
|
||||||
|
- `snapshots_archive`
|
||||||
|
- `snapshots_failed`
|
||||||
|
|
||||||
- The server-rendered player search page (`/players/`) and read-only players API (`/api/players/`) use the same search form and ORM filter service.
|
Development override uses separate dev-prefixed volumes to avoid ownership collisions.
|
||||||
- Sorting/filter semantics are aligned across UI, HTMX partial refreshes, and API responses.
|
|
||||||
- Search result metrics in the UI table use **best eligible semantics**:
|
|
||||||
- each metric (Games, MPG, PPG, RPG, APG) is the maximum value across eligible player-season rows
|
|
||||||
- eligibility is scoped by the active season/team/competition/stat filters
|
|
||||||
- different displayed metrics for one player can come from different eligible rows
|
|
||||||
- Metric-based API sorting (`ppg_*`, `mpg_*`) uses the same best-eligible semantics as UI search.
|
|
||||||
|
|
||||||
## Docker Volumes and Persistence
|
Snapshot volume intent:
|
||||||
|
- `snapshots_incoming`: extractor output waiting for import
|
||||||
|
- `snapshots_archive`: successfully imported files
|
||||||
|
- `snapshots_failed`: schema/processing failures for operator inspection
|
||||||
|
|
||||||
`docker-compose.yml` uses named volumes:
|
## Environment Variables
|
||||||
|
|
||||||
- `postgres_data`: PostgreSQL persistent database
|
Use `.env.example` as the source of truth.
|
||||||
- `static_data`: collected static assets
|
|
||||||
- `media_data`: user/provider media artifacts
|
|
||||||
- `runtime_data`: app runtime files (e.g., celery beat schedule)
|
|
||||||
- `redis_data`: Redis persistence (`/data` for RDB/AOF files)
|
|
||||||
- `node_modules_data`: Node modules cache for Tailwind builds in development override
|
|
||||||
|
|
||||||
This keeps persistent state outside container lifecycles.
|
Core groups:
|
||||||
|
- Django runtime/security vars
|
||||||
|
- PostgreSQL connection vars
|
||||||
|
- image tag vars (`APP_IMAGE_TAG`, `NGINX_IMAGE_TAG`)
|
||||||
|
- snapshot directory vars (`STATIC_DATASET_*`)
|
||||||
|
- optional future scheduler vars (`SCHEDULER_*`)
|
||||||
|
- daily orchestration vars (`DAILY_ORCHESTRATION_*`)
|
||||||
|
- optional legacy provider-sync toggle (`LEGACY_PROVIDER_STACK_ENABLED`)
|
||||||
|
|
||||||
In release-style mode, these volumes remain the persistence layer:
|
Operational reference:
|
||||||
|
- `docs/runtime-consistency-checklist.md`
|
||||||
|
|
||||||
- `postgres_data` for database state
|
## Snapshot Storage Convention
|
||||||
- `static_data` for collected static assets served by nginx
|
|
||||||
- `media_data` for uploaded/provider media
|
|
||||||
- `runtime_data` for Celery beat schedule/runtime files
|
|
||||||
- `redis_data` for Redis persistence
|
|
||||||
|
|
||||||
## Migrations
|
Snapshot files are expected under:
|
||||||
|
- incoming: `/app/snapshots/incoming`
|
||||||
|
- archive: `/app/snapshots/archive`
|
||||||
|
- failed: `/app/snapshots/failed`
|
||||||
|
|
||||||
Create migration files:
|
Configured via environment:
|
||||||
|
- `STATIC_DATASET_INCOMING_DIR`
|
||||||
|
- `STATIC_DATASET_ARCHIVE_DIR`
|
||||||
|
- `STATIC_DATASET_FAILED_DIR`
|
||||||
|
|
||||||
```bash
|
## Snapshot JSON Schema (MVP)
|
||||||
docker compose exec web python manage.py makemigrations
|
|
||||||
|
Each file must be a JSON object:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"source_name": "official_site_feed",
|
||||||
|
"snapshot_date": "2026-03-13",
|
||||||
|
"records": [
|
||||||
|
{
|
||||||
|
"competition_external_id": "comp-nba",
|
||||||
|
"competition_name": "NBA",
|
||||||
|
"season": "2025-2026",
|
||||||
|
"team_external_id": "team-lal",
|
||||||
|
"team_name": "Los Angeles Lakers",
|
||||||
|
"player_external_id": "player-23",
|
||||||
|
"full_name": "LeBron James",
|
||||||
|
"first_name": "LeBron",
|
||||||
|
"last_name": "James",
|
||||||
|
"birth_date": "1984-12-30",
|
||||||
|
"nationality": "US",
|
||||||
|
"height_cm": 206,
|
||||||
|
"weight_kg": 113,
|
||||||
|
"position": "SF",
|
||||||
|
"role": "Primary Creator",
|
||||||
|
"games_played": 60,
|
||||||
|
"minutes_per_game": 34.5,
|
||||||
|
"points_per_game": 25.4,
|
||||||
|
"rebounds_per_game": 7.2,
|
||||||
|
"assists_per_game": 8.1,
|
||||||
|
"steals_per_game": 1.3,
|
||||||
|
"blocks_per_game": 0.7,
|
||||||
|
"turnovers_per_game": 3.2,
|
||||||
|
"fg_pct": 51.1,
|
||||||
|
"three_pt_pct": 38.4,
|
||||||
|
"ft_pct": 79.8,
|
||||||
|
"source_metadata": {},
|
||||||
|
"raw_payload": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source_metadata": {},
|
||||||
|
"raw_payload": {}
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Apply migrations:
|
Validation is strict:
|
||||||
|
- unknown fields are rejected
|
||||||
|
- required fields must exist:
|
||||||
|
- `competition_external_id`, `competition_name`, `season`
|
||||||
|
- `team_external_id`, `team_name`
|
||||||
|
- `player_external_id`, `full_name`
|
||||||
|
- core stats (`games_played`, `minutes_per_game`, `points_per_game`, `rebounds_per_game`, `assists_per_game`, `steals_per_game`, `blocks_per_game`, `turnovers_per_game`, `fg_pct`, `three_pt_pct`, `ft_pct`)
|
||||||
|
- optional player bio/physical fields:
|
||||||
|
- `first_name`, `last_name`, `birth_date`, `nationality`, `height_cm`, `weight_kg`, `position`, `role`
|
||||||
|
- when `birth_date` is provided it must be `YYYY-MM-DD`
|
||||||
|
- numeric fields must be numeric
|
||||||
|
- invalid files are moved to failed directory
|
||||||
|
|
||||||
|
Importer enrichment note:
|
||||||
|
- `full_name` is source truth for identity display
|
||||||
|
- `first_name` / `last_name` are optional and may be absent in public snapshots
|
||||||
|
- when both are missing, importer may derive them from `full_name` as a best-effort enrichment step
|
||||||
|
- this enrichment is convenience-only and does not override source truth semantics
|
||||||
|
|
||||||
|
## Import Command
|
||||||
|
|
||||||
|
Run import:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose exec web python manage.py migrate
|
docker compose exec web python manage.py import_snapshots
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Run end-to-end daily orchestration manually (extractors -> import):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_daily_orchestration
|
||||||
|
```
|
||||||
|
|
||||||
|
Command behavior:
|
||||||
|
- scans `STATIC_DATASET_INCOMING_DIR` for `.json` files
|
||||||
|
- validates strict schema
|
||||||
|
- computes SHA-256 checksum
|
||||||
|
- creates `ImportRun` + `ImportFile` records
|
||||||
|
- upserts relational entities (`Competition`, `Season`, `Team`, `Player`, `PlayerSeason`, `PlayerSeasonStats`)
|
||||||
|
- skips duplicate content using checksum
|
||||||
|
- moves valid files to archive
|
||||||
|
- moves invalid files to failed
|
||||||
|
|
||||||
|
Import lifecycle summary:
|
||||||
|
1. extractor writes normalized snapshots to `incoming`
|
||||||
|
2. `import_snapshots` validates + upserts to PostgreSQL
|
||||||
|
3. imported files move to `archive`
|
||||||
|
4. invalid files move to `failed` with error details in `ImportFile`
|
||||||
|
|
||||||
|
### Source Identity Namespacing
|
||||||
|
|
||||||
|
Raw external IDs are **not globally unique** across basketball data sources. HoopScout v2 uses a namespaced identity for imported entities:
|
||||||
|
- `Competition`: unique key is `(source_name, source_uid)`
|
||||||
|
- `Team`: unique key is `(source_name, source_uid)`
|
||||||
|
- `Player`: unique key is `(source_name, source_uid)`
|
||||||
|
|
||||||
|
`source_uid` values from different sources (for example `lba` and `bcl`) can safely overlap without overwriting each other.
|
||||||
|
|
||||||
|
Import history is visible in Django admin:
|
||||||
|
- `ImportRun`
|
||||||
|
- `ImportFile`
|
||||||
|
|
||||||
|
## Extractor Framework (v2)
|
||||||
|
|
||||||
|
v2 keeps extraction and import as two separate steps:
|
||||||
|
|
||||||
|
1. **Extractors** fetch public source content and emit normalized JSON snapshots.
|
||||||
|
2. **Importer** (`import_snapshots`) validates and upserts those snapshots into PostgreSQL.
|
||||||
|
|
||||||
|
Extractor pipeline:
|
||||||
|
- `fetch` (public endpoint/page requests with conservative HTTP behavior)
|
||||||
|
- `parse` (source-specific structure)
|
||||||
|
- `normalize` (map to HoopScout snapshot schema)
|
||||||
|
- `emit` (write JSON file to incoming directory or custom path)
|
||||||
|
|
||||||
|
Built-in extractor in this phase:
|
||||||
|
- `public_json_snapshot` (generic JSON feed extractor for MVP usage)
|
||||||
|
- `lba` (Lega Basket Serie A MVP extractor)
|
||||||
|
- `bcl` (Basketball Champions League MVP extractor)
|
||||||
|
|
||||||
|
Run extractor:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_extractor public_json_snapshot
|
||||||
|
```
|
||||||
|
|
||||||
|
Run extractor with explicit output path (debugging):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_extractor public_json_snapshot --output-path /app/snapshots/incoming
|
||||||
|
```
|
||||||
|
|
||||||
|
Dry-run validation (no file write):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_extractor public_json_snapshot --dry-run
|
||||||
|
```
|
||||||
|
|
||||||
|
Run only the LBA extractor:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_lba_extractor
|
||||||
|
```
|
||||||
|
|
||||||
|
Run only the BCL extractor:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec web python manage.py run_bcl_extractor
|
||||||
|
```
|
||||||
|
|
||||||
|
### Daily orchestration behavior
|
||||||
|
|
||||||
|
`run_daily_orchestration` performs:
|
||||||
|
1. run configured extractors in order from `DAILY_ORCHESTRATION_EXTRACTORS`
|
||||||
|
2. write snapshots to incoming dir
|
||||||
|
3. run `import_snapshots`
|
||||||
|
4. log extractor/import summary
|
||||||
|
|
||||||
|
Extractor environment variables:
|
||||||
|
- `EXTRACTOR_USER_AGENT`
|
||||||
|
- `EXTRACTOR_HTTP_TIMEOUT_SECONDS`
|
||||||
|
- `EXTRACTOR_HTTP_RETRIES`
|
||||||
|
- `EXTRACTOR_RETRY_SLEEP_SECONDS`
|
||||||
|
- `EXTRACTOR_REQUEST_DELAY_SECONDS`
|
||||||
|
- `EXTRACTOR_PUBLIC_JSON_URL`
|
||||||
|
- `EXTRACTOR_PUBLIC_SOURCE_NAME`
|
||||||
|
- `EXTRACTOR_INCLUDE_RAW_PAYLOAD`
|
||||||
|
- `EXTRACTOR_LBA_STATS_URL`
|
||||||
|
- `EXTRACTOR_LBA_SEASON_LABEL`
|
||||||
|
- `EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID`
|
||||||
|
- `EXTRACTOR_LBA_COMPETITION_NAME`
|
||||||
|
- `EXTRACTOR_BCL_STATS_URL`
|
||||||
|
- `EXTRACTOR_BCL_SEASON_LABEL`
|
||||||
|
- `EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID`
|
||||||
|
- `EXTRACTOR_BCL_COMPETITION_NAME`
|
||||||
|
- `DAILY_ORCHESTRATION_EXTRACTORS`
|
||||||
|
- `DAILY_ORCHESTRATION_INTERVAL_SECONDS`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- extraction is intentionally low-frequency and uses retries conservatively
|
||||||
|
- only public pages/endpoints should be targeted
|
||||||
|
- emitted snapshots must match the same schema consumed by `import_snapshots`
|
||||||
|
- `public_json_snapshot` uses the same required-vs-optional field contract as `SnapshotSchemaValidator` (no stricter extractor-only required bio/physical fields)
|
||||||
|
- optional scheduler container runs `scripts/scheduler.sh` loop using:
|
||||||
|
- image: `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
|
||||||
|
- command: `/app/scripts/scheduler.sh`
|
||||||
|
- interval: `DAILY_ORCHESTRATION_INTERVAL_SECONDS`
|
||||||
|
- disabled idle interval: `SCHEDULER_DISABLED_SLEEP_SECONDS`
|
||||||
|
|
||||||
|
### Scheduler entrypoint/runtime expectations
|
||||||
|
|
||||||
|
- scheduler uses the same app image and base `entrypoint.sh` as web
|
||||||
|
- scheduler requires database connectivity and snapshot volumes
|
||||||
|
- scheduler is disabled unless:
|
||||||
|
- compose `scheduler` profile is started
|
||||||
|
- `SCHEDULER_ENABLED=1`
|
||||||
|
- if scheduler service is started while disabled (`SCHEDULER_ENABLED=0`), it does not exit; it enters idle sleep mode to avoid restart loops with `restart: unless-stopped`
|
||||||
|
- this keeps default runtime simple while supporting daily automation
|
||||||
|
|
||||||
|
### LBA extractor assumptions and limitations (MVP)
|
||||||
|
|
||||||
|
- `source_name` is fixed to `lba`
|
||||||
|
- the extractor expects one stable public JSON payload that includes player/team/stat rows
|
||||||
|
- competition is configured by environment and emitted as:
|
||||||
|
- `competition_external_id` from `EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID`
|
||||||
|
- `competition_name` from `EXTRACTOR_LBA_COMPETITION_NAME`
|
||||||
|
- season is configured by `EXTRACTOR_LBA_SEASON_LABEL`
|
||||||
|
- parser supports payload keys: `records`, `data`, `players`, `items`
|
||||||
|
- normalization supports nested `player` and `team` objects with common stat aliases (`gp/mpg/ppg/rpg/apg/spg/bpg/tov`)
|
||||||
|
- public-source player bio/physical fields are often incomplete; extractor allows them to be missing and emits `null` for optional fields
|
||||||
|
- no live HTTP calls in tests; tests use fixtures/mocked responses only
|
||||||
|
|
||||||
|
### BCL extractor assumptions and limitations (MVP)
|
||||||
|
|
||||||
|
- `source_name` is fixed to `bcl`
|
||||||
|
- the extractor expects one stable public JSON payload that includes player/team/stat rows
|
||||||
|
- competition is configured by environment and emitted as:
|
||||||
|
- `competition_external_id` from `EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID`
|
||||||
|
- `competition_name` from `EXTRACTOR_BCL_COMPETITION_NAME`
|
||||||
|
- season is configured by `EXTRACTOR_BCL_SEASON_LABEL`
|
||||||
|
- parser supports payload keys: `records`, `data`, `players`, `items`
|
||||||
|
- normalization supports nested `player` and `team` objects with common stat aliases (`gp/mpg/ppg/rpg/apg/spg/bpg/tov`)
|
||||||
|
- public-source player bio/physical fields are often incomplete; extractor allows them to be missing and emits `null` for optional fields
|
||||||
|
- no live HTTP calls in tests; tests use fixtures/mocked responses only
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
Run all tests:
|
- runtime `web` image stays lean and may not include `pytest` tooling
|
||||||
|
- runtime containers (`web`/`nginx`/`scheduler`) are for serving/orchestration, not preloaded test tooling
|
||||||
|
- run tests with the development compose stack (or a dedicated test image/profile) and install dev dependencies first
|
||||||
|
- local example (one-off):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
|
||||||
```
|
```
|
||||||
|
|
||||||
Run a focused module:
|
## Migration and Superuser Commands
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_api.py'
|
|
||||||
```
|
|
||||||
|
|
||||||
## Frontend Assets (Tailwind)
|
|
||||||
|
|
||||||
Build Tailwind once:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build'
|
|
||||||
```
|
|
||||||
|
|
||||||
If you see `Permission denied` writing `static/vendor` or `static/css` in development, fix local file ownership once:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo chown -R "$(id -u):$(id -g)" static
|
|
||||||
```
|
|
||||||
|
|
||||||
Run Tailwind in watch mode during development:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up tailwind
|
|
||||||
```
|
|
||||||
|
|
||||||
Source CSS lives in `static/src/tailwind.css` and compiles to `static/css/main.css`.
|
|
||||||
HTMX is served from local static assets (`static/vendor/htmx.min.js`) instead of a CDN dependency.
|
|
||||||
|
|
||||||
## Production Configuration
|
|
||||||
|
|
||||||
Use production settings in deployed environments:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
DJANGO_SETTINGS_MODULE=config.settings.production
|
|
||||||
DJANGO_DEBUG=0
|
|
||||||
DJANGO_ENV=production
|
|
||||||
```
|
|
||||||
|
|
||||||
When `DJANGO_DEBUG=0`, startup fails fast unless:
|
|
||||||
|
|
||||||
- `DJANGO_SECRET_KEY` is a real non-default value
|
|
||||||
- `DJANGO_ALLOWED_HOSTS` is set
|
|
||||||
- `DJANGO_CSRF_TRUSTED_ORIGINS` is set (for production settings)
|
|
||||||
|
|
||||||
Additional production safety checks:
|
|
||||||
|
|
||||||
- `DJANGO_SECRET_KEY` must be strong and non-default in non-development environments
|
|
||||||
- `DJANGO_ALLOWED_HOSTS` must not contain localhost-style values
|
|
||||||
- `DJANGO_CSRF_TRUSTED_ORIGINS` must be explicit HTTPS origins only (no localhost/http)
|
|
||||||
|
|
||||||
Production settings enable hardened defaults such as:
|
|
||||||
|
|
||||||
- secure cookies
|
|
||||||
- HSTS
|
|
||||||
- security headers
|
|
||||||
- `ManifestStaticFilesStorage` for static asset integrity/versioning
|
|
||||||
|
|
||||||
### Production Configuration Checklist
|
|
||||||
|
|
||||||
- `DJANGO_SETTINGS_MODULE=config.settings.production`
|
|
||||||
- `DJANGO_ENV=production`
|
|
||||||
- `DJANGO_DEBUG=0`
|
|
||||||
- strong `DJANGO_SECRET_KEY` (unique, non-default, >= 32 chars)
|
|
||||||
- explicit `DJANGO_ALLOWED_HOSTS` (no localhost values)
|
|
||||||
- explicit `DJANGO_CSRF_TRUSTED_ORIGINS` with HTTPS origins only
|
|
||||||
- `DJANGO_SECURE_SSL_REDIRECT=1` and `DJANGO_SECURE_HSTS_SECONDS` set appropriately
|
|
||||||
|
|
||||||
## Superuser and Auth
|
|
||||||
|
|
||||||
Create superuser:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
docker compose exec web python manage.py migrate
|
||||||
docker compose exec web python manage.py createsuperuser
|
docker compose exec web python manage.py createsuperuser
|
||||||
```
|
```
|
||||||
|
|
||||||
Default auth routes:
|
## Health Endpoints
|
||||||
|
|
||||||
- Signup: `/users/signup/`
|
- app health: `/health/`
|
||||||
- Login: `/users/login/`
|
- nginx healthcheck proxies `/health/` to `web`
|
||||||
- Logout: `/users/logout/`
|
|
||||||
|
|
||||||
## Ingestion and Manual Sync
|
## Player Search (v2)
|
||||||
|
|
||||||
### Trigger via Django Admin
|
Public player search is server-rendered (Django templates) with HTMX partial updates.
|
||||||
|
|
||||||
- Open `/admin/` -> `IngestionRun`
|
Supported filters:
|
||||||
- Use admin actions:
|
- free text name search
|
||||||
- `Queue full sync (default provider)`
|
- nominal position, inferred role
|
||||||
- `Queue incremental sync (default provider)`
|
- competition, season, team
|
||||||
- `Retry selected ingestion runs`
|
- nationality
|
||||||
|
- age, height, weight ranges
|
||||||
|
- stats thresholds: games, MPG, PPG, RPG, APG, SPG, BPG, TOV, FG%, 3P%, FT%
|
||||||
|
|
||||||
### Trigger from shell (manual)
|
Search correctness:
|
||||||
|
- combined team/competition/season/stat filters are applied to the same `PlayerSeason` context (no cross-row false positives)
|
||||||
|
- filtering happens at database level with Django ORM
|
||||||
|
|
||||||
```bash
|
Search metric semantics:
|
||||||
docker compose exec web python manage.py shell
|
- result columns are labeled as **Best Eligible**
|
||||||
```
|
- each displayed metric is `MAX` over eligible player-season rows for that metric in the current filter context
|
||||||
|
- different metric columns for one player may come from different eligible seasons
|
||||||
|
- when no eligible value exists for a metric in the current context, the UI shows `-`
|
||||||
|
|
||||||
```python
|
### API Search Metric Transparency
|
||||||
from apps.ingestion.tasks import trigger_full_sync
|
|
||||||
trigger_full_sync.delay(provider_namespace="balldontlie")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Logs and diagnostics
|
`GET /api/players/` now exposes sortable metric fields directly in each list row:
|
||||||
|
- `ppg_value`
|
||||||
|
- `mpg_value`
|
||||||
|
|
||||||
- Run-level status/counters: `IngestionRun`
|
These fields use the same **best eligible** semantics as UI search. They are computed from eligible
|
||||||
- Structured error records: `IngestionError`
|
player-season rows in the current filter context and may be `null` when no eligible data exists.
|
||||||
- Provider entity mappings + diagnostic payload snippets: `ExternalMapping`
|
|
||||||
- `IngestionRun.error_summary` captures top-level failure/partial-failure context
|
|
||||||
|
|
||||||
### Scheduled sync via Celery Beat
|
API list responses also include:
|
||||||
|
- `sort`: effective sort key applied
|
||||||
|
- `metric_sort_keys`: metric-based sort keys currently supported
|
||||||
|
- `metric_semantics`: plain-language metric contract used for sorting/interpretation
|
||||||
|
|
||||||
Configure scheduled sync through environment variables:
|
Pagination and sorting:
|
||||||
|
- querystring is preserved
|
||||||
|
- HTMX navigation keeps URL state in sync with current filters/page/sort
|
||||||
|
|
||||||
- `INGESTION_SCHEDULE_ENABLED` (`0`/`1`)
|
## Saved Searches and Watchlist (v2)
|
||||||
- `INGESTION_SCHEDULE_CRON` (5-field cron expression, default `*/30 * * * *`)
|
|
||||||
- `INGESTION_SCHEDULE_PROVIDER_NAMESPACE` (optional; falls back to default provider namespace)
|
|
||||||
- `INGESTION_SCHEDULE_JOB_TYPE` (`incremental` or `full_sync`)
|
|
||||||
- `INGESTION_PREVENT_OVERLAP` (`0`/`1`) to skip obvious overlapping runs
|
|
||||||
- `INGESTION_OVERLAP_WINDOW_MINUTES` overlap guard window
|
|
||||||
|
|
||||||
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
|
Authenticated users can:
|
||||||
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
|
- save current search filters from the player search page
|
||||||
|
- re-run saved searches from scouting pages
|
||||||
|
- rename/update/delete saved searches
|
||||||
|
- update saved search filters via structured JSON in the edit screen
|
||||||
|
- add/remove favorite players inline (HTMX-friendly) and browse watchlist
|
||||||
|
|
||||||
Valid cron examples:
|
## GitFlow
|
||||||
|
|
||||||
- `*/30 * * * *` every 30 minutes
|
Required branch model:
|
||||||
- `0 * * * *` hourly
|
- `main`: production
|
||||||
- `15 2 * * *` daily at 02:15
|
- `develop`: integration
|
||||||
|
- `feature/*`, `release/*`, `hotfix/*`
|
||||||
|
|
||||||
Failure behavior for invalid cron values:
|
This v2 work branch is:
|
||||||
|
- `feature/hoopscout-v2-static-architecture`
|
||||||
|
|
||||||
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
|
## Notes on Legacy Layers
|
||||||
- periodic ingestion task is disabled until cron is fixed
|
|
||||||
- an error is logged at startup indicating the invalid schedule value
|
|
||||||
|
|
||||||
## Provider Backend Selection
|
Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation.
|
||||||
|
They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks.
|
||||||
Provider backend is selected via environment variables:
|
By default:
|
||||||
|
- `apps.providers` is not installed
|
||||||
- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`)
|
- `/providers/` routes are not mounted
|
||||||
- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`)
|
- legacy provider-specific settings are not required
|
||||||
- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly
|
|
||||||
|
|
||||||
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
|
|
||||||
The adapter follows the published balldontlie OpenAPI contract: server `https://api.balldontlie.io`, NBA endpoints under `/nba/v1/*`, cursor pagination via `meta.next_cursor`, and `stats` ingestion filtered by `seasons[]`.
|
|
||||||
Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when stats are unauthorized.
|
|
||||||
|
|
||||||
Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md).
|
|
||||||
|
|
||||||
## GitFlow Workflow
|
|
||||||
|
|
||||||
GitFlow is required in this repository:
|
|
||||||
|
|
||||||
- `main`: production branch
|
|
||||||
- `develop`: integration branch
|
|
||||||
- `feature/*`: new feature branches from `develop`
|
|
||||||
- `release/*`: release hardening branches from `develop`
|
|
||||||
- `hotfix/*`: urgent production fixes from `main`
|
|
||||||
|
|
||||||
Read full details in [CONTRIBUTING.md](CONTRIBUTING.md) and [docs/workflow.md](docs/workflow.md).
|
|
||||||
|
|
||||||
### Repository Bootstrap Commands
|
|
||||||
|
|
||||||
Run these from the current `main` branch to initialize local GitFlow usage:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout main
|
|
||||||
git pull origin main
|
|
||||||
git checkout -b develop
|
|
||||||
git push -u origin develop
|
|
||||||
```
|
|
||||||
|
|
||||||
Start a feature branch:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout develop
|
|
||||||
git pull origin develop
|
|
||||||
git checkout -b feature/player-search-tuning
|
|
||||||
```
|
|
||||||
|
|
||||||
Start a release branch:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout develop
|
|
||||||
git pull origin develop
|
|
||||||
git checkout -b release/0.1.0
|
|
||||||
```
|
|
||||||
|
|
||||||
Start a hotfix branch:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git checkout main
|
|
||||||
git pull origin main
|
|
||||||
git checkout -b hotfix/fix-redis-persistence
|
|
||||||
```
|
|
||||||
|
|
||||||
## Release Notes / Changelog Convention
|
|
||||||
|
|
||||||
- Use [CHANGELOG.md](CHANGELOG.md) with an `Unreleased` section.
|
|
||||||
- For each merged PR, add short entries under:
|
|
||||||
- `Added`
|
|
||||||
- `Changed`
|
|
||||||
- `Fixed`
|
|
||||||
- On release, move `Unreleased` items to a dated version section (`[x.y.z] - YYYY-MM-DD`).
|
|
||||||
|
|||||||
@ -45,6 +45,8 @@ class PlayerListSerializer(serializers.ModelSerializer):
|
|||||||
inferred_role = serializers.CharField(source="inferred_role.name", allow_null=True)
|
inferred_role = serializers.CharField(source="inferred_role.name", allow_null=True)
|
||||||
origin_competition = serializers.CharField(source="origin_competition.name", allow_null=True)
|
origin_competition = serializers.CharField(source="origin_competition.name", allow_null=True)
|
||||||
origin_team = serializers.CharField(source="origin_team.name", allow_null=True)
|
origin_team = serializers.CharField(source="origin_team.name", allow_null=True)
|
||||||
|
ppg_value = serializers.SerializerMethodField()
|
||||||
|
mpg_value = serializers.SerializerMethodField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Player
|
model = Player
|
||||||
@ -59,10 +61,20 @@ class PlayerListSerializer(serializers.ModelSerializer):
|
|||||||
"origin_team",
|
"origin_team",
|
||||||
"height_cm",
|
"height_cm",
|
||||||
"weight_kg",
|
"weight_kg",
|
||||||
|
"ppg_value",
|
||||||
|
"mpg_value",
|
||||||
"dominant_hand",
|
"dominant_hand",
|
||||||
"is_active",
|
"is_active",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_ppg_value(self, obj):
|
||||||
|
value = getattr(obj, "ppg_value", None)
|
||||||
|
return str(value) if value is not None else None
|
||||||
|
|
||||||
|
def get_mpg_value(self, obj):
|
||||||
|
value = getattr(obj, "mpg_value", None)
|
||||||
|
return float(value) if value is not None else None
|
||||||
|
|
||||||
|
|
||||||
class PlayerAliasSerializer(serializers.Serializer):
|
class PlayerAliasSerializer(serializers.Serializer):
|
||||||
alias = serializers.CharField()
|
alias = serializers.CharField()
|
||||||
|
|||||||
@ -9,6 +9,7 @@ from apps.players.forms import PlayerSearchForm
|
|||||||
from apps.players.models import Player
|
from apps.players.models import Player
|
||||||
from apps.players.services.search import (
|
from apps.players.services.search import (
|
||||||
METRIC_SORT_KEYS,
|
METRIC_SORT_KEYS,
|
||||||
|
SEARCH_METRIC_SEMANTICS_TEXT,
|
||||||
annotate_player_metrics,
|
annotate_player_metrics,
|
||||||
apply_sorting,
|
apply_sorting,
|
||||||
base_player_queryset,
|
base_player_queryset,
|
||||||
@ -67,15 +68,18 @@ class PlayerSearchApiView(ReadOnlyBaseAPIView, generics.ListAPIView):
|
|||||||
form = self.get_search_form()
|
form = self.get_search_form()
|
||||||
if form.is_bound and not form.is_valid():
|
if form.is_bound and not form.is_valid():
|
||||||
return self._validation_error_response()
|
return self._validation_error_response()
|
||||||
return super().list(request, *args, **kwargs)
|
response = super().list(request, *args, **kwargs)
|
||||||
|
response.data["sort"] = form.cleaned_data.get("sort", "name_asc")
|
||||||
|
response.data["metric_semantics"] = SEARCH_METRIC_SEMANTICS_TEXT
|
||||||
|
response.data["metric_sort_keys"] = sorted(METRIC_SORT_KEYS)
|
||||||
|
return response
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
form = self.get_search_form()
|
form = self.get_search_form()
|
||||||
queryset = base_player_queryset()
|
queryset = base_player_queryset()
|
||||||
queryset = filter_players(queryset, form.cleaned_data)
|
queryset = filter_players(queryset, form.cleaned_data)
|
||||||
sort_key = form.cleaned_data.get("sort", "name_asc")
|
sort_key = form.cleaned_data.get("sort", "name_asc")
|
||||||
if sort_key in METRIC_SORT_KEYS:
|
queryset = annotate_player_metrics(queryset, form.cleaned_data)
|
||||||
queryset = annotate_player_metrics(queryset, form.cleaned_data)
|
|
||||||
queryset = apply_sorting(queryset, sort_key)
|
queryset = apply_sorting(queryset, sort_key)
|
||||||
return queryset
|
return queryset
|
||||||
|
|
||||||
|
|||||||
@ -5,16 +5,16 @@ from .models import Competition, Season, TeamSeason
|
|||||||
|
|
||||||
@admin.register(Competition)
|
@admin.register(Competition)
|
||||||
class CompetitionAdmin(admin.ModelAdmin):
|
class CompetitionAdmin(admin.ModelAdmin):
|
||||||
list_display = ("name", "competition_type", "gender", "country", "is_active")
|
list_display = ("name", "source_name", "source_uid", "competition_type", "gender", "country", "is_active")
|
||||||
list_filter = ("competition_type", "gender", "country", "is_active")
|
list_filter = ("competition_type", "gender", "country", "is_active")
|
||||||
search_fields = ("name", "slug")
|
search_fields = ("name", "slug", "source_name", "source_uid")
|
||||||
|
|
||||||
|
|
||||||
@admin.register(Season)
|
@admin.register(Season)
|
||||||
class SeasonAdmin(admin.ModelAdmin):
|
class SeasonAdmin(admin.ModelAdmin):
|
||||||
list_display = ("label", "start_date", "end_date", "is_current")
|
list_display = ("label", "source_uid", "start_date", "end_date", "is_current")
|
||||||
list_filter = ("is_current",)
|
list_filter = ("is_current",)
|
||||||
search_fields = ("label",)
|
search_fields = ("label", "source_uid")
|
||||||
|
|
||||||
|
|
||||||
@admin.register(TeamSeason)
|
@admin.register(TeamSeason)
|
||||||
|
|||||||
@ -0,0 +1,32 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('competitions', '0002_initial'),
|
||||||
|
('players', '0005_player_weight_index'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='competition',
|
||||||
|
name='source_uid',
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='season',
|
||||||
|
name='source_uid',
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name='competition',
|
||||||
|
index=models.Index(fields=['source_uid'], name='competition_source__1c043a_idx'),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name='season',
|
||||||
|
index=models.Index(fields=['source_uid'], name='competition_source__41e6a6_idx'),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 15:08
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("competitions", "0003_competition_source_uid_season_source_uid_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="competition",
|
||||||
|
name="source_name",
|
||||||
|
field=models.CharField(blank=True, default="", max_length=120),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="competition",
|
||||||
|
name="source_uid",
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddConstraint(
|
||||||
|
model_name="competition",
|
||||||
|
constraint=models.UniqueConstraint(
|
||||||
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
fields=("source_name", "source_uid"),
|
||||||
|
name="uq_competition_source_namespace_uid",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="competition",
|
||||||
|
index=models.Index(fields=["source_name", "source_uid"], name="competition_source__4c5f3d_idx"),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -14,6 +14,8 @@ class Competition(models.Model):
|
|||||||
|
|
||||||
name = models.CharField(max_length=220)
|
name = models.CharField(max_length=220)
|
||||||
slug = models.SlugField(max_length=240, unique=True)
|
slug = models.SlugField(max_length=240, unique=True)
|
||||||
|
source_name = models.CharField(max_length=120, blank=True, default="")
|
||||||
|
source_uid = models.CharField(max_length=120, blank=True, null=True)
|
||||||
competition_type = models.CharField(max_length=24, choices=CompetitionType.choices)
|
competition_type = models.CharField(max_length=24, choices=CompetitionType.choices)
|
||||||
gender = models.CharField(max_length=16, choices=Gender.choices, default=Gender.MEN)
|
gender = models.CharField(max_length=16, choices=Gender.choices, default=Gender.MEN)
|
||||||
level = models.PositiveSmallIntegerField(default=1)
|
level = models.PositiveSmallIntegerField(default=1)
|
||||||
@ -31,10 +33,17 @@ class Competition(models.Model):
|
|||||||
class Meta:
|
class Meta:
|
||||||
ordering = ["name"]
|
ordering = ["name"]
|
||||||
constraints = [
|
constraints = [
|
||||||
models.UniqueConstraint(fields=["name", "country"], name="uq_competition_name_country")
|
models.UniqueConstraint(fields=["name", "country"], name="uq_competition_name_country"),
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=["source_name", "source_uid"],
|
||||||
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
name="uq_competition_source_namespace_uid",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["name"]),
|
models.Index(fields=["name"]),
|
||||||
|
models.Index(fields=["source_name", "source_uid"]),
|
||||||
|
models.Index(fields=["source_uid"]),
|
||||||
models.Index(fields=["country"]),
|
models.Index(fields=["country"]),
|
||||||
models.Index(fields=["competition_type"]),
|
models.Index(fields=["competition_type"]),
|
||||||
models.Index(fields=["gender"]),
|
models.Index(fields=["gender"]),
|
||||||
@ -46,6 +55,7 @@ class Competition(models.Model):
|
|||||||
|
|
||||||
|
|
||||||
class Season(models.Model):
|
class Season(models.Model):
|
||||||
|
source_uid = models.CharField(max_length=120, blank=True, null=True, unique=True)
|
||||||
label = models.CharField(max_length=40, unique=True)
|
label = models.CharField(max_length=40, unique=True)
|
||||||
start_date = models.DateField()
|
start_date = models.DateField()
|
||||||
end_date = models.DateField()
|
end_date = models.DateField()
|
||||||
@ -57,6 +67,7 @@ class Season(models.Model):
|
|||||||
models.CheckConstraint(condition=models.Q(end_date__gte=models.F("start_date")), name="ck_season_dates")
|
models.CheckConstraint(condition=models.Q(end_date__gte=models.F("start_date")), name="ck_season_dates")
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
|
models.Index(fields=["source_uid"]),
|
||||||
models.Index(fields=["is_current"]),
|
models.Index(fields=["is_current"]),
|
||||||
models.Index(fields=["start_date"]),
|
models.Index(fields=["start_date"]),
|
||||||
models.Index(fields=["end_date"]),
|
models.Index(fields=["end_date"]),
|
||||||
|
|||||||
@ -1,117 +1,109 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from django.contrib import messages
|
from django.conf import settings
|
||||||
from django.db.models import Count
|
|
||||||
|
|
||||||
from apps.providers.registry import get_default_provider_namespace
|
from .models import ImportFile, ImportRun, IngestionError, IngestionRun
|
||||||
|
|
||||||
from .models import IngestionError, IngestionRun
|
|
||||||
from .tasks import trigger_full_sync, trigger_incremental_sync
|
|
||||||
|
|
||||||
|
|
||||||
class IngestionErrorInline(admin.TabularInline):
|
class ImportFileInline(admin.TabularInline):
|
||||||
model = IngestionError
|
model = ImportFile
|
||||||
extra = 0
|
extra = 0
|
||||||
readonly_fields = ("provider_namespace", "entity_type", "external_id", "severity", "message", "occurred_at")
|
readonly_fields = (
|
||||||
|
"relative_path",
|
||||||
|
"source_name",
|
||||||
@admin.register(IngestionRun)
|
"snapshot_date",
|
||||||
class IngestionRunAdmin(admin.ModelAdmin):
|
|
||||||
list_display = (
|
|
||||||
"provider_namespace",
|
|
||||||
"job_type",
|
|
||||||
"status",
|
"status",
|
||||||
"records_processed",
|
"checksum",
|
||||||
"records_created",
|
"file_size_bytes",
|
||||||
"records_updated",
|
"rows_total",
|
||||||
"records_failed",
|
"rows_upserted",
|
||||||
"error_count",
|
"rows_failed",
|
||||||
"short_error_summary",
|
"error_message",
|
||||||
|
"processed_at",
|
||||||
|
"created_at",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(ImportRun)
|
||||||
|
class ImportRunAdmin(admin.ModelAdmin):
|
||||||
|
list_display = (
|
||||||
|
"id",
|
||||||
|
"source",
|
||||||
|
"status",
|
||||||
|
"files_total",
|
||||||
|
"files_processed",
|
||||||
|
"rows_total",
|
||||||
|
"rows_upserted",
|
||||||
|
"rows_failed",
|
||||||
"started_at",
|
"started_at",
|
||||||
"finished_at",
|
"finished_at",
|
||||||
|
"created_at",
|
||||||
)
|
)
|
||||||
list_filter = ("provider_namespace", "job_type", "status")
|
list_filter = ("source", "status")
|
||||||
search_fields = ("provider_namespace",)
|
search_fields = ("source", "error_summary")
|
||||||
inlines = (IngestionErrorInline,)
|
|
||||||
readonly_fields = (
|
readonly_fields = (
|
||||||
"provider_namespace",
|
"source",
|
||||||
"job_type",
|
|
||||||
"status",
|
"status",
|
||||||
"triggered_by",
|
"triggered_by",
|
||||||
"started_at",
|
"started_at",
|
||||||
"finished_at",
|
"finished_at",
|
||||||
"records_processed",
|
"files_total",
|
||||||
"records_created",
|
"files_processed",
|
||||||
"records_updated",
|
"rows_total",
|
||||||
"records_failed",
|
"rows_upserted",
|
||||||
|
"rows_failed",
|
||||||
"error_summary",
|
"error_summary",
|
||||||
"context",
|
"context",
|
||||||
"raw_payload",
|
|
||||||
"created_at",
|
"created_at",
|
||||||
)
|
)
|
||||||
actions = (
|
inlines = (ImportFileInline,)
|
||||||
"enqueue_full_sync_default_provider",
|
|
||||||
"enqueue_incremental_sync_default_provider",
|
|
||||||
"retry_selected_runs",
|
@admin.register(ImportFile)
|
||||||
|
class ImportFileAdmin(admin.ModelAdmin):
|
||||||
|
list_display = (
|
||||||
|
"id",
|
||||||
|
"import_run",
|
||||||
|
"relative_path",
|
||||||
|
"source_name",
|
||||||
|
"snapshot_date",
|
||||||
|
"status",
|
||||||
|
"rows_total",
|
||||||
|
"rows_upserted",
|
||||||
|
"rows_failed",
|
||||||
|
"processed_at",
|
||||||
|
)
|
||||||
|
list_filter = ("status",)
|
||||||
|
search_fields = ("relative_path", "source_name", "checksum", "error_message")
|
||||||
|
readonly_fields = (
|
||||||
|
"import_run",
|
||||||
|
"relative_path",
|
||||||
|
"source_name",
|
||||||
|
"snapshot_date",
|
||||||
|
"status",
|
||||||
|
"checksum",
|
||||||
|
"file_size_bytes",
|
||||||
|
"rows_total",
|
||||||
|
"rows_upserted",
|
||||||
|
"rows_failed",
|
||||||
|
"error_message",
|
||||||
|
"payload_preview",
|
||||||
|
"processed_at",
|
||||||
|
"created_at",
|
||||||
)
|
)
|
||||||
|
|
||||||
@admin.action(description="Queue full sync (default provider)")
|
|
||||||
def enqueue_full_sync_default_provider(self, request, queryset):
|
|
||||||
provider_namespace = get_default_provider_namespace()
|
|
||||||
trigger_full_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
|
|
||||||
self.message_user(request, f"Queued full sync task for {provider_namespace}.", level=messages.SUCCESS)
|
|
||||||
|
|
||||||
@admin.action(description="Queue incremental sync (default provider)")
|
class LegacyIngestionRunAdmin(admin.ModelAdmin):
|
||||||
def enqueue_incremental_sync_default_provider(self, request, queryset):
|
list_display = ("provider_namespace", "job_type", "status", "started_at", "finished_at")
|
||||||
provider_namespace = get_default_provider_namespace()
|
list_filter = ("provider_namespace", "job_type", "status")
|
||||||
trigger_incremental_sync.delay(provider_namespace=provider_namespace, triggered_by_id=request.user.id)
|
search_fields = ("provider_namespace", "error_summary")
|
||||||
self.message_user(request, f"Queued incremental sync task for {provider_namespace}.", level=messages.SUCCESS)
|
|
||||||
|
|
||||||
@admin.action(description="Retry selected ingestion runs")
|
|
||||||
def retry_selected_runs(self, request, queryset):
|
|
||||||
count = 0
|
|
||||||
for run in queryset:
|
|
||||||
if run.job_type == IngestionRun.JobType.INCREMENTAL:
|
|
||||||
trigger_incremental_sync.delay(
|
|
||||||
provider_namespace=run.provider_namespace,
|
|
||||||
triggered_by_id=request.user.id,
|
|
||||||
context={"retry_of": run.id},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
trigger_full_sync.delay(
|
|
||||||
provider_namespace=run.provider_namespace,
|
|
||||||
triggered_by_id=request.user.id,
|
|
||||||
context={"retry_of": run.id},
|
|
||||||
)
|
|
||||||
count += 1
|
|
||||||
self.message_user(request, f"Queued {count} retry task(s).", level=messages.SUCCESS)
|
|
||||||
|
|
||||||
def get_queryset(self, request):
|
|
||||||
queryset = super().get_queryset(request)
|
|
||||||
return queryset.annotate(_error_count=Count("errors"))
|
|
||||||
|
|
||||||
@admin.display(ordering="_error_count", description="Errors")
|
|
||||||
def error_count(self, obj):
|
|
||||||
return getattr(obj, "_error_count", 0)
|
|
||||||
|
|
||||||
@admin.display(description="Error summary")
|
|
||||||
def short_error_summary(self, obj):
|
|
||||||
if not obj.error_summary:
|
|
||||||
return "-"
|
|
||||||
return (obj.error_summary[:90] + "...") if len(obj.error_summary) > 90 else obj.error_summary
|
|
||||||
|
|
||||||
|
|
||||||
@admin.register(IngestionError)
|
class LegacyIngestionErrorAdmin(admin.ModelAdmin):
|
||||||
class IngestionErrorAdmin(admin.ModelAdmin):
|
|
||||||
list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at")
|
list_display = ("provider_namespace", "entity_type", "external_id", "severity", "occurred_at")
|
||||||
list_filter = ("severity", "provider_namespace")
|
list_filter = ("severity", "provider_namespace")
|
||||||
search_fields = ("entity_type", "external_id", "message")
|
search_fields = ("entity_type", "external_id", "message")
|
||||||
readonly_fields = (
|
|
||||||
"ingestion_run",
|
|
||||||
"provider_namespace",
|
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||||
"entity_type",
|
admin.site.register(IngestionRun, LegacyIngestionRunAdmin)
|
||||||
"external_id",
|
admin.site.register(IngestionError, LegacyIngestionErrorAdmin)
|
||||||
"severity",
|
|
||||||
"message",
|
|
||||||
"raw_payload",
|
|
||||||
"occurred_at",
|
|
||||||
)
|
|
||||||
|
|||||||
26
apps/ingestion/extractors/__init__.py
Normal file
26
apps/ingestion/extractors/__init__.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from .bcl import BCLSnapshotExtractor
|
||||||
|
from .base import (
|
||||||
|
BaseSnapshotExtractor,
|
||||||
|
ExtractionResult,
|
||||||
|
ExtractorConfigError,
|
||||||
|
ExtractorError,
|
||||||
|
ExtractorFetchError,
|
||||||
|
ExtractorNormalizationError,
|
||||||
|
ExtractorParseError,
|
||||||
|
)
|
||||||
|
from .lba import LBASnapshotExtractor
|
||||||
|
from .registry import available_extractors, create_extractor
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BaseSnapshotExtractor",
|
||||||
|
"BCLSnapshotExtractor",
|
||||||
|
"LBASnapshotExtractor",
|
||||||
|
"ExtractionResult",
|
||||||
|
"ExtractorError",
|
||||||
|
"ExtractorConfigError",
|
||||||
|
"ExtractorFetchError",
|
||||||
|
"ExtractorParseError",
|
||||||
|
"ExtractorNormalizationError",
|
||||||
|
"available_extractors",
|
||||||
|
"create_extractor",
|
||||||
|
]
|
||||||
150
apps/ingestion/extractors/base.py
Normal file
150
apps/ingestion/extractors/base.py
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from apps.ingestion.snapshots import SnapshotSchemaValidator
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractorError(RuntimeError):
    """Base extractor exception.

    Root of the extractor error hierarchy so callers can catch every
    extraction failure with a single ``except ExtractorError``.
    """


class ExtractorConfigError(ExtractorError):
    """Raised when extractor configuration is invalid (e.g. a required setting is blank)."""


class ExtractorFetchError(ExtractorError):
    """Raised when remote/source fetch fails (transport error, retryable statuses exhausted, or invalid JSON)."""


class ExtractorParseError(ExtractorError):
    """Raised when fetched content cannot be parsed into a list of source rows."""


class ExtractorNormalizationError(ExtractorError):
    """Raised when source rows cannot be normalized into the snapshot record shape."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExtractionResult:
    """Summary of one completed extractor run (returned by BaseSnapshotExtractor.run)."""

    # Class-level identifier of the extractor that produced the snapshot.
    extractor_name: str
    # Source namespace recorded in the snapshot (value returned by the validator).
    source_name: str
    # The date the snapshot represents.
    snapshot_date: date
    # Number of validated, normalized records in the snapshot.
    records_count: int
    # Where the snapshot JSON was written; None when write_output was False.
    output_path: Path | None
|
||||||
|
|
||||||
|
|
||||||
|
class BaseSnapshotExtractor(ABC):
    """Template for source-specific snapshot extractors.

    Subclasses implement ``fetch``/``parse``/``normalize_record``; ``run``
    orchestrates the pipeline: fetch -> parse -> normalize -> schema-validate
    -> (optionally) write a JSON snapshot file.
    """

    # Identifier used in snapshot filenames and log lines.
    extractor_name = "base"
    # Source namespace stamped into every snapshot document.
    source_name = "unknown_source"

    @abstractmethod
    def fetch(self) -> Any:
        """Fetch source payload from a source endpoint/resource."""

    @abstractmethod
    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Parse fetched payload into source-specific record dictionaries."""

    @abstractmethod
    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Normalize a source record into HoopScout snapshot record shape."""

    def resolve_snapshot_date(self) -> date:
        """Default snapshot date is today; subclasses may override."""
        return date.today()

    def normalize_records(self, source_records: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Normalize each parsed row in order; every row must be a JSON object."""
        normalized_rows: list[dict[str, Any]] = []
        for position, candidate in enumerate(source_records):
            if not isinstance(candidate, dict):
                raise ExtractorNormalizationError(f"Parsed record at index {position} must be an object.")
            normalized_rows.append(self.normalize_record(candidate))
        return normalized_rows

    def build_snapshot(self, records: list[dict[str, Any]], snapshot_date: date) -> dict[str, Any]:
        """Assemble the snapshot document around *records*."""
        return {
            "source_name": self.source_name,
            "snapshot_date": snapshot_date.isoformat(),
            "records": records,
        }

    def default_output_dir(self) -> Path:
        """Directory snapshots land in when no explicit output path is given."""
        return Path(settings.STATIC_DATASET_INCOMING_DIR)

    def snapshot_filename(self, snapshot_date: date) -> str:
        """Canonical snapshot filename: ``<extractor>-<iso-date>.json``."""
        return f"{self.extractor_name}-{snapshot_date.isoformat()}.json"

    def emit_snapshot(
        self,
        snapshot: dict[str, Any],
        *,
        output_path: str | Path | None = None,
        indent: int = 2,
    ) -> Path:
        """Write *snapshot* as JSON and return the file path.

        *output_path* may be omitted (default incoming dir), point to a
        ``.json`` file, or name a directory; directories receive the
        canonical snapshot filename.
        """
        if output_path is None:
            directory = self.default_output_dir()
            directory.mkdir(parents=True, exist_ok=True)
            destination = directory / self.snapshot_filename(date.fromisoformat(snapshot["snapshot_date"]))
        else:
            requested = Path(output_path)
            if requested.suffix.lower() == ".json":
                requested.parent.mkdir(parents=True, exist_ok=True)
                destination = requested
            else:
                requested.mkdir(parents=True, exist_ok=True)
                destination = requested / self.snapshot_filename(date.fromisoformat(snapshot["snapshot_date"]))

        destination.write_text(json.dumps(snapshot, indent=indent, ensure_ascii=True), encoding="utf-8")
        return destination

    def run(
        self,
        *,
        output_path: str | Path | None = None,
        snapshot_date: date | None = None,
        write_output: bool = True,
        indent: int = 2,
    ) -> ExtractionResult:
        """Execute the full extraction pipeline and return a run summary."""
        parsed_rows = self.parse(self.fetch())
        normalized_rows = self.normalize_records(parsed_rows)
        effective_date = snapshot_date or self.resolve_snapshot_date()
        snapshot = self.build_snapshot(normalized_rows, effective_date)
        validated = SnapshotSchemaValidator.validate(snapshot)
        # Keep the validator's record copies so any written file matches what
        # was actually validated.
        snapshot["records"] = validated.records

        written_file: Path | None = None
        if write_output:
            written_file = self.emit_snapshot(snapshot, output_path=output_path, indent=indent)
            logger.info(
                "extractor_snapshot_written extractor=%s source=%s records=%s path=%s",
                self.extractor_name,
                validated.source_name,
                len(validated.records),
                written_file,
            )
        else:
            logger.info(
                "extractor_snapshot_validated extractor=%s source=%s records=%s write_output=0",
                self.extractor_name,
                validated.source_name,
                len(validated.records),
            )

        return ExtractionResult(
            extractor_name=self.extractor_name,
            source_name=validated.source_name,
            snapshot_date=validated.snapshot_date,
            records_count=len(validated.records),
            output_path=written_file,
        )
|
||||||
171
apps/ingestion/extractors/bcl.py
Normal file
171
apps/ingestion/extractors/bcl.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import BaseSnapshotExtractor, ExtractorConfigError, ExtractorNormalizationError, ExtractorParseError
|
||||||
|
from .http import ResponsibleHttpClient
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||||
|
for key in keys:
|
||||||
|
value = record.get(key)
|
||||||
|
if value not in (None, ""):
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
|
||||||
|
for key in keys:
|
||||||
|
value = record.get(key)
|
||||||
|
if isinstance(value, str):
|
||||||
|
stripped = value.strip()
|
||||||
|
if stripped:
|
||||||
|
return stripped
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Normalized keys that every player-season row must carry; normalize_record
# rejects any row leaving one of these None or "".
ESSENTIAL_FIELDS = {
    # Provenance of the row.
    "competition_external_id",
    "competition_name",
    "season",
    # Team and player identity.
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    # Per-game stat line.
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
|
||||||
|
|
||||||
|
|
||||||
|
class BCLSnapshotExtractor(BaseSnapshotExtractor):
    """
    Basketball Champions League MVP extractor.

    Scope is intentionally conservative:
    - one configured public stats endpoint
    - one configured season label
    - normalized player-season rows only
    """

    extractor_name = "bcl"
    source_name = "bcl"

    def __init__(self, *, http_client: ResponsibleHttpClient | None = None):
        # Configuration is read once at construction; values are stripped so
        # whitespace-only settings count as missing.
        self.url = settings.EXTRACTOR_BCL_STATS_URL.strip()
        self.season_label = settings.EXTRACTOR_BCL_SEASON_LABEL.strip()
        self.competition_external_id = settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID.strip()
        self.competition_name = settings.EXTRACTOR_BCL_COMPETITION_NAME.strip()
        self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD
        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )

        # Fail fast on incomplete configuration; the first missing value wins.
        for configured_value, error_message in (
            (self.url, "EXTRACTOR_BCL_STATS_URL is required for bcl extractor."),
            (self.season_label, "EXTRACTOR_BCL_SEASON_LABEL is required for bcl extractor."),
            (self.competition_external_id, "EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID is required."),
            (self.competition_name, "EXTRACTOR_BCL_COMPETITION_NAME is required."),
        ):
            if not configured_value:
                raise ExtractorConfigError(error_message)

    def fetch(self) -> Any:
        """Fetch the configured BCL stats endpoint as decoded JSON."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Return the source row list from a bare array or a known wrapper key."""
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("BCL payload must be a JSON object or array.")

        for wrapper_key in ("records", "data", "players", "items"):
            candidate_rows = payload.get(wrapper_key)
            if isinstance(candidate_rows, list):
                return candidate_rows

        raise ExtractorParseError("BCL payload must contain one of: records, data, players, items.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one source row onto the canonical player-season record shape.

        Raises:
            ExtractorNormalizationError: when any essential field is missing or blank.
        """
        raw_player = source_record.get("player")
        player_info: dict[str, Any] = raw_player if isinstance(raw_player, dict) else {}
        raw_team = source_record.get("team")
        team_info: dict[str, Any] = raw_team if isinstance(raw_team, dict) else {}

        # Identity fields may live on the row itself or on nested player/team objects.
        full_name = (
            _first_non_empty(source_record, "full_name", "player_name", "name")
            or _first_non_empty(player_info, "full_name", "name")
        )
        first_name = _first_non_empty(source_record, "first_name") or _first_non_empty(player_info, "first_name")
        last_name = _first_non_empty(source_record, "last_name") or _first_non_empty(player_info, "last_name")
        player_external_id = (
            _first_non_empty(source_record, "player_external_id", "player_id", "athlete_id")
            or _first_non_empty(player_info, "id", "player_id")
        )
        team_external_id = (
            _first_non_empty(source_record, "team_external_id", "team_id")
            or _first_non_empty(team_info, "id", "team_id")
        )
        team_name = (
            _first_non_empty_text(source_record, "team_name", "team")
            or _first_non_empty_text(team_info, "name")
        )

        normalized = {
            "competition_external_id": self.competition_external_id,
            "competition_name": self.competition_name,
            "season": self.season_label,
            "team_external_id": team_external_id,
            "team_name": team_name,
            "player_external_id": player_external_id,
            "full_name": full_name,
            "first_name": first_name,
            "last_name": last_name,
            "birth_date": _first_non_empty(source_record, "birth_date")
            or _first_non_empty(player_info, "birth_date", "dob"),
            "nationality": _first_non_empty(source_record, "nationality")
            or _first_non_empty(player_info, "nationality", "country"),
            "height_cm": _first_non_empty(source_record, "height_cm") or _first_non_empty(player_info, "height_cm"),
            "weight_kg": _first_non_empty(source_record, "weight_kg") or _first_non_empty(player_info, "weight_kg"),
            "position": _first_non_empty(source_record, "position") or _first_non_empty(player_info, "position"),
            "role": _first_non_empty(source_record, "role"),
            "games_played": _first_non_empty(source_record, "games_played", "gp"),
            "minutes_per_game": _first_non_empty(source_record, "minutes_per_game", "mpg"),
            "points_per_game": _first_non_empty(source_record, "points_per_game", "ppg"),
            "rebounds_per_game": _first_non_empty(source_record, "rebounds_per_game", "rpg"),
            "assists_per_game": _first_non_empty(source_record, "assists_per_game", "apg"),
            "steals_per_game": _first_non_empty(source_record, "steals_per_game", "spg"),
            "blocks_per_game": _first_non_empty(source_record, "blocks_per_game", "bpg"),
            "turnovers_per_game": _first_non_empty(source_record, "turnovers_per_game", "tov"),
            "fg_pct": _first_non_empty(source_record, "fg_pct", "fg_percentage"),
            "three_pt_pct": _first_non_empty(
                source_record, "three_pt_pct", "three_point_pct", "3p_pct", "three_pct"
            ),
            "ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"),
        }

        missing = [field for field in ESSENTIAL_FIELDS if normalized.get(field) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(f"bcl row missing required fields: {', '.join(sorted(missing))}")

        # External identifiers and the season label are coerced to stripped strings.
        for identifier_field in ("team_external_id", "player_external_id", "competition_external_id", "season"):
            normalized[identifier_field] = str(normalized[identifier_field]).strip()

        if self.include_raw_payload:
            normalized["raw_payload"] = source_record

        return normalized
|
||||||
109
apps/ingestion/extractors/http.py
Normal file
109
apps/ingestion/extractors/http.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from .base import ExtractorFetchError
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ResponsibleHttpClient:
    """
    Small HTTP helper for public-source extraction:
    - explicit User-Agent
    - request timeout
    - conservative retries
    - low-frequency pacing (fixed delay between requests)
    """

    # Statuses worth another attempt: throttling and transient server errors.
    RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}

    def __init__(
        self,
        *,
        user_agent: str,
        timeout_seconds: float,
        retries: int,
        retry_sleep_seconds: float,
        request_delay_seconds: float,
        session: requests.Session | None = None,
    ):
        self.user_agent = user_agent
        self.timeout_seconds = timeout_seconds
        self.retries = retries
        self.retry_sleep_seconds = retry_sleep_seconds
        self.request_delay_seconds = request_delay_seconds
        # An injected session keeps the client testable without network access.
        self.session = session or requests.Session()
        # Monotonic timestamp of the last issued request; None before the first.
        self._last_request_at: float | None = None

    def _pace(self) -> None:
        """Sleep just long enough to keep request_delay_seconds between requests."""
        if self.request_delay_seconds <= 0:
            return
        current = time.monotonic()
        if self._last_request_at is None:
            # First request: nothing to pace against yet.
            self._last_request_at = current
            return
        wait_for = self.request_delay_seconds - (current - self._last_request_at)
        if wait_for > 0:
            time.sleep(wait_for)
        self._last_request_at = time.monotonic()

    def get_json(
        self,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
    ) -> Any:
        """GET *url* and return the decoded JSON body.

        Retryable statuses and transport errors are retried up to ``retries``
        extra attempts; ExtractorFetchError is raised once attempts are
        exhausted or the body is not valid JSON.
        """
        request_headers = {"User-Agent": self.user_agent}
        if headers:
            request_headers.update(headers)

        max_attempts = self.retries + 1
        for attempt_number in range(1, max_attempts + 1):
            try:
                self._pace()
                response = self.session.get(
                    url,
                    params=params,
                    headers=request_headers,
                    timeout=self.timeout_seconds,
                )
                if response.status_code in self.RETRYABLE_STATUS_CODES:
                    if attempt_number < max_attempts:
                        logger.warning(
                            "extractor_http_retryable_status status=%s url=%s attempt=%s/%s",
                            response.status_code,
                            url,
                            attempt_number,
                            max_attempts,
                        )
                        time.sleep(self.retry_sleep_seconds)
                        continue
                    raise ExtractorFetchError(
                        f"Retryable status exhausted: status={response.status_code} url={url}"
                    )

                response.raise_for_status()
                return response.json()
            except requests.RequestException as exc:
                # NOTE(review): HTTPError from raise_for_status lands here too,
                # so non-retryable statuses (e.g. 404) are also retried —
                # presumably acceptable for MVP; confirm intent.
                if attempt_number < max_attempts:
                    logger.warning(
                        "extractor_http_request_retry error=%s url=%s attempt=%s/%s",
                        exc,
                        url,
                        attempt_number,
                        max_attempts,
                    )
                    time.sleep(self.retry_sleep_seconds)
                    continue
                raise ExtractorFetchError(f"Request failed after retries: {exc}") from exc
            except ValueError as exc:
                # response.json() raises a ValueError subclass on malformed bodies.
                raise ExtractorFetchError(f"Invalid JSON response from {url}: {exc}") from exc

        raise ExtractorFetchError(f"Unexpected retry loop exit for {url}")
|
||||||
171
apps/ingestion/extractors/lba.py
Normal file
171
apps/ingestion/extractors/lba.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .base import BaseSnapshotExtractor, ExtractorConfigError, ExtractorNormalizationError, ExtractorParseError
|
||||||
|
from .http import ResponsibleHttpClient
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||||
|
for key in keys:
|
||||||
|
value = record.get(key)
|
||||||
|
if value not in (None, ""):
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty_text(record: dict[str, Any], *keys: str) -> str | None:
|
||||||
|
for key in keys:
|
||||||
|
value = record.get(key)
|
||||||
|
if isinstance(value, str):
|
||||||
|
stripped = value.strip()
|
||||||
|
if stripped:
|
||||||
|
return stripped
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Normalized keys that every player-season row must carry; normalize_record
# rejects any row leaving one of these None or "".
ESSENTIAL_FIELDS = {
    # Provenance of the row.
    "competition_external_id",
    "competition_name",
    "season",
    # Team and player identity.
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    # Per-game stat line.
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
|
||||||
|
|
||||||
|
|
||||||
|
class LBASnapshotExtractor(BaseSnapshotExtractor):
    """
    LBA (Lega Basket Serie A) MVP extractor.

    Scope is intentionally conservative:
    - one configured public stats endpoint
    - one configured season label
    - normalized player-season rows only
    """

    extractor_name = "lba"
    source_name = "lba"

    def __init__(self, *, http_client: ResponsibleHttpClient | None = None):
        # All configuration comes from Django settings; values are stripped so
        # whitespace-only settings count as missing for the checks below.
        self.url = settings.EXTRACTOR_LBA_STATS_URL.strip()
        self.season_label = settings.EXTRACTOR_LBA_SEASON_LABEL.strip()
        self.competition_external_id = settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID.strip()
        self.competition_name = settings.EXTRACTOR_LBA_COMPETITION_NAME.strip()
        self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD
        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )

        # Fail fast on blank required settings, in declaration order.
        config_checks = (
            (self.url, "EXTRACTOR_LBA_STATS_URL is required for lba extractor."),
            (self.season_label, "EXTRACTOR_LBA_SEASON_LABEL is required for lba extractor."),
            (self.competition_external_id, "EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID is required."),
            (self.competition_name, "EXTRACTOR_LBA_COMPETITION_NAME is required."),
        )
        for value, error_message in config_checks:
            if not value:
                raise ExtractorConfigError(error_message)

    def fetch(self) -> Any:
        """Download the configured stats endpoint as decoded JSON."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Return the list of player rows contained in *payload*."""
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("LBA payload must be a JSON object or array.")

        for container_key in ("records", "data", "players", "items"):
            candidate = payload.get(container_key)
            if isinstance(candidate, list):
                return candidate

        raise ExtractorParseError("LBA payload must contain one of: records, data, players, items.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one raw LBA row onto the HoopScout snapshot schema."""
        raw_player = source_record.get("player")
        player_info = raw_player if isinstance(raw_player, dict) else {}
        raw_team = source_record.get("team")
        team_info = raw_team if isinstance(raw_team, dict) else {}

        pick = _first_non_empty

        # Identity fields may live on the row itself or on nested objects.
        full_name = pick(source_record, "full_name", "player_name", "name") or pick(
            player_info, "full_name", "name"
        )
        first_name = pick(source_record, "first_name") or pick(player_info, "first_name")
        last_name = pick(source_record, "last_name") or pick(player_info, "last_name")
        player_external_id = pick(
            source_record, "player_external_id", "player_id", "athlete_id"
        ) or pick(player_info, "id", "player_id")
        team_external_id = pick(source_record, "team_external_id", "team_id") or pick(
            team_info, "id", "team_id"
        )
        team_name = _first_non_empty_text(source_record, "team_name", "team") or _first_non_empty_text(
            team_info, "name"
        )

        normalized = {
            "competition_external_id": self.competition_external_id,
            "competition_name": self.competition_name,
            "season": self.season_label,
            "team_external_id": team_external_id,
            "team_name": team_name,
            "player_external_id": player_external_id,
            "full_name": full_name,
            "first_name": first_name,
            "last_name": last_name,
            "birth_date": pick(source_record, "birth_date") or pick(player_info, "birth_date", "dob"),
            "nationality": pick(source_record, "nationality")
            or pick(player_info, "nationality", "country"),
            "height_cm": pick(source_record, "height_cm") or pick(player_info, "height_cm"),
            "weight_kg": pick(source_record, "weight_kg") or pick(player_info, "weight_kg"),
            "position": pick(source_record, "position") or pick(player_info, "position"),
            "role": pick(source_record, "role"),
            "games_played": pick(source_record, "games_played", "gp"),
            "minutes_per_game": pick(source_record, "minutes_per_game", "mpg"),
            "points_per_game": pick(source_record, "points_per_game", "ppg"),
            "rebounds_per_game": pick(source_record, "rebounds_per_game", "rpg"),
            "assists_per_game": pick(source_record, "assists_per_game", "apg"),
            "steals_per_game": pick(source_record, "steals_per_game", "spg"),
            "blocks_per_game": pick(source_record, "blocks_per_game", "bpg"),
            "turnovers_per_game": pick(source_record, "turnovers_per_game", "tov"),
            "fg_pct": pick(source_record, "fg_pct", "fg_percentage"),
            "three_pt_pct": pick(
                source_record, "three_pt_pct", "three_point_pct", "3p_pct", "three_pct"
            ),
            "ft_pct": pick(source_record, "ft_pct", "ft_percentage"),
        }

        missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(f"lba row missing required fields: {', '.join(sorted(missing))}")

        # Identifier fields are coerced to stripped strings for stable upserts.
        for identifier in ("team_external_id", "player_external_id", "competition_external_id", "season"):
            normalized[identifier] = str(normalized[identifier]).strip()

        if self.include_raw_payload:
            normalized["raw_payload"] = source_record

        return normalized
|
||||||
132
apps/ingestion/extractors/public_json.py
Normal file
132
apps/ingestion/extractors/public_json.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
|
||||||
|
|
||||||
|
from .base import (
|
||||||
|
BaseSnapshotExtractor,
|
||||||
|
ExtractorConfigError,
|
||||||
|
ExtractorNormalizationError,
|
||||||
|
ExtractorParseError,
|
||||||
|
)
|
||||||
|
from .http import ResponsibleHttpClient
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||||
|
for key in keys:
|
||||||
|
if key in record and record[key] not in (None, ""):
|
||||||
|
return record[key]
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class PublicJsonSnapshotExtractor(BaseSnapshotExtractor):
    """
    Generic public JSON extractor for MVP v2.

    Deliberately small and generic:
    - fetch one public JSON endpoint
    - parse list-like payloads
    - normalize rows into the HoopScout snapshot schema
    """

    extractor_name = "public_json_snapshot"

    # Target snapshot field -> accepted source-record aliases, tried in order.
    _FIELD_ALIASES: dict[str, tuple[str, ...]] = {
        "competition_external_id": ("competition_external_id", "competition_id", "league_id"),
        "competition_name": ("competition_name", "competition", "league_name"),
        "season": ("season", "season_label", "season_name"),
        "team_external_id": ("team_external_id", "team_id"),
        "team_name": ("team_name", "team"),
        "player_external_id": ("player_external_id", "player_id"),
        "full_name": ("full_name", "player_name", "name"),
        "first_name": ("first_name",),
        "last_name": ("last_name",),
        "birth_date": ("birth_date",),
        "nationality": ("nationality", "nationality_code"),
        "height_cm": ("height_cm",),
        "weight_kg": ("weight_kg",),
        "position": ("position",),
        "role": ("role",),
        "games_played": ("games_played", "gp"),
        "minutes_per_game": ("minutes_per_game", "mpg"),
        "points_per_game": ("points_per_game", "ppg"),
        "rebounds_per_game": ("rebounds_per_game", "rpg"),
        "assists_per_game": ("assists_per_game", "apg"),
        "steals_per_game": ("steals_per_game", "spg"),
        "blocks_per_game": ("blocks_per_game", "bpg"),
        "turnovers_per_game": ("turnovers_per_game", "tov"),
        "fg_pct": ("fg_pct",),
        "three_pt_pct": ("three_pt_pct", "three_point_pct", "three_pct", "3p_pct"),
        "ft_pct": ("ft_pct",),
    }

    def __init__(
        self,
        *,
        url: str | None = None,
        source_name: str | None = None,
        include_raw_payload: bool | None = None,
        http_client: ResponsibleHttpClient | None = None,
    ):
        # Explicit keyword arguments override the corresponding settings.
        self.url = (url or settings.EXTRACTOR_PUBLIC_JSON_URL).strip()
        self.source_name = (source_name or settings.EXTRACTOR_PUBLIC_SOURCE_NAME).strip()
        if include_raw_payload is None:
            self.include_raw_payload = settings.EXTRACTOR_INCLUDE_RAW_PAYLOAD
        else:
            self.include_raw_payload = include_raw_payload
        if not self.url:
            raise ExtractorConfigError("EXTRACTOR_PUBLIC_JSON_URL is required for public_json_snapshot extractor.")
        if not self.source_name:
            raise ExtractorConfigError("EXTRACTOR_PUBLIC_SOURCE_NAME must not be empty.")

        self.http_client = http_client or ResponsibleHttpClient(
            user_agent=settings.EXTRACTOR_USER_AGENT,
            timeout_seconds=settings.EXTRACTOR_HTTP_TIMEOUT_SECONDS,
            retries=settings.EXTRACTOR_HTTP_RETRIES,
            retry_sleep_seconds=settings.EXTRACTOR_RETRY_SLEEP_SECONDS,
            request_delay_seconds=settings.EXTRACTOR_REQUEST_DELAY_SECONDS,
        )

    def fetch(self) -> Any:
        """Download the configured endpoint as decoded JSON."""
        return self.http_client.get_json(self.url)

    def parse(self, payload: Any) -> list[dict[str, Any]]:
        """Return the list of rows from *payload* (bare list, or 'records'/'data' key)."""
        if isinstance(payload, list):
            return payload
        if not isinstance(payload, dict):
            raise ExtractorParseError("Fetched payload must be a JSON object or array.")

        for container_key in ("records", "data"):
            candidate = payload.get(container_key)
            if isinstance(candidate, list):
                return candidate

        raise ExtractorParseError("Payload must contain 'records' or 'data' list.")

    def normalize_record(self, source_record: dict[str, Any]) -> dict[str, Any]:
        """Map one raw row onto the HoopScout snapshot schema via the alias table."""
        normalized = {
            field: _first_non_empty(source_record, *aliases)
            for field, aliases in self._FIELD_ALIASES.items()
        }

        missing = [key for key in REQUIRED_RECORD_FIELDS if normalized.get(key) in (None, "")]
        if missing:
            raise ExtractorNormalizationError(
                f"public_json_snapshot row missing required fields: {', '.join(sorted(missing))}"
            )

        # Identifier fields become stripped strings so upsert keys are stable.
        for identifier in ("season", "competition_external_id", "team_external_id", "player_external_id"):
            normalized[identifier] = str(normalized[identifier]).strip()

        if self.include_raw_payload:
            normalized["raw_payload"] = source_record

        return normalized
|
||||||
26
apps/ingestion/extractors/registry.py
Normal file
26
apps/ingestion/extractors/registry.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .bcl import BCLSnapshotExtractor
|
||||||
|
from .base import BaseSnapshotExtractor, ExtractorConfigError
|
||||||
|
from .lba import LBASnapshotExtractor
|
||||||
|
from .public_json import PublicJsonSnapshotExtractor
|
||||||
|
|
||||||
|
# Single source of truth mapping extractor names to their classes.
EXTRACTOR_REGISTRY: dict[str, type[BaseSnapshotExtractor]] = {
    extractor_cls.extractor_name: extractor_cls
    for extractor_cls in (
        BCLSnapshotExtractor,
        LBASnapshotExtractor,
        PublicJsonSnapshotExtractor,
    )
}


def available_extractors() -> list[str]:
    """Return all registered extractor names, sorted alphabetically."""
    return sorted(EXTRACTOR_REGISTRY)


def create_extractor(extractor_name: str) -> BaseSnapshotExtractor:
    """Instantiate the extractor registered under *extractor_name*.

    Raises:
        ExtractorConfigError: if the name is not registered.
    """
    try:
        registered_cls = EXTRACTOR_REGISTRY[extractor_name]
    except KeyError as exc:
        known = ", ".join(available_extractors())
        raise ExtractorConfigError(
            f"Unknown extractor '{extractor_name}'. Available: {known}"
        ) from exc
    return registered_cls()
|
||||||
0
apps/ingestion/management/__init__.py
Normal file
0
apps/ingestion/management/__init__.py
Normal file
0
apps/ingestion/management/commands/__init__.py
Normal file
0
apps/ingestion/management/commands/__init__.py
Normal file
23
apps/ingestion/management/commands/import_snapshots.py
Normal file
23
apps/ingestion/management/commands/import_snapshots.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from django.conf import settings
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from apps.ingestion.services.snapshot_import import SnapshotImporter
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that ingests pending snapshot files into the database."""

    help = "Import static JSON snapshots from incoming directory into PostgreSQL."

    def handle(self, *args, **options):
        # Directory layout is fully settings-driven; the importer itself
        # decides how files move between incoming/archive/failed.
        importer = SnapshotImporter(
            incoming_dir=settings.STATIC_DATASET_INCOMING_DIR,
            archive_dir=settings.STATIC_DATASET_ARCHIVE_DIR,
            failed_dir=settings.STATIC_DATASET_FAILED_DIR,
        )
        run = importer.run()
        summary = (
            f"Import run {run.id} completed: status={run.status} "
            f"files={run.files_processed}/{run.files_total} "
            f"rows_upserted={run.rows_upserted} rows_failed={run.rows_failed}"
        )
        self.stdout.write(self.style.SUCCESS(summary))
|
||||||
61
apps/ingestion/management/commands/run_bcl_extractor.py
Normal file
61
apps/ingestion/management/commands/run_bcl_extractor.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
from apps.ingestion.extractors import ExtractorError, create_extractor
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Run the BCL extractor in isolation and write an import-ready snapshot file."""

    help = "Run only the BCL extractor and emit an import-ready snapshot JSON."

    def add_arguments(self, parser):
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json path to write output (default incoming dir).",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate without writing output.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted file.",
        )

    @staticmethod
    def _resolved_snapshot_date(raw):
        # None / empty means: let the extractor choose the date itself.
        if not raw:
            return None
        parsed = parse_date(raw)
        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        snapshot_date = self._resolved_snapshot_date(options["snapshot_date"])

        try:
            extractor = create_extractor("bcl")
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            # Surface extractor failures as clean CLI errors.
            raise CommandError(str(exc)) from exc

        output = str(result.output_path) if result.output_path else "<dry-run>"
        message = (
            f"BCL extractor completed: source={result.source_name} "
            f"date={result.snapshot_date} records={result.records_count} output={output}"
        )
        self.stdout.write(self.style.SUCCESS(message))
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
from apps.ingestion.services.daily_orchestration import run_daily_orchestration
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """CLI entry point for the scheduled extract-then-import daily workflow."""

    help = "Run daily HoopScout v2 workflow: extract snapshots, then import snapshots."

    def add_arguments(self, parser):
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date for all extractor outputs (YYYY-MM-DD).",
        )

    def handle(self, *args, **options):
        raw_date = options["snapshot_date"]
        snapshot_date = None
        if raw_date:
            snapshot_date = parse_date(raw_date)
            if snapshot_date is None:
                raise CommandError("--snapshot-date must be YYYY-MM-DD.")

        try:
            result = run_daily_orchestration(snapshot_date=snapshot_date)
        except Exception as exc:  # noqa: BLE001
            # Any orchestration failure is reported as a clean CLI error.
            raise CommandError(str(exc)) from exc

        extractor_summary = ", ".join(
            f"{row.extractor_name}:{row.records_count}" for row in result.extractors_run
        )
        message = (
            "Daily orchestration completed: "
            f"extractors=[{extractor_summary}] "
            f"import_run={result.import_run_id} "
            f"import_status={result.import_status} "
            f"files_processed={result.files_processed} "
            f"rows_upserted={result.rows_upserted} "
            f"rows_failed={result.rows_failed}"
        )
        self.stdout.write(self.style.SUCCESS(message))
|
||||||
63
apps/ingestion/management/commands/run_extractor.py
Normal file
63
apps/ingestion/management/commands/run_extractor.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
from apps.ingestion.extractors import ExtractorError, available_extractors, create_extractor
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Run any registered snapshot extractor by name and emit importable JSON."""

    help = "Run a snapshot extractor and emit importable JSON snapshots."

    def add_arguments(self, parser):
        # Positional name is constrained to the registry contents.
        parser.add_argument("extractor_name", choices=available_extractors())
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json file path where snapshot should be written. Defaults to incoming dir.",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate extraction/normalization without writing snapshot file.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted snapshots.",
        )

    @staticmethod
    def _resolved_snapshot_date(raw):
        # None / empty means: let the extractor choose the date itself.
        if not raw:
            return None
        parsed = parse_date(raw)
        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        snapshot_date = self._resolved_snapshot_date(options["snapshot_date"])

        try:
            extractor = create_extractor(options["extractor_name"])
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            raise CommandError(str(exc)) from exc

        output_path = str(result.output_path) if result.output_path else "<dry-run>"
        message = (
            f"Extractor {result.extractor_name} completed: "
            f"source={result.source_name} date={result.snapshot_date} "
            f"records={result.records_count} output={output_path}"
        )
        self.stdout.write(self.style.SUCCESS(message))
|
||||||
61
apps/ingestion/management/commands/run_lba_extractor.py
Normal file
61
apps/ingestion/management/commands/run_lba_extractor.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
from apps.ingestion.extractors import ExtractorError, create_extractor
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Run the LBA extractor in isolation and write an import-ready snapshot file."""

    help = "Run only the LBA extractor and emit an import-ready snapshot JSON."

    def add_arguments(self, parser):
        parser.add_argument(
            "--output-path",
            dest="output_path",
            default=None,
            help="Directory or .json path to write output (default incoming dir).",
        )
        parser.add_argument(
            "--snapshot-date",
            dest="snapshot_date",
            default=None,
            help="Override snapshot date in YYYY-MM-DD format.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Validate without writing output.",
        )
        parser.add_argument(
            "--indent",
            type=int,
            default=2,
            help="JSON indent level for emitted file.",
        )

    @staticmethod
    def _resolved_snapshot_date(raw):
        # None / empty means: let the extractor choose the date itself.
        if not raw:
            return None
        parsed = parse_date(raw)
        if parsed is None:
            raise CommandError("--snapshot-date must be YYYY-MM-DD.")
        return parsed

    def handle(self, *args, **options):
        snapshot_date = self._resolved_snapshot_date(options["snapshot_date"])

        try:
            extractor = create_extractor("lba")
            result = extractor.run(
                output_path=options["output_path"],
                snapshot_date=snapshot_date,
                write_output=not options["dry_run"],
                indent=options["indent"],
            )
        except ExtractorError as exc:
            raise CommandError(str(exc)) from exc

        output = str(result.output_path) if result.output_path else "<dry-run>"
        message = (
            f"LBA extractor completed: source={result.source_name} "
            f"date={result.snapshot_date} records={result.records_count} output={output}"
        )
        self.stdout.write(self.style.SUCCESS(message))
|
||||||
@ -0,0 +1,91 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:44
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    # Introduces the ImportRun / ImportFile bookkeeping tables used by the
    # snapshot import pipeline, plus supporting indexes and the per-run
    # uniqueness constraint on file paths. Auto-generated index names below
    # must not be edited by hand or schema and model state will diverge.

    dependencies = [
        ('ingestion', '0002_ingestionrun_error_summary'),
        # ImportRun.triggered_by targets the swappable user model.
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='ImportRun',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('source', models.CharField(default='snapshot', max_length=80)),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('running', 'Running'), ('success', 'Success'), ('failed', 'Failed'), ('canceled', 'Canceled')], default='pending', max_length=24)),
                ('started_at', models.DateTimeField(blank=True, null=True)),
                ('finished_at', models.DateTimeField(blank=True, null=True)),
                ('files_total', models.PositiveIntegerField(default=0)),
                ('files_processed', models.PositiveIntegerField(default=0)),
                ('rows_total', models.PositiveIntegerField(default=0)),
                ('rows_upserted', models.PositiveIntegerField(default=0)),
                ('rows_failed', models.PositiveIntegerField(default=0)),
                ('error_summary', models.TextField(blank=True, default='')),
                ('context', models.JSONField(blank=True, default=dict)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                # SET_NULL keeps the run history if the triggering user is deleted.
                ('triggered_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='import_runs', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.CreateModel(
            name='ImportFile',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('relative_path', models.CharField(max_length=260)),
                ('status', models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('success', 'Success'), ('failed', 'Failed'), ('skipped', 'Skipped')], default='pending', max_length=24)),
                ('checksum', models.CharField(blank=True, max_length=128)),
                ('file_size_bytes', models.PositiveBigIntegerField(blank=True, null=True)),
                ('rows_total', models.PositiveIntegerField(default=0)),
                ('rows_upserted', models.PositiveIntegerField(default=0)),
                ('rows_failed', models.PositiveIntegerField(default=0)),
                ('error_message', models.TextField(blank=True)),
                ('payload_preview', models.JSONField(blank=True, default=dict)),
                ('processed_at', models.DateTimeField(blank=True, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                # Deleting a run removes its per-file records with it.
                ('import_run', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='files', to='ingestion.importrun')),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['source', 'status'], name='ingestion_i_source_61db63_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['created_at'], name='ingestion_i_created_93c115_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['started_at'], name='ingestion_i_started_bf1d94_idx'),
        ),
        migrations.AddIndex(
            model_name='importrun',
            index=models.Index(fields=['finished_at'], name='ingestion_i_finishe_73cbed_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['import_run', 'status'], name='ingestion_i_import__075f75_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['relative_path'], name='ingestion_i_relativ_183e34_idx'),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['processed_at'], name='ingestion_i_process_dfc080_idx'),
        ),
        migrations.AddConstraint(
            model_name='importfile',
            constraint=models.UniqueConstraint(fields=('import_run', 'relative_path'), name='uq_import_file_per_run_path'),
        ),
    ]
|
||||||
@ -0,0 +1,27 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:59
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    # Adds provenance metadata (source_name, snapshot_date) to ImportFile and a
    # composite index for per-source / per-day lookups. The index name is
    # auto-generated by Django and must not be edited by hand.

    dependencies = [
        ('ingestion', '0003_importrun_importfile_and_more'),
    ]

    operations = [
        migrations.AddField(
            model_name='importfile',
            name='snapshot_date',
            field=models.DateField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='importfile',
            name='source_name',
            field=models.CharField(blank=True, max_length=120),
        ),
        migrations.AddIndex(
            model_name='importfile',
            index=models.Index(fields=['source_name', 'snapshot_date'], name='ingestion_i_source__de6843_idx'),
        ),
    ]
|
||||||
@ -2,6 +2,93 @@ from django.conf import settings
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class ImportRun(models.Model):
    """One execution of the snapshot import pipeline (a batch of ImportFile rows)."""

    class RunStatus(models.TextChoices):
        # Lifecycle states for a whole run.
        PENDING = "pending", "Pending"
        RUNNING = "running", "Running"
        SUCCESS = "success", "Success"
        FAILED = "failed", "Failed"
        CANCELED = "canceled", "Canceled"

    # Logical origin label for the run (free-form string, defaults to "snapshot").
    source = models.CharField(max_length=80, default="snapshot")
    status = models.CharField(max_length=24, choices=RunStatus.choices, default=RunStatus.PENDING)
    # SET_NULL preserves run history even if the triggering user is deleted.
    triggered_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        related_name="import_runs",
    )
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)
    # Aggregate counters — presumably rolled up from this run's files by the
    # importer; confirm against SnapshotImporter.
    files_total = models.PositiveIntegerField(default=0)
    files_processed = models.PositiveIntegerField(default=0)
    rows_total = models.PositiveIntegerField(default=0)
    rows_upserted = models.PositiveIntegerField(default=0)
    rows_failed = models.PositiveIntegerField(default=0)
    error_summary = models.TextField(blank=True, default="")
    # Free-form JSON metadata attached to the run.
    context = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-created_at"]
        indexes = [
            models.Index(fields=["source", "status"]),
            models.Index(fields=["created_at"]),
            models.Index(fields=["started_at"]),
            models.Index(fields=["finished_at"]),
        ]

    def __str__(self) -> str:
        return f"{self.source} | {self.status} | {self.created_at:%Y-%m-%d %H:%M}"
|
||||||
|
|
||||||
|
|
||||||
|
class ImportFile(models.Model):
    """Per-file processing record belonging to one ImportRun."""

    class FileStatus(models.TextChoices):
        # Lifecycle states for a single file within a run.
        PENDING = "pending", "Pending"
        PROCESSING = "processing", "Processing"
        SUCCESS = "success", "Success"
        FAILED = "failed", "Failed"
        SKIPPED = "skipped", "Skipped"

    # Deleting a run removes its per-file records with it.
    import_run = models.ForeignKey(
        "ingestion.ImportRun",
        on_delete=models.CASCADE,
        related_name="files",
    )
    # Presumably relative to the incoming snapshots directory — confirm in the
    # importer. Unique per run (see Meta constraint).
    relative_path = models.CharField(max_length=260)
    source_name = models.CharField(max_length=120, blank=True)
    snapshot_date = models.DateField(blank=True, null=True)
    status = models.CharField(max_length=24, choices=FileStatus.choices, default=FileStatus.PENDING)
    checksum = models.CharField(max_length=128, blank=True)
    file_size_bytes = models.PositiveBigIntegerField(blank=True, null=True)
    rows_total = models.PositiveIntegerField(default=0)
    rows_upserted = models.PositiveIntegerField(default=0)
    rows_failed = models.PositiveIntegerField(default=0)
    error_message = models.TextField(blank=True)
    # Small JSON excerpt of the file payload for debugging/inspection.
    payload_preview = models.JSONField(default=dict, blank=True)
    processed_at = models.DateTimeField(blank=True, null=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-created_at"]
        constraints = [
            # The same path may not appear twice within one run.
            models.UniqueConstraint(
                fields=["import_run", "relative_path"],
                name="uq_import_file_per_run_path",
            ),
        ]
        indexes = [
            models.Index(fields=["import_run", "status"]),
            models.Index(fields=["relative_path"]),
            models.Index(fields=["source_name", "snapshot_date"]),
            models.Index(fields=["processed_at"]),
        ]

    def __str__(self) -> str:
        return f"{self.relative_path} [{self.status}]"
|
||||||
|
|
||||||
|
|
||||||
class IngestionRun(models.Model):
|
class IngestionRun(models.Model):
|
||||||
class RunStatus(models.TextChoices):
|
class RunStatus(models.TextChoices):
|
||||||
PENDING = "pending", "Pending"
|
PENDING = "pending", "Pending"
|
||||||
|
|||||||
@ -1,9 +1,14 @@
|
|||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run
|
from .runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run
|
||||||
from .sync import run_sync_job
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"start_ingestion_run",
|
"start_ingestion_run",
|
||||||
"finish_ingestion_run",
|
"finish_ingestion_run",
|
||||||
"log_ingestion_error",
|
"log_ingestion_error",
|
||||||
"run_sync_job",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||||
|
from .sync import run_sync_job # pragma: no cover - legacy provider stack only.
|
||||||
|
|
||||||
|
__all__.append("run_sync_job")
|
||||||
|
|||||||
84
apps/ingestion/services/daily_orchestration.py
Normal file
84
apps/ingestion/services/daily_orchestration.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from apps.ingestion.extractors import create_extractor
|
||||||
|
from apps.ingestion.services.snapshot_import import SnapshotImporter
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExtractorRunSummary:
    """Outcome of a single extractor invocation during orchestration."""

    # Name the extractor was registered/created under.
    extractor_name: str
    # Number of records the extractor reported producing.
    records_count: int
    # File the extractor wrote, or None when it produced no output file.
    output_path: Path | None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DailyOrchestrationResult:
    """Aggregate result of one daily orchestration cycle (extract + import)."""

    # Per-extractor summaries, in the order the extractors ran.
    extractors_run: list[ExtractorRunSummary]
    # Primary key and final status of the ImportRun the importer created.
    import_run_id: int
    import_status: str
    # Counters copied from the finished import run.
    files_processed: int
    rows_upserted: int
    rows_failed: int
|
||||||
|
|
||||||
|
|
||||||
|
def parse_enabled_extractors(raw_value: str) -> list[str]:
    """Split a comma-separated extractor list, dropping blanks and whitespace."""
    return [name for name in map(str.strip, raw_value.split(",")) if name]
|
||||||
|
|
||||||
|
|
||||||
|
def run_daily_orchestration(*, snapshot_date: date | None = None) -> DailyOrchestrationResult:
    """Run every configured extractor, then import the produced snapshot files.

    Extractor names come from ``settings.DAILY_ORCHESTRATION_EXTRACTORS``
    (comma-separated). Raises ``ValueError`` when that list resolves to empty,
    since an orchestration with no extractors would silently do nothing.
    """
    extractor_names = parse_enabled_extractors(settings.DAILY_ORCHESTRATION_EXTRACTORS)
    if not extractor_names:
        raise ValueError("DAILY_ORCHESTRATION_EXTRACTORS cannot be empty.")

    summaries: list[ExtractorRunSummary] = []
    for extractor_name in extractor_names:
        logger.info("daily_orchestration_extractor_start extractor=%s", extractor_name)
        extractor = create_extractor(extractor_name)
        result = extractor.run(snapshot_date=snapshot_date)
        summaries.append(
            ExtractorRunSummary(
                extractor_name=extractor_name,
                records_count=result.records_count,
                output_path=result.output_path,
            )
        )
        logger.info(
            "daily_orchestration_extractor_done extractor=%s records=%s output=%s",
            extractor_name,
            result.records_count,
            result.output_path,
        )

    # Import whatever the extractors dropped into the shared incoming directory.
    importer = SnapshotImporter(
        incoming_dir=settings.STATIC_DATASET_INCOMING_DIR,
        archive_dir=settings.STATIC_DATASET_ARCHIVE_DIR,
        failed_dir=settings.STATIC_DATASET_FAILED_DIR,
    )
    import_run = importer.run()
    logger.info(
        "daily_orchestration_import_done run_id=%s status=%s files=%s/%s upserted=%s failed=%s",
        import_run.id,
        import_run.status,
        import_run.files_processed,
        import_run.files_total,
        import_run.rows_upserted,
        import_run.rows_failed,
    )

    return DailyOrchestrationResult(
        extractors_run=summaries,
        import_run_id=import_run.id,
        import_status=import_run.status,
        files_processed=import_run.files_processed,
        rows_upserted=import_run.rows_upserted,
        rows_failed=import_run.rows_failed,
    )
|
||||||
357
apps/ingestion/services/snapshot_import.py
Normal file
357
apps/ingestion/services/snapshot_import.py
Normal file
@ -0,0 +1,357 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date, datetime
|
||||||
|
from decimal import Decimal
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.db import transaction
|
||||||
|
from django.template.defaultfilters import slugify
|
||||||
|
from django.utils import timezone
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
from apps.competitions.models import Competition, Season
|
||||||
|
from apps.ingestion.models import ImportFile, ImportRun
|
||||||
|
from apps.ingestion.snapshots import SnapshotSchemaValidator, SnapshotValidationError
|
||||||
|
from apps.players.models import Nationality, Player, Position, Role
|
||||||
|
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
||||||
|
from apps.teams.models import Team
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ImportSummary:
    """Mutable counters accumulated across one snapshot import run."""

    # Files discovered in the incoming directory.
    files_total: int = 0
    # Files that reached a terminal state (success, skipped, or failed).
    files_processed: int = 0
    # Row counters summed over every processed file.
    rows_total: int = 0
    rows_upserted: int = 0
    rows_failed: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_move(src: Path, destination_dir: Path) -> Path:
|
||||||
|
destination_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
candidate = destination_dir / src.name
|
||||||
|
if candidate.exists():
|
||||||
|
ts = datetime.utcnow().strftime("%Y%m%d%H%M%S")
|
||||||
|
candidate = destination_dir / f"{src.stem}-{ts}{src.suffix}"
|
||||||
|
shutil.move(str(src), str(candidate))
|
||||||
|
return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def _file_checksum(path: Path) -> str:
|
||||||
|
digest = hashlib.sha256()
|
||||||
|
with path.open("rb") as handle:
|
||||||
|
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
||||||
|
digest.update(chunk)
|
||||||
|
return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_decimal(value: float | int | str) -> Decimal:
|
||||||
|
return Decimal(str(value)).quantize(Decimal("0.01"))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_season_dates(label: str) -> tuple[date, date]:
|
||||||
|
if "-" in label:
|
||||||
|
first = label.split("-", 1)[0]
|
||||||
|
else:
|
||||||
|
first = label
|
||||||
|
year = int(first)
|
||||||
|
return date(year, 9, 1), date(year + 1, 7, 31)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_optional_birth_date(value: str | None) -> date | None:
    """Parse an ISO-format birth date; ``None`` or ``""`` means "not provided"."""
    if not value:
        return None
    return parse_date(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_name_parts(full_name: str) -> tuple[str, str]:
|
||||||
|
parts = full_name.strip().split(maxsplit=1)
|
||||||
|
if not parts:
|
||||||
|
return "", ""
|
||||||
|
if len(parts) == 1:
|
||||||
|
return parts[0], ""
|
||||||
|
return parts[0], parts[1]
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_nationality(value: str | None) -> Nationality | None:
    """Map a raw nationality token to a Nationality row.

    Two-letter tokens are treated as ISO-2 codes and auto-created when
    missing; longer tokens are matched by name only (never created).
    """
    if value is None:
        return None
    token = value.strip()
    if not token:
        return None
    if len(token) != 2:
        return Nationality.objects.filter(name__iexact=token).first()
    code = token.upper()
    nationality, _created = Nationality.objects.get_or_create(
        iso2_code=code,
        defaults={"name": code},
    )
    return nationality
|
||||||
|
|
||||||
|
|
||||||
|
def _position_code(position_value: str) -> str:
|
||||||
|
token = position_value.strip().upper().replace(" ", "_")
|
||||||
|
return (token[:10] or "UNK")
|
||||||
|
|
||||||
|
|
||||||
|
def _role_code(role_value: str) -> str:
    """Slugify a role label into a compact snake_case code (max 32 chars).

    Falls back to "unknown" when slugification produces nothing.
    """
    slug = slugify(role_value).replace("-", "_")
    return slug[:32] or "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _player_season_source_uid(record: dict[str, Any], source_name: str, snapshot_date: date) -> str:
|
||||||
|
return (
|
||||||
|
f"{source_name}:{snapshot_date.isoformat()}:"
|
||||||
|
f"{record['competition_external_id']}:{record['season']}:"
|
||||||
|
f"{record['team_external_id']}:{record['player_external_id']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _source_slug(*, source_name: str, base_name: str, fallback_prefix: str, fallback_external_id: str) -> str:
    """Compose a slug namespaced by the snapshot source.

    When *base_name* slugifies to nothing, fall back to a prefixed external
    id; when *source_name* slugifies to nothing, use "snapshot".
    """
    name_part = slugify(base_name) or f"{fallback_prefix}-{fallback_external_id}"
    prefix_part = slugify(source_name) or "snapshot"
    return f"{prefix_part}-{name_part}"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalized_source_name(source_name: str) -> str:
|
||||||
|
return source_name.strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _upsert_record(record: dict[str, Any], *, source_name: str, snapshot_date: date) -> None:
    """Upsert one validated snapshot record into the domain tables.

    Creates or updates Competition, Season, Team, Player, PlayerSeason and
    PlayerSeasonStats rows. Intended to run inside the caller's transaction;
    *record* is assumed already validated by SnapshotSchemaValidator.
    """
    source_key = _normalized_source_name(source_name)
    competition_slug = _source_slug(
        source_name=source_key,
        base_name=record["competition_name"],
        fallback_prefix="competition",
        fallback_external_id=record["competition_external_id"],
    )
    competition, _ = Competition.objects.update_or_create(
        source_name=source_key,
        source_uid=record["competition_external_id"],
        defaults={
            "name": record["competition_name"],
            "slug": competition_slug,
            "competition_type": Competition.CompetitionType.LEAGUE,
            "is_active": True,
        },
    )

    # Seasons are shared across sources and keyed purely by their label.
    start_date, end_date = _parse_season_dates(record["season"])
    season, _ = Season.objects.update_or_create(
        source_uid=f"season:{record['season']}",
        defaults={
            "label": record["season"],
            "start_date": start_date,
            "end_date": end_date,
            "is_current": False,
        },
    )

    team_slug = _source_slug(
        source_name=source_key,
        base_name=record["team_name"],
        fallback_prefix="team",
        fallback_external_id=record["team_external_id"],
    )
    team, _ = Team.objects.update_or_create(
        source_name=source_key,
        source_uid=record["team_external_id"],
        defaults={
            "name": record["team_name"],
            "slug": team_slug,
            "short_name": "",
        },
    )

    # Optional lookup rows: created only when the snapshot provides a value.
    position_value = record.get("position")
    position = None
    if position_value:
        position, _ = Position.objects.get_or_create(
            code=_position_code(position_value),
            defaults={"name": position_value},
        )
    role = None
    if record.get("role"):
        role, _ = Role.objects.get_or_create(
            code=_role_code(record["role"]),
            defaults={"name": record["role"]},
        )

    first_name = record.get("first_name") or ""
    last_name = record.get("last_name") or ""
    if not first_name and not last_name:
        # Fall back to splitting the full name when explicit parts are absent.
        first_name, last_name = _split_name_parts(record["full_name"])

    player, _ = Player.objects.update_or_create(
        source_name=source_key,
        source_uid=record["player_external_id"],
        defaults={
            "first_name": first_name,
            "last_name": last_name,
            "full_name": record["full_name"],
            "birth_date": _parse_optional_birth_date(record.get("birth_date")),
            "nationality": _resolve_nationality(record.get("nationality")),
            "nominal_position": position,
            "inferred_role": role,
            "height_cm": record.get("height_cm"),
            "weight_kg": record.get("weight_kg"),
            "is_active": True,
        },
    )

    player_season, _ = PlayerSeason.objects.update_or_create(
        source_uid=_player_season_source_uid(record, source_name=source_key, snapshot_date=snapshot_date),
        defaults={
            "player": player,
            "season": season,
            "team": team,
            "competition": competition,
            "games_played": int(record["games_played"]),
            "games_started": 0,
            # Snapshot only carries per-game minutes; approximate the season total.
            "minutes_played": int(round(float(record["minutes_per_game"]) * int(record["games_played"]))),
        },
    )

    PlayerSeasonStats.objects.update_or_create(
        player_season=player_season,
        defaults={
            "points": _normalize_decimal(record["points_per_game"]),
            "rebounds": _normalize_decimal(record["rebounds_per_game"]),
            "assists": _normalize_decimal(record["assists_per_game"]),
            "steals": _normalize_decimal(record["steals_per_game"]),
            "blocks": _normalize_decimal(record["blocks_per_game"]),
            "turnovers": _normalize_decimal(record["turnovers_per_game"]),
            "fg_pct": _normalize_decimal(record["fg_pct"]),
            "three_pct": _normalize_decimal(record["three_pt_pct"]),
            "ft_pct": _normalize_decimal(record["ft_pct"]),
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
class SnapshotImporter:
    """Imports validated JSON snapshot files from an incoming directory.

    Each file is checksummed, schema-validated, upserted inside its own
    transaction, and then moved to the archive directory — or to the failed
    directory when parsing/validation/upserting raised.
    """

    def __init__(self, *, incoming_dir: str, archive_dir: str, failed_dir: str):
        self.incoming_dir = Path(incoming_dir)
        self.archive_dir = Path(archive_dir)
        self.failed_dir = Path(failed_dir)

    def _list_input_files(self) -> list[Path]:
        # Only top-level *.json files; sorting keeps run order deterministic.
        if not self.incoming_dir.exists():
            return []
        return sorted(path for path in self.incoming_dir.iterdir() if path.is_file() and path.suffix.lower() == ".json")

    def run(self, *, triggered_by=None) -> ImportRun:
        """Process every pending snapshot file and return the finished ImportRun."""
        run = ImportRun.objects.create(
            source="static_snapshot_json",
            status=ImportRun.RunStatus.RUNNING,
            triggered_by=triggered_by,
            started_at=timezone.now(),
            context={
                "incoming_dir": str(self.incoming_dir),
                "archive_dir": str(self.archive_dir),
                "failed_dir": str(self.failed_dir),
            },
        )

        summary = ImportSummary()
        files = self._list_input_files()
        summary.files_total = len(files)

        for path in files:
            checksum = _file_checksum(path)
            file_row = ImportFile.objects.create(
                import_run=run,
                relative_path=path.name,
                status=ImportFile.FileStatus.PROCESSING,
                checksum=checksum,
                file_size_bytes=path.stat().st_size,
            )

            # Duplicate file content previously imported successfully.
            already_imported = ImportFile.objects.filter(
                checksum=checksum,
                status=ImportFile.FileStatus.SUCCESS,
            ).exclude(pk=file_row.pk).exists()
            if already_imported:
                # Skip, but still archive the file and count it as processed.
                file_row.status = ImportFile.FileStatus.SKIPPED
                file_row.error_message = "Skipped duplicate checksum already imported successfully."
                file_row.processed_at = timezone.now()
                file_row.save(update_fields=["status", "error_message", "processed_at"])
                _safe_move(path, self.archive_dir)
                summary.files_processed += 1
                continue

            try:
                payload = json.loads(path.read_text(encoding="utf-8"))
                validated = SnapshotSchemaValidator.validate(payload)

                file_row.source_name = validated.source_name
                file_row.snapshot_date = validated.snapshot_date
                file_row.rows_total = len(validated.records)

                # All-or-nothing per file: one bad record rolls the file back.
                with transaction.atomic():
                    for record in validated.records:
                        _upsert_record(record, source_name=validated.source_name, snapshot_date=validated.snapshot_date)

                file_row.status = ImportFile.FileStatus.SUCCESS
                file_row.rows_upserted = len(validated.records)
                file_row.payload_preview = {
                    "source_name": validated.source_name,
                    "snapshot_date": validated.snapshot_date.isoformat(),
                    "sample_record": validated.records[0],
                }
                _safe_move(path, self.archive_dir)
            except (json.JSONDecodeError, SnapshotValidationError, ValueError) as exc:
                # Expected parse/validation failures: record and quarantine.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = str(exc)
                _safe_move(path, self.failed_dir)
            except Exception as exc:  # noqa: BLE001
                # Last-resort guard so one broken file cannot abort the whole run.
                file_row.status = ImportFile.FileStatus.FAILED
                file_row.error_message = f"Unhandled import error: {exc}"
                _safe_move(path, self.failed_dir)

            file_row.processed_at = timezone.now()
            file_row.save(
                update_fields=[
                    "source_name",
                    "snapshot_date",
                    "status",
                    "rows_total",
                    "rows_upserted",
                    "rows_failed",
                    "error_message",
                    "payload_preview",
                    "processed_at",
                ]
            )

            summary.files_processed += 1
            summary.rows_total += file_row.rows_total
            summary.rows_upserted += file_row.rows_upserted
            # A failed file counts as one extra failure on top of its row failures.
            summary.rows_failed += file_row.rows_failed + (1 if file_row.status == ImportFile.FileStatus.FAILED else 0)

        run.status = ImportRun.RunStatus.SUCCESS if summary.rows_failed == 0 else ImportRun.RunStatus.FAILED
        run.files_total = summary.files_total
        run.files_processed = summary.files_processed
        run.rows_total = summary.rows_total
        run.rows_upserted = summary.rows_upserted
        run.rows_failed = summary.rows_failed
        run.finished_at = timezone.now()
        if summary.rows_failed:
            run.error_summary = f"{summary.rows_failed} file/row import error(s)."
        run.save(
            update_fields=[
                "status",
                "files_total",
                "files_processed",
                "rows_total",
                "rows_upserted",
                "rows_failed",
                "error_summary",
                "finished_at",
            ]
        )
        return run
|
||||||
3
apps/ingestion/snapshots/__init__.py
Normal file
3
apps/ingestion/snapshots/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .schema import SnapshotSchemaValidator, SnapshotValidationError, SnapshotValidationResult
|
||||||
|
|
||||||
|
__all__ = ["SnapshotSchemaValidator", "SnapshotValidationError", "SnapshotValidationResult"]
|
||||||
203
apps/ingestion/snapshots/schema.py
Normal file
203
apps/ingestion/snapshots/schema.py
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.utils.dateparse import parse_date
|
||||||
|
|
||||||
|
# Fields every snapshot record must carry.
REQUIRED_RECORD_FIELDS = {
    "competition_external_id",
    "competition_name",
    "season",
    "team_external_id",
    "team_name",
    "player_external_id",
    "full_name",
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}

# Fields a record may omit or leave null.
OPTIONAL_RECORD_FIELDS = {
    "first_name",
    "last_name",
    "birth_date",
    "nationality",
    "height_cm",
    "weight_kg",
    "position",
}

# Keys permitted at the snapshot's top level; anything else is rejected.
ALLOWED_TOP_LEVEL_FIELDS = {
    "source_name",
    "snapshot_date",
    "records",
    "source_metadata",
    "raw_payload",
}

# Full record whitelist: required + optional + pass-through metadata keys.
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | OPTIONAL_RECORD_FIELDS | {
    "role",
    "source_metadata",
    "raw_payload",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SnapshotValidationResult:
    """Outcome of a successful snapshot validation pass."""

    # Trimmed source identifier taken from the payload.
    source_name: str
    # Date the snapshot was taken.
    snapshot_date: date
    # Records with every field validated and coerced to its canonical type.
    records: list[dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
class SnapshotValidationError(ValueError):
    """Raised when a snapshot payload violates the strict schema."""
|
||||||
|
|
||||||
|
|
||||||
|
class SnapshotSchemaValidator:
    """Strict JSON schema validator for HoopScout v2 player-season snapshots."""

    @staticmethod
    def _require_string(value: Any, field: str) -> str:
        # Reject non-strings and whitespace-only values; return trimmed text.
        if not isinstance(value, str) or not value.strip():
            raise SnapshotValidationError(f"{field} must be a non-empty string")
        return value.strip()

    @staticmethod
    def _optional_string(value: Any, field: str) -> str | None:
        # None / "" are treated as absent; other non-strings are rejected.
        if value in (None, ""):
            return None
        if not isinstance(value, str):
            raise SnapshotValidationError(f"{field} must be a string when provided")
        stripped = value.strip()
        return stripped or None

    @staticmethod
    def _require_non_negative_int(value: Any, field: str) -> int:
        # bool is an int subclass in Python, so exclude it explicitly.
        if isinstance(value, bool):
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        try:
            parsed = int(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be a non-negative integer") from exc
        if parsed < 0:
            raise SnapshotValidationError(f"{field} must be a non-negative integer")
        return parsed

    @classmethod
    def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
        # None / "" means absent; otherwise enforce the required-int rules.
        if value in (None, ""):
            return None
        return cls._require_non_negative_int(value, field)

    @staticmethod
    def _require_float(value: Any, field: str) -> float:
        try:
            parsed = float(value)
        except (TypeError, ValueError) as exc:
            raise SnapshotValidationError(f"{field} must be numeric") from exc
        return parsed

    @classmethod
    def _validate_record(cls, record: dict[str, Any], index: int) -> dict[str, Any]:
        """Validate and normalize a single player-season record.

        Raises SnapshotValidationError naming the offending field; *index*
        is embedded in every message for actionable errors.
        """
        unknown = set(record.keys()) - ALLOWED_RECORD_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"record[{index}] contains unknown fields: {', '.join(sorted(unknown))}"
            )

        missing = REQUIRED_RECORD_FIELDS - set(record.keys())
        if missing:
            raise SnapshotValidationError(
                f"record[{index}] missing required fields: {', '.join(sorted(missing))}"
            )

        normalized = dict(record)
        # Identity fields must all be non-empty strings.
        for field in (
            "competition_external_id",
            "competition_name",
            "season",
            "team_external_id",
            "team_name",
            "player_external_id",
            "full_name",
        ):
            normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")

        for field in ("first_name", "last_name", "nationality", "position"):
            normalized[field] = cls._optional_string(record.get(field), f"record[{index}].{field}")

        # "role" is optional, but when present it must be a non-empty string.
        if record.get("role") is not None:
            normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")

        birth_date_raw = record.get("birth_date")
        if birth_date_raw in (None, ""):
            normalized["birth_date"] = None
        else:
            birth_date = parse_date(str(birth_date_raw))
            if not birth_date:
                raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
            # Keep ISO text, not a date object, so the record stays JSON-safe.
            normalized["birth_date"] = birth_date.isoformat()

        normalized["height_cm"] = cls._optional_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
        normalized["weight_kg"] = cls._optional_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
        normalized["games_played"] = cls._require_non_negative_int(record.get("games_played"), f"record[{index}].games_played")

        # Per-game statistics are coerced to float.
        for field in (
            "minutes_per_game",
            "points_per_game",
            "rebounds_per_game",
            "assists_per_game",
            "steals_per_game",
            "blocks_per_game",
            "turnovers_per_game",
            "fg_pct",
            "three_pt_pct",
            "ft_pct",
        ):
            normalized[field] = cls._require_float(record.get(field), f"record[{index}].{field}")

        return normalized

    @classmethod
    def validate(cls, payload: dict[str, Any]) -> SnapshotValidationResult:
        """Validate a full snapshot payload.

        Returns a SnapshotValidationResult with normalized records; raises
        SnapshotValidationError on any structural or field-level defect.
        """
        if not isinstance(payload, dict):
            raise SnapshotValidationError("Snapshot root must be an object")

        unknown = set(payload.keys()) - ALLOWED_TOP_LEVEL_FIELDS
        if unknown:
            raise SnapshotValidationError(
                f"Snapshot contains unknown top-level fields: {', '.join(sorted(unknown))}"
            )

        source_name = cls._require_string(payload.get("source_name"), "source_name")

        snapshot_date_raw = payload.get("snapshot_date")
        snapshot_date = parse_date(str(snapshot_date_raw))
        if not snapshot_date:
            raise SnapshotValidationError("snapshot_date must be YYYY-MM-DD")

        records = payload.get("records")
        if not isinstance(records, list) or not records:
            raise SnapshotValidationError("records must be a non-empty array")

        normalized_records: list[dict[str, Any]] = []
        for index, record in enumerate(records):
            if not isinstance(record, dict):
                raise SnapshotValidationError(f"record[{index}] must be an object")
            normalized_records.append(cls._validate_record(record, index=index))

        return SnapshotValidationResult(
            source_name=source_name,
            snapshot_date=snapshot_date,
            records=normalized_records,
        )
|
||||||
@ -37,6 +37,8 @@ class PlayerCareerEntryInline(admin.TabularInline):
|
|||||||
class PlayerAdmin(admin.ModelAdmin):
|
class PlayerAdmin(admin.ModelAdmin):
|
||||||
list_display = (
|
list_display = (
|
||||||
"full_name",
|
"full_name",
|
||||||
|
"source_name",
|
||||||
|
"source_uid",
|
||||||
"birth_date",
|
"birth_date",
|
||||||
"nationality",
|
"nationality",
|
||||||
"nominal_position",
|
"nominal_position",
|
||||||
@ -53,7 +55,7 @@ class PlayerAdmin(admin.ModelAdmin):
|
|||||||
"origin_competition",
|
"origin_competition",
|
||||||
"origin_team",
|
"origin_team",
|
||||||
)
|
)
|
||||||
search_fields = ("full_name", "first_name", "last_name")
|
search_fields = ("full_name", "first_name", "last_name", "source_name", "source_uid")
|
||||||
inlines = (PlayerAliasInline, PlayerCareerEntryInline)
|
inlines = (PlayerAliasInline, PlayerCareerEntryInline)
|
||||||
actions = ("recompute_origin_fields",)
|
actions = ("recompute_origin_fields",)
|
||||||
|
|
||||||
|
|||||||
@ -25,10 +25,8 @@ class PlayerSearchForm(forms.Form):
|
|||||||
nominal_position = forms.ModelChoiceField(queryset=Position.objects.none(), required=False)
|
nominal_position = forms.ModelChoiceField(queryset=Position.objects.none(), required=False)
|
||||||
inferred_role = forms.ModelChoiceField(queryset=Role.objects.none(), required=False)
|
inferred_role = forms.ModelChoiceField(queryset=Role.objects.none(), required=False)
|
||||||
competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False)
|
competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False)
|
||||||
origin_competition = forms.ModelChoiceField(queryset=Competition.objects.none(), required=False)
|
|
||||||
nationality = forms.ModelChoiceField(queryset=Nationality.objects.none(), required=False)
|
nationality = forms.ModelChoiceField(queryset=Nationality.objects.none(), required=False)
|
||||||
team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False)
|
team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False)
|
||||||
origin_team = forms.ModelChoiceField(queryset=Team.objects.none(), required=False)
|
|
||||||
season = forms.ModelChoiceField(queryset=Season.objects.none(), required=False)
|
season = forms.ModelChoiceField(queryset=Season.objects.none(), required=False)
|
||||||
|
|
||||||
age_min = forms.IntegerField(required=False, min_value=0, max_value=60, label="Min age")
|
age_min = forms.IntegerField(required=False, min_value=0, max_value=60, label="Min age")
|
||||||
@ -60,20 +58,6 @@ class PlayerSearchForm(forms.Form):
|
|||||||
three_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="3P% max")
|
three_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="3P% max")
|
||||||
ft_pct_min = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% min")
|
ft_pct_min = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% min")
|
||||||
ft_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% max")
|
ft_pct_max = forms.DecimalField(required=False, min_value=0, decimal_places=2, max_digits=5, label="FT% max")
|
||||||
efficiency_metric_min = forms.DecimalField(
|
|
||||||
required=False,
|
|
||||||
min_value=0,
|
|
||||||
decimal_places=2,
|
|
||||||
max_digits=6,
|
|
||||||
label="Impact metric min",
|
|
||||||
)
|
|
||||||
efficiency_metric_max = forms.DecimalField(
|
|
||||||
required=False,
|
|
||||||
min_value=0,
|
|
||||||
decimal_places=2,
|
|
||||||
max_digits=6,
|
|
||||||
label="Impact metric max",
|
|
||||||
)
|
|
||||||
|
|
||||||
sort = forms.ChoiceField(choices=SORT_CHOICES, required=False, initial="name_asc")
|
sort = forms.ChoiceField(choices=SORT_CHOICES, required=False, initial="name_asc")
|
||||||
page_size = forms.TypedChoiceField(
|
page_size = forms.TypedChoiceField(
|
||||||
@ -88,10 +72,8 @@ class PlayerSearchForm(forms.Form):
|
|||||||
self.fields["nominal_position"].queryset = Position.objects.order_by("code")
|
self.fields["nominal_position"].queryset = Position.objects.order_by("code")
|
||||||
self.fields["inferred_role"].queryset = Role.objects.order_by("name")
|
self.fields["inferred_role"].queryset = Role.objects.order_by("name")
|
||||||
self.fields["competition"].queryset = Competition.objects.order_by("name")
|
self.fields["competition"].queryset = Competition.objects.order_by("name")
|
||||||
self.fields["origin_competition"].queryset = Competition.objects.order_by("name")
|
|
||||||
self.fields["nationality"].queryset = Nationality.objects.order_by("name")
|
self.fields["nationality"].queryset = Nationality.objects.order_by("name")
|
||||||
self.fields["team"].queryset = Team.objects.order_by("name")
|
self.fields["team"].queryset = Team.objects.order_by("name")
|
||||||
self.fields["origin_team"].queryset = Team.objects.order_by("name")
|
|
||||||
self.fields["season"].queryset = Season.objects.order_by("-start_date")
|
self.fields["season"].queryset = Season.objects.order_by("-start_date")
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
@ -110,7 +92,6 @@ class PlayerSearchForm(forms.Form):
|
|||||||
self._validate_min_max(cleaned_data, "fg_pct_min", "fg_pct_max")
|
self._validate_min_max(cleaned_data, "fg_pct_min", "fg_pct_max")
|
||||||
self._validate_min_max(cleaned_data, "three_pct_min", "three_pct_max")
|
self._validate_min_max(cleaned_data, "three_pct_min", "three_pct_max")
|
||||||
self._validate_min_max(cleaned_data, "ft_pct_min", "ft_pct_max")
|
self._validate_min_max(cleaned_data, "ft_pct_min", "ft_pct_max")
|
||||||
self._validate_min_max(cleaned_data, "efficiency_metric_min", "efficiency_metric_max")
|
|
||||||
|
|
||||||
if not cleaned_data.get("sort"):
|
if not cleaned_data.get("sort"):
|
||||||
cleaned_data["sort"] = "name_asc"
|
cleaned_data["sort"] = "name_asc"
|
||||||
|
|||||||
24
apps/players/migrations/0006_player_source_uid_and_more.py
Normal file
24
apps/players/migrations/0006_player_source_uid_and_more.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('competitions', '0003_competition_source_uid_season_source_uid_and_more'),
|
||||||
|
('players', '0005_player_weight_index'),
|
||||||
|
('teams', '0001_initial'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='player',
|
||||||
|
name='source_uid',
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name='player',
|
||||||
|
index=models.Index(fields=['source_uid'], name='players_pla_source__93bb47_idx'),
|
||||||
|
),
|
||||||
|
]
|
||||||
39
apps/players/migrations/0007_player_source_namespaced_uid.py
Normal file
39
apps/players/migrations/0007_player_source_namespaced_uid.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 15:08
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("players", "0006_player_source_uid_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="player",
|
||||||
|
name="source_name",
|
||||||
|
field=models.CharField(blank=True, default="", max_length=120),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="player",
|
||||||
|
name="source_uid",
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True),
|
||||||
|
),
|
||||||
|
migrations.RemoveConstraint(
|
||||||
|
model_name="player",
|
||||||
|
name="uq_player_full_name_birth_date",
|
||||||
|
),
|
||||||
|
migrations.AddConstraint(
|
||||||
|
model_name="player",
|
||||||
|
constraint=models.UniqueConstraint(
|
||||||
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
fields=("source_name", "source_uid"),
|
||||||
|
name="uq_player_source_namespace_uid",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="player",
|
||||||
|
index=models.Index(fields=["source_name", "source_uid"], name="players_pla_source__73848c_idx"),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -58,6 +58,8 @@ class Player(TimeStampedModel):
|
|||||||
first_name = models.CharField(max_length=120)
|
first_name = models.CharField(max_length=120)
|
||||||
last_name = models.CharField(max_length=120)
|
last_name = models.CharField(max_length=120)
|
||||||
full_name = models.CharField(max_length=260)
|
full_name = models.CharField(max_length=260)
|
||||||
|
source_name = models.CharField(max_length=120, blank=True, default="")
|
||||||
|
source_uid = models.CharField(max_length=120, blank=True, null=True)
|
||||||
birth_date = models.DateField(blank=True, null=True)
|
birth_date = models.DateField(blank=True, null=True)
|
||||||
nationality = models.ForeignKey(
|
nationality = models.ForeignKey(
|
||||||
"players.Nationality",
|
"players.Nationality",
|
||||||
@ -108,12 +110,15 @@ class Player(TimeStampedModel):
|
|||||||
ordering = ["full_name", "id"]
|
ordering = ["full_name", "id"]
|
||||||
constraints = [
|
constraints = [
|
||||||
models.UniqueConstraint(
|
models.UniqueConstraint(
|
||||||
fields=["full_name", "birth_date"],
|
fields=["source_name", "source_uid"],
|
||||||
name="uq_player_full_name_birth_date",
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
name="uq_player_source_namespace_uid",
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["full_name"]),
|
models.Index(fields=["full_name"]),
|
||||||
|
models.Index(fields=["source_name", "source_uid"]),
|
||||||
|
models.Index(fields=["source_uid"]),
|
||||||
models.Index(fields=["last_name", "first_name"]),
|
models.Index(fields=["last_name", "first_name"]),
|
||||||
models.Index(fields=["birth_date"]),
|
models.Index(fields=["birth_date"]),
|
||||||
models.Index(fields=["nationality"]),
|
models.Index(fields=["nationality"]),
|
||||||
|
|||||||
@ -14,7 +14,6 @@ from django.db.models import (
|
|||||||
Value,
|
Value,
|
||||||
When,
|
When,
|
||||||
)
|
)
|
||||||
from django.db.models.functions import Coalesce
|
|
||||||
|
|
||||||
from apps.players.models import Player
|
from apps.players.models import Player
|
||||||
from apps.stats.models import PlayerSeason
|
from apps.stats.models import PlayerSeason
|
||||||
@ -22,7 +21,8 @@ from apps.stats.models import PlayerSeason
|
|||||||
METRIC_SORT_KEYS = {"ppg_desc", "ppg_asc", "mpg_desc", "mpg_asc"}
|
METRIC_SORT_KEYS = {"ppg_desc", "ppg_asc", "mpg_desc", "mpg_asc"}
|
||||||
SEARCH_METRIC_SEMANTICS_TEXT = (
|
SEARCH_METRIC_SEMANTICS_TEXT = (
|
||||||
"Search metrics are best eligible values per player (max per metric across eligible player-season rows). "
|
"Search metrics are best eligible values per player (max per metric across eligible player-season rows). "
|
||||||
"With season/team/competition/stat filters, eligibility is scoped by those filters."
|
"With season/team/competition/stat filters, eligibility is scoped by those filters. "
|
||||||
|
"When no eligible stat exists in the current filter context, metric cells show '-'."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -73,8 +73,6 @@ def _season_scope_filter_keys() -> tuple[str, ...]:
|
|||||||
"three_pct_max",
|
"three_pct_max",
|
||||||
"ft_pct_min",
|
"ft_pct_min",
|
||||||
"ft_pct_max",
|
"ft_pct_max",
|
||||||
"efficiency_metric_min",
|
|
||||||
"efficiency_metric_max",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -121,7 +119,6 @@ def _apply_player_season_scope_filters(queryset, data: dict):
|
|||||||
("fg_pct_min", "fg_pct_max", "stats__fg_pct"),
|
("fg_pct_min", "fg_pct_max", "stats__fg_pct"),
|
||||||
("three_pct_min", "three_pct_max", "stats__three_pct"),
|
("three_pct_min", "three_pct_max", "stats__three_pct"),
|
||||||
("ft_pct_min", "ft_pct_max", "stats__ft_pct"),
|
("ft_pct_min", "ft_pct_max", "stats__ft_pct"),
|
||||||
("efficiency_metric_min", "efficiency_metric_max", "stats__player_efficiency_rating"),
|
|
||||||
)
|
)
|
||||||
for min_key, max_key, field_name in stat_pairs:
|
for min_key, max_key, field_name in stat_pairs:
|
||||||
queryset = _apply_min_max_filter(queryset, min_key, max_key, field_name, data)
|
queryset = _apply_min_max_filter(queryset, min_key, max_key, field_name, data)
|
||||||
@ -149,11 +146,6 @@ def _build_metric_context_filter(data: dict) -> Q:
|
|||||||
("fg_pct_min", "fg_pct_max", "player_seasons__stats__fg_pct"),
|
("fg_pct_min", "fg_pct_max", "player_seasons__stats__fg_pct"),
|
||||||
("three_pct_min", "three_pct_max", "player_seasons__stats__three_pct"),
|
("three_pct_min", "three_pct_max", "player_seasons__stats__three_pct"),
|
||||||
("ft_pct_min", "ft_pct_max", "player_seasons__stats__ft_pct"),
|
("ft_pct_min", "ft_pct_max", "player_seasons__stats__ft_pct"),
|
||||||
(
|
|
||||||
"efficiency_metric_min",
|
|
||||||
"efficiency_metric_max",
|
|
||||||
"player_seasons__stats__player_efficiency_rating",
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
for min_key, max_key, field_name in minmax_pairs:
|
for min_key, max_key, field_name in minmax_pairs:
|
||||||
min_value = data.get(min_key)
|
min_value = data.get(min_key)
|
||||||
@ -188,10 +180,6 @@ def filter_players(queryset, data: dict):
|
|||||||
queryset = queryset.filter(inferred_role=data["inferred_role"])
|
queryset = queryset.filter(inferred_role=data["inferred_role"])
|
||||||
if data.get("nationality"):
|
if data.get("nationality"):
|
||||||
queryset = queryset.filter(nationality=data["nationality"])
|
queryset = queryset.filter(nationality=data["nationality"])
|
||||||
if data.get("origin_competition"):
|
|
||||||
queryset = queryset.filter(origin_competition=data["origin_competition"])
|
|
||||||
if data.get("origin_team"):
|
|
||||||
queryset = queryset.filter(origin_team=data["origin_team"])
|
|
||||||
|
|
||||||
queryset = _apply_min_max_filter(queryset, "height_min", "height_max", "height_cm", data)
|
queryset = _apply_min_max_filter(queryset, "height_min", "height_max", "height_cm", data)
|
||||||
queryset = _apply_min_max_filter(queryset, "weight_min", "weight_max", "weight_kg", data)
|
queryset = _apply_min_max_filter(queryset, "weight_min", "weight_max", "weight_kg", data)
|
||||||
@ -235,47 +223,62 @@ def annotate_player_metrics(queryset, data: dict | None = None):
|
|||||||
output_field=FloatField(),
|
output_field=FloatField(),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
default=Value(0.0),
|
default=Value(None),
|
||||||
output_field=FloatField(),
|
output_field=FloatField(),
|
||||||
)
|
)
|
||||||
|
|
||||||
return queryset.annotate(
|
return queryset.annotate(
|
||||||
games_played_value=Coalesce(
|
games_played_value=Max(
|
||||||
Max("player_seasons__games_played", filter=context_filter),
|
"player_seasons__games_played",
|
||||||
Value(0, output_field=IntegerField()),
|
filter=context_filter,
|
||||||
output_field=IntegerField(),
|
output_field=IntegerField(),
|
||||||
),
|
),
|
||||||
mpg_value=Coalesce(Max(mpg_expression, filter=context_filter), Value(0.0)),
|
mpg_value=Max(mpg_expression, filter=context_filter),
|
||||||
ppg_value=Coalesce(
|
ppg_value=Max(
|
||||||
Max("player_seasons__stats__points", filter=context_filter),
|
"player_seasons__stats__points",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
rpg_value=Coalesce(
|
rpg_value=Max(
|
||||||
Max("player_seasons__stats__rebounds", filter=context_filter),
|
"player_seasons__stats__rebounds",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
apg_value=Coalesce(
|
apg_value=Max(
|
||||||
Max("player_seasons__stats__assists", filter=context_filter),
|
"player_seasons__stats__assists",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
spg_value=Coalesce(
|
spg_value=Max(
|
||||||
Max("player_seasons__stats__steals", filter=context_filter),
|
"player_seasons__stats__steals",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
bpg_value=Coalesce(
|
bpg_value=Max(
|
||||||
Max("player_seasons__stats__blocks", filter=context_filter),
|
"player_seasons__stats__blocks",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
top_efficiency=Coalesce(
|
tov_value=Max(
|
||||||
Max("player_seasons__stats__player_efficiency_rating", filter=context_filter),
|
"player_seasons__stats__turnovers",
|
||||||
Value(0, output_field=DecimalField(max_digits=6, decimal_places=2)),
|
filter=context_filter,
|
||||||
output_field=DecimalField(max_digits=6, decimal_places=2),
|
output_field=DecimalField(max_digits=6, decimal_places=2),
|
||||||
),
|
),
|
||||||
|
fg_pct_value=Max(
|
||||||
|
"player_seasons__stats__fg_pct",
|
||||||
|
filter=context_filter,
|
||||||
|
output_field=DecimalField(max_digits=5, decimal_places=2),
|
||||||
|
),
|
||||||
|
three_pct_value=Max(
|
||||||
|
"player_seasons__stats__three_pct",
|
||||||
|
filter=context_filter,
|
||||||
|
output_field=DecimalField(max_digits=5, decimal_places=2),
|
||||||
|
),
|
||||||
|
ft_pct_value=Max(
|
||||||
|
"player_seasons__stats__ft_pct",
|
||||||
|
filter=context_filter,
|
||||||
|
output_field=DecimalField(max_digits=5, decimal_places=2),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from apps.scouting.models import FavoritePlayer
|
|||||||
from apps.stats.models import PlayerSeason
|
from apps.stats.models import PlayerSeason
|
||||||
|
|
||||||
from .forms import PlayerSearchForm
|
from .forms import PlayerSearchForm
|
||||||
from .models import Player, PlayerCareerEntry
|
from .models import Player
|
||||||
from .services.search import (
|
from .services.search import (
|
||||||
SEARCH_METRIC_SEMANTICS_TEXT,
|
SEARCH_METRIC_SEMANTICS_TEXT,
|
||||||
annotate_player_metrics,
|
annotate_player_metrics,
|
||||||
@ -92,12 +92,6 @@ class PlayerDetailView(DetailView):
|
|||||||
"competition",
|
"competition",
|
||||||
"stats",
|
"stats",
|
||||||
).order_by("-season__start_date", "-id")
|
).order_by("-season__start_date", "-id")
|
||||||
career_queryset = PlayerCareerEntry.objects.select_related(
|
|
||||||
"team",
|
|
||||||
"competition",
|
|
||||||
"season",
|
|
||||||
"role_snapshot",
|
|
||||||
).order_by("-start_date", "-id")
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
Player.objects.select_related(
|
Player.objects.select_related(
|
||||||
@ -108,9 +102,7 @@ class PlayerDetailView(DetailView):
|
|||||||
"origin_team",
|
"origin_team",
|
||||||
)
|
)
|
||||||
.prefetch_related(
|
.prefetch_related(
|
||||||
"aliases",
|
|
||||||
Prefetch("player_seasons", queryset=season_queryset),
|
Prefetch("player_seasons", queryset=season_queryset),
|
||||||
Prefetch("career_entries", queryset=career_queryset),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -146,7 +138,6 @@ class PlayerDetailView(DetailView):
|
|||||||
|
|
||||||
context["age"] = calculate_age(player.birth_date)
|
context["age"] = calculate_age(player.birth_date)
|
||||||
context["current_assignment"] = current_assignment
|
context["current_assignment"] = current_assignment
|
||||||
context["career_entries"] = player.career_entries.all()
|
|
||||||
context["season_rows"] = season_rows
|
context["season_rows"] = season_rows
|
||||||
context["is_favorite"] = False
|
context["is_favorite"] = False
|
||||||
if self.request.user.is_authenticated:
|
if self.request.user.is_authenticated:
|
||||||
|
|||||||
@ -1,4 +1,8 @@
|
|||||||
from django import forms
|
from django import forms
|
||||||
|
import json
|
||||||
|
from decimal import Decimal
|
||||||
|
|
||||||
|
from apps.players.forms import PlayerSearchForm
|
||||||
|
|
||||||
from .models import SavedSearch
|
from .models import SavedSearch
|
||||||
|
|
||||||
@ -10,3 +14,61 @@ class SavedSearchForm(forms.ModelForm):
|
|||||||
widgets = {
|
widgets = {
|
||||||
"name": forms.TextInput(attrs={"placeholder": "e.g. EuroLeague guards under 24"}),
|
"name": forms.TextInput(attrs={"placeholder": "e.g. EuroLeague guards under 24"}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SavedSearchUpdateForm(forms.ModelForm):
|
||||||
|
filters_json = forms.CharField(
|
||||||
|
required=False,
|
||||||
|
label="Filters (JSON)",
|
||||||
|
widget=forms.Textarea(attrs={"rows": 8, "class": "font-mono"}),
|
||||||
|
help_text="Structured search filters payload. Leave blank to keep current filters.",
|
||||||
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
model = SavedSearch
|
||||||
|
fields = ["name", "is_public", "filters_json"]
|
||||||
|
widgets = {
|
||||||
|
"name": forms.TextInput(attrs={"placeholder": "e.g. Italian wings - updated"}),
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
if self.instance and self.instance.pk and not self.initial.get("filters_json"):
|
||||||
|
self.initial["filters_json"] = json.dumps(self.instance.filters, indent=2, sort_keys=True)
|
||||||
|
|
||||||
|
def clean_filters_json(self):
|
||||||
|
raw = self.cleaned_data.get("filters_json")
|
||||||
|
if not raw:
|
||||||
|
return self.instance.filters
|
||||||
|
try:
|
||||||
|
parsed = json.loads(raw)
|
||||||
|
except json.JSONDecodeError as exc:
|
||||||
|
raise forms.ValidationError("Invalid JSON format.") from exc
|
||||||
|
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
raise forms.ValidationError("Filters JSON must be an object.")
|
||||||
|
|
||||||
|
form = PlayerSearchForm(parsed)
|
||||||
|
if not form.is_valid():
|
||||||
|
raise forms.ValidationError("Filters JSON contains invalid search parameters.")
|
||||||
|
|
||||||
|
validated = {}
|
||||||
|
for key, value in form.cleaned_data.items():
|
||||||
|
if value in (None, ""):
|
||||||
|
continue
|
||||||
|
if hasattr(value, "pk"):
|
||||||
|
validated[key] = value.pk
|
||||||
|
elif isinstance(value, Decimal):
|
||||||
|
validated[key] = str(value)
|
||||||
|
else:
|
||||||
|
validated[key] = value
|
||||||
|
if not validated:
|
||||||
|
raise forms.ValidationError("Filters JSON does not contain valid searchable filters.")
|
||||||
|
return validated
|
||||||
|
|
||||||
|
def save(self, commit=True):
|
||||||
|
instance = super().save(commit=False)
|
||||||
|
instance.filters = self.cleaned_data["filters_json"]
|
||||||
|
if commit:
|
||||||
|
instance.save()
|
||||||
|
return instance
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from django.utils import timezone
|
|||||||
from django.views import View
|
from django.views import View
|
||||||
from django.views.generic import ListView, TemplateView, UpdateView
|
from django.views.generic import ListView, TemplateView, UpdateView
|
||||||
|
|
||||||
from .forms import SavedSearchForm
|
from .forms import SavedSearchForm, SavedSearchUpdateForm
|
||||||
from .models import FavoritePlayer, SavedSearch
|
from .models import FavoritePlayer, SavedSearch
|
||||||
from .services.saved_searches import extract_filters_from_params, saved_search_to_querystring
|
from .services.saved_searches import extract_filters_from_params, saved_search_to_querystring
|
||||||
|
|
||||||
@ -81,7 +81,7 @@ class SavedSearchCreateView(LoginRequiredMixin, View):
|
|||||||
|
|
||||||
class SavedSearchUpdateView(LoginRequiredMixin, UpdateView):
|
class SavedSearchUpdateView(LoginRequiredMixin, UpdateView):
|
||||||
model = SavedSearch
|
model = SavedSearch
|
||||||
form_class = SavedSearchForm
|
form_class = SavedSearchUpdateForm
|
||||||
template_name = "scouting/saved_search_edit.html"
|
template_name = "scouting/saved_search_edit.html"
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
@ -96,6 +96,13 @@ class SavedSearchDeleteView(LoginRequiredMixin, View):
|
|||||||
def post(self, request, pk, *args, **kwargs):
|
def post(self, request, pk, *args, **kwargs):
|
||||||
saved_search = get_object_or_404(SavedSearch, pk=pk, user=request.user)
|
saved_search = get_object_or_404(SavedSearch, pk=pk, user=request.user)
|
||||||
saved_search.delete()
|
saved_search.delete()
|
||||||
|
if request.headers.get("HX-Request") == "true":
|
||||||
|
saved_searches = SavedSearch.objects.filter(user=request.user).order_by("-updated_at")
|
||||||
|
return render(
|
||||||
|
request,
|
||||||
|
"scouting/partials/saved_search_table.html",
|
||||||
|
{"saved_searches": saved_searches},
|
||||||
|
)
|
||||||
messages.success(request, "Saved search deleted.")
|
messages.success(request, "Saved search deleted.")
|
||||||
return redirect("scouting:index")
|
return redirect("scouting:index")
|
||||||
|
|
||||||
|
|||||||
@ -5,9 +5,9 @@ from .models import PlayerSeason, PlayerSeasonStats
|
|||||||
|
|
||||||
@admin.register(PlayerSeason)
|
@admin.register(PlayerSeason)
|
||||||
class PlayerSeasonAdmin(admin.ModelAdmin):
|
class PlayerSeasonAdmin(admin.ModelAdmin):
|
||||||
list_display = ("player", "season", "team", "competition", "games_played", "minutes_played")
|
list_display = ("player", "season", "source_uid", "team", "competition", "games_played", "minutes_played")
|
||||||
list_filter = ("season", "competition")
|
list_filter = ("season", "competition")
|
||||||
search_fields = ("player__full_name", "team__name", "competition__name", "season__label")
|
search_fields = ("player__full_name", "team__name", "competition__name", "season__label", "source_uid")
|
||||||
|
|
||||||
|
|
||||||
@admin.register(PlayerSeasonStats)
|
@admin.register(PlayerSeasonStats)
|
||||||
|
|||||||
@ -0,0 +1,25 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('competitions', '0003_competition_source_uid_season_source_uid_and_more'),
|
||||||
|
('players', '0006_player_source_uid_and_more'),
|
||||||
|
('stats', '0002_playerseasonstats_search_indexes'),
|
||||||
|
('teams', '0002_team_source_uid_team_teams_team_source__940258_idx'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='playerseason',
|
||||||
|
name='source_uid',
|
||||||
|
field=models.CharField(blank=True, max_length=160, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name='playerseason',
|
||||||
|
index=models.Index(fields=['source_uid'], name='stats_playe_source__57b701_idx'),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -4,6 +4,7 @@ from django.db import models
|
|||||||
class PlayerSeason(models.Model):
|
class PlayerSeason(models.Model):
|
||||||
player = models.ForeignKey("players.Player", on_delete=models.CASCADE, related_name="player_seasons")
|
player = models.ForeignKey("players.Player", on_delete=models.CASCADE, related_name="player_seasons")
|
||||||
season = models.ForeignKey("competitions.Season", on_delete=models.CASCADE, related_name="player_seasons")
|
season = models.ForeignKey("competitions.Season", on_delete=models.CASCADE, related_name="player_seasons")
|
||||||
|
source_uid = models.CharField(max_length=160, blank=True, null=True, unique=True)
|
||||||
team = models.ForeignKey(
|
team = models.ForeignKey(
|
||||||
"teams.Team",
|
"teams.Team",
|
||||||
on_delete=models.SET_NULL,
|
on_delete=models.SET_NULL,
|
||||||
@ -31,6 +32,7 @@ class PlayerSeason(models.Model):
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
|
models.Index(fields=["source_uid"]),
|
||||||
models.Index(fields=["player", "season"]),
|
models.Index(fields=["player", "season"]),
|
||||||
models.Index(fields=["season", "team"]),
|
models.Index(fields=["season", "team"]),
|
||||||
models.Index(fields=["season", "competition"]),
|
models.Index(fields=["season", "competition"]),
|
||||||
|
|||||||
@ -5,6 +5,6 @@ from .models import Team
|
|||||||
|
|
||||||
@admin.register(Team)
|
@admin.register(Team)
|
||||||
class TeamAdmin(admin.ModelAdmin):
|
class TeamAdmin(admin.ModelAdmin):
|
||||||
list_display = ("name", "short_name", "country", "is_national_team")
|
list_display = ("name", "source_name", "source_uid", "short_name", "country", "is_national_team")
|
||||||
list_filter = ("is_national_team", "country")
|
list_filter = ("is_national_team", "country")
|
||||||
search_fields = ("name", "short_name", "slug")
|
search_fields = ("name", "short_name", "slug", "source_name", "source_uid")
|
||||||
|
|||||||
@ -0,0 +1,23 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 12:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('players', '0006_player_source_uid_and_more'),
|
||||||
|
('teams', '0001_initial'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='team',
|
||||||
|
name='source_uid',
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name='team',
|
||||||
|
index=models.Index(fields=['source_uid'], name='teams_team_source__940258_idx'),
|
||||||
|
),
|
||||||
|
]
|
||||||
35
apps/teams/migrations/0003_team_source_namespaced_uid.py
Normal file
35
apps/teams/migrations/0003_team_source_namespaced_uid.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# Generated by Django 5.2.12 on 2026-03-13 15:08
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("teams", "0002_team_source_uid_team_teams_team_source__940258_idx"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="team",
|
||||||
|
name="source_name",
|
||||||
|
field=models.CharField(blank=True, default="", max_length=120),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="team",
|
||||||
|
name="source_uid",
|
||||||
|
field=models.CharField(blank=True, max_length=120, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddConstraint(
|
||||||
|
model_name="team",
|
||||||
|
constraint=models.UniqueConstraint(
|
||||||
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
fields=("source_name", "source_uid"),
|
||||||
|
name="uq_team_source_namespace_uid",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="team",
|
||||||
|
index=models.Index(fields=["source_name", "source_uid"], name="teams_team_source__8035ae_idx"),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -5,6 +5,8 @@ class Team(models.Model):
|
|||||||
name = models.CharField(max_length=200)
|
name = models.CharField(max_length=200)
|
||||||
short_name = models.CharField(max_length=80, blank=True)
|
short_name = models.CharField(max_length=80, blank=True)
|
||||||
slug = models.SlugField(max_length=220, unique=True)
|
slug = models.SlugField(max_length=220, unique=True)
|
||||||
|
source_name = models.CharField(max_length=120, blank=True, default="")
|
||||||
|
source_uid = models.CharField(max_length=120, blank=True, null=True)
|
||||||
country = models.ForeignKey(
|
country = models.ForeignKey(
|
||||||
"players.Nationality",
|
"players.Nationality",
|
||||||
on_delete=models.SET_NULL,
|
on_delete=models.SET_NULL,
|
||||||
@ -20,11 +22,18 @@ class Team(models.Model):
|
|||||||
class Meta:
|
class Meta:
|
||||||
ordering = ["name"]
|
ordering = ["name"]
|
||||||
constraints = [
|
constraints = [
|
||||||
models.UniqueConstraint(fields=["name", "country"], name="uq_team_name_country")
|
models.UniqueConstraint(fields=["name", "country"], name="uq_team_name_country"),
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=["source_name", "source_uid"],
|
||||||
|
condition=models.Q(source_uid__isnull=False) & ~models.Q(source_uid=""),
|
||||||
|
name="uq_team_source_namespace_uid",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["name"]),
|
models.Index(fields=["name"]),
|
||||||
models.Index(fields=["slug"]),
|
models.Index(fields=["slug"]),
|
||||||
|
models.Index(fields=["source_name", "source_uid"]),
|
||||||
|
models.Index(fields=["source_uid"]),
|
||||||
models.Index(fields=["country"]),
|
models.Index(fields=["country"]),
|
||||||
models.Index(fields=["is_national_team"]),
|
models.Index(fields=["is_national_team"]),
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,3 +1,8 @@
|
|||||||
from .celery import app as celery_app
|
"""
|
||||||
|
HoopScout v2 runtime package.
|
||||||
|
|
||||||
__all__ = ("celery_app",)
|
Celery is intentionally not auto-loaded at import time in v2 foundation runtime.
|
||||||
|
Legacy task modules remain in-repo and can be loaded explicitly if needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = ()
|
||||||
|
|||||||
@ -28,12 +28,12 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
|
|||||||
|
|
||||||
|
|
||||||
def build_periodic_schedule() -> dict:
|
def build_periodic_schedule() -> dict:
|
||||||
if not settings.INGESTION_SCHEDULE_ENABLED:
|
if not getattr(settings, "INGESTION_SCHEDULE_ENABLED", False):
|
||||||
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
|
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
|
schedule_kwargs = _parse_cron_expression(getattr(settings, "INGESTION_SCHEDULE_CRON", "*/30 * * * *"))
|
||||||
return {
|
return {
|
||||||
"ingestion.scheduled_provider_sync": {
|
"ingestion.scheduled_provider_sync": {
|
||||||
"task": "apps.ingestion.tasks.scheduled_provider_sync",
|
"task": "apps.ingestion.tasks.scheduled_provider_sync",
|
||||||
@ -44,7 +44,7 @@ def build_periodic_schedule() -> dict:
|
|||||||
logger.error(
|
logger.error(
|
||||||
"Invalid periodic ingestion schedule config. Task disabled. "
|
"Invalid periodic ingestion schedule config. Task disabled. "
|
||||||
"INGESTION_SCHEDULE_CRON=%r error=%s",
|
"INGESTION_SCHEDULE_CRON=%r error=%s",
|
||||||
settings.INGESTION_SCHEDULE_CRON,
|
getattr(settings, "INGESTION_SCHEDULE_CRON", ""),
|
||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
return {}
|
return {}
|
||||||
|
|||||||
@ -72,10 +72,14 @@ INSTALLED_APPS = [
|
|||||||
"apps.teams",
|
"apps.teams",
|
||||||
"apps.stats",
|
"apps.stats",
|
||||||
"apps.scouting",
|
"apps.scouting",
|
||||||
"apps.providers",
|
|
||||||
"apps.ingestion",
|
"apps.ingestion",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# v2 default runtime is snapshot-first. Legacy provider stack is opt-in.
|
||||||
|
LEGACY_PROVIDER_STACK_ENABLED = env_bool("LEGACY_PROVIDER_STACK_ENABLED", False)
|
||||||
|
if LEGACY_PROVIDER_STACK_ENABLED:
|
||||||
|
INSTALLED_APPS.append("apps.providers")
|
||||||
|
|
||||||
MIDDLEWARE = [
|
MIDDLEWARE = [
|
||||||
"django.middleware.security.SecurityMiddleware",
|
"django.middleware.security.SecurityMiddleware",
|
||||||
"django.contrib.sessions.middleware.SessionMiddleware",
|
"django.contrib.sessions.middleware.SessionMiddleware",
|
||||||
@ -142,47 +146,83 @@ LOGIN_URL = "users:login"
|
|||||||
LOGIN_REDIRECT_URL = "core:dashboard"
|
LOGIN_REDIRECT_URL = "core:dashboard"
|
||||||
LOGOUT_REDIRECT_URL = "core:home"
|
LOGOUT_REDIRECT_URL = "core:home"
|
||||||
|
|
||||||
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0")
|
# HoopScout v2 static dataset storage (volume-backed directories).
|
||||||
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/0")
|
STATIC_DATASET_INCOMING_DIR = os.getenv(
|
||||||
CELERY_ACCEPT_CONTENT = ["json"]
|
"STATIC_DATASET_INCOMING_DIR",
|
||||||
CELERY_TASK_SERIALIZER = "json"
|
os.getenv("SNAPSHOT_INCOMING_DIR", str(BASE_DIR / "snapshots" / "incoming")),
|
||||||
CELERY_RESULT_SERIALIZER = "json"
|
|
||||||
CELERY_TIMEZONE = TIME_ZONE
|
|
||||||
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
|
|
||||||
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
|
|
||||||
INGESTION_SCHEDULE_ENABLED = env_bool("INGESTION_SCHEDULE_ENABLED", False)
|
|
||||||
INGESTION_SCHEDULE_CRON = os.getenv("INGESTION_SCHEDULE_CRON", "*/30 * * * *").strip()
|
|
||||||
INGESTION_SCHEDULE_PROVIDER_NAMESPACE = os.getenv("INGESTION_SCHEDULE_PROVIDER_NAMESPACE", "").strip()
|
|
||||||
INGESTION_SCHEDULE_JOB_TYPE = os.getenv("INGESTION_SCHEDULE_JOB_TYPE", "incremental").strip().lower()
|
|
||||||
INGESTION_PREVENT_OVERLAP = env_bool("INGESTION_PREVENT_OVERLAP", True)
|
|
||||||
INGESTION_OVERLAP_WINDOW_MINUTES = int(os.getenv("INGESTION_OVERLAP_WINDOW_MINUTES", "180"))
|
|
||||||
|
|
||||||
if INGESTION_SCHEDULE_JOB_TYPE not in {"incremental", "full_sync"}:
|
|
||||||
raise ImproperlyConfigured("INGESTION_SCHEDULE_JOB_TYPE must be either 'incremental' or 'full_sync'.")
|
|
||||||
|
|
||||||
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
|
|
||||||
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
|
|
||||||
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
|
|
||||||
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
|
|
||||||
PROVIDER_MVP_DATA_FILE = os.getenv(
|
|
||||||
"PROVIDER_MVP_DATA_FILE",
|
|
||||||
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
|
|
||||||
)
|
)
|
||||||
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
|
STATIC_DATASET_ARCHIVE_DIR = os.getenv(
|
||||||
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
|
"STATIC_DATASET_ARCHIVE_DIR",
|
||||||
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
|
os.getenv("SNAPSHOT_ARCHIVE_DIR", str(BASE_DIR / "snapshots" / "archive")),
|
||||||
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
|
)
|
||||||
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
|
STATIC_DATASET_FAILED_DIR = os.getenv(
|
||||||
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
|
"STATIC_DATASET_FAILED_DIR",
|
||||||
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
|
os.getenv("SNAPSHOT_FAILED_DIR", str(BASE_DIR / "snapshots" / "failed")),
|
||||||
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
|
)
|
||||||
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
|
|
||||||
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
|
# v2 extractor framework runtime settings.
|
||||||
PROVIDER_BALLDONTLIE_SEASONS = [
|
EXTRACTOR_USER_AGENT = os.getenv("EXTRACTOR_USER_AGENT", "HoopScoutBot/2.0 (+https://younerd.org)")
|
||||||
int(value.strip())
|
EXTRACTOR_HTTP_TIMEOUT_SECONDS = float(os.getenv("EXTRACTOR_HTTP_TIMEOUT_SECONDS", "15"))
|
||||||
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
|
EXTRACTOR_HTTP_RETRIES = int(os.getenv("EXTRACTOR_HTTP_RETRIES", "2"))
|
||||||
if value.strip().isdigit()
|
EXTRACTOR_RETRY_SLEEP_SECONDS = float(os.getenv("EXTRACTOR_RETRY_SLEEP_SECONDS", "1.0"))
|
||||||
]
|
EXTRACTOR_REQUEST_DELAY_SECONDS = float(os.getenv("EXTRACTOR_REQUEST_DELAY_SECONDS", "0.5"))
|
||||||
|
EXTRACTOR_PUBLIC_JSON_URL = os.getenv("EXTRACTOR_PUBLIC_JSON_URL", "").strip()
|
||||||
|
EXTRACTOR_PUBLIC_SOURCE_NAME = os.getenv("EXTRACTOR_PUBLIC_SOURCE_NAME", "public_json_source").strip()
|
||||||
|
EXTRACTOR_INCLUDE_RAW_PAYLOAD = env_bool("EXTRACTOR_INCLUDE_RAW_PAYLOAD", False)
|
||||||
|
EXTRACTOR_LBA_STATS_URL = os.getenv("EXTRACTOR_LBA_STATS_URL", "").strip()
|
||||||
|
EXTRACTOR_LBA_SEASON_LABEL = os.getenv("EXTRACTOR_LBA_SEASON_LABEL", "").strip()
|
||||||
|
EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = os.getenv("EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID", "lba-serie-a").strip()
|
||||||
|
EXTRACTOR_LBA_COMPETITION_NAME = os.getenv("EXTRACTOR_LBA_COMPETITION_NAME", "Lega Basket Serie A").strip()
|
||||||
|
EXTRACTOR_BCL_STATS_URL = os.getenv("EXTRACTOR_BCL_STATS_URL", "").strip()
|
||||||
|
EXTRACTOR_BCL_SEASON_LABEL = os.getenv("EXTRACTOR_BCL_SEASON_LABEL", "").strip()
|
||||||
|
EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = os.getenv("EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID", "bcl").strip()
|
||||||
|
EXTRACTOR_BCL_COMPETITION_NAME = os.getenv("EXTRACTOR_BCL_COMPETITION_NAME", "Basketball Champions League").strip()
|
||||||
|
|
||||||
|
# Simple daily orchestration settings (extract -> import).
|
||||||
|
DAILY_ORCHESTRATION_EXTRACTORS = os.getenv("DAILY_ORCHESTRATION_EXTRACTORS", "lba,bcl")
|
||||||
|
DAILY_ORCHESTRATION_INTERVAL_SECONDS = int(os.getenv("DAILY_ORCHESTRATION_INTERVAL_SECONDS", "86400"))
|
||||||
|
|
||||||
|
if EXTRACTOR_HTTP_TIMEOUT_SECONDS <= 0:
|
||||||
|
raise ImproperlyConfigured("EXTRACTOR_HTTP_TIMEOUT_SECONDS must be > 0.")
|
||||||
|
if EXTRACTOR_HTTP_RETRIES < 0:
|
||||||
|
raise ImproperlyConfigured("EXTRACTOR_HTTP_RETRIES must be >= 0.")
|
||||||
|
if EXTRACTOR_RETRY_SLEEP_SECONDS < 0:
|
||||||
|
raise ImproperlyConfigured("EXTRACTOR_RETRY_SLEEP_SECONDS must be >= 0.")
|
||||||
|
if EXTRACTOR_REQUEST_DELAY_SECONDS < 0:
|
||||||
|
raise ImproperlyConfigured("EXTRACTOR_REQUEST_DELAY_SECONDS must be >= 0.")
|
||||||
|
if DAILY_ORCHESTRATION_INTERVAL_SECONDS < 60:
|
||||||
|
raise ImproperlyConfigured("DAILY_ORCHESTRATION_INTERVAL_SECONDS must be >= 60.")
|
||||||
|
|
||||||
|
# Optional scheduler command settings for future v2 snapshot jobs.
|
||||||
|
SCHEDULER_ENABLED = env_bool("SCHEDULER_ENABLED", False)
|
||||||
|
SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))
|
||||||
|
if SCHEDULER_INTERVAL_SECONDS < 30:
|
||||||
|
raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.")
|
||||||
|
|
||||||
|
if LEGACY_PROVIDER_STACK_ENABLED:
|
||||||
|
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
|
||||||
|
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")
|
||||||
|
PROVIDER_NAMESPACE_BALLDONTLIE = os.getenv("PROVIDER_NAMESPACE_BALLDONTLIE", "balldontlie")
|
||||||
|
PROVIDER_DEFAULT_NAMESPACE = os.getenv("PROVIDER_DEFAULT_NAMESPACE", "").strip()
|
||||||
|
PROVIDER_MVP_DATA_FILE = os.getenv(
|
||||||
|
"PROVIDER_MVP_DATA_FILE",
|
||||||
|
str(BASE_DIR / "apps" / "providers" / "data" / "mvp_provider.json"),
|
||||||
|
)
|
||||||
|
PROVIDER_REQUEST_RETRIES = int(os.getenv("PROVIDER_REQUEST_RETRIES", "3"))
|
||||||
|
PROVIDER_REQUEST_RETRY_SLEEP = float(os.getenv("PROVIDER_REQUEST_RETRY_SLEEP", "1"))
|
||||||
|
PROVIDER_HTTP_TIMEOUT_SECONDS = float(os.getenv("PROVIDER_HTTP_TIMEOUT_SECONDS", "10"))
|
||||||
|
PROVIDER_BALLDONTLIE_BASE_URL = os.getenv("PROVIDER_BALLDONTLIE_BASE_URL", "https://api.balldontlie.io")
|
||||||
|
PROVIDER_BALLDONTLIE_API_KEY = os.getenv("PROVIDER_BALLDONTLIE_API_KEY", "")
|
||||||
|
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT", "5"))
|
||||||
|
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE", "100"))
|
||||||
|
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT", "10"))
|
||||||
|
PROVIDER_BALLDONTLIE_STATS_PER_PAGE = int(os.getenv("PROVIDER_BALLDONTLIE_STATS_PER_PAGE", "100"))
|
||||||
|
PROVIDER_BALLDONTLIE_STATS_STRICT = env_bool("PROVIDER_BALLDONTLIE_STATS_STRICT", False)
|
||||||
|
PROVIDER_BALLDONTLIE_SEASONS = [
|
||||||
|
int(value.strip())
|
||||||
|
for value in os.getenv("PROVIDER_BALLDONTLIE_SEASONS", "2024").split(",")
|
||||||
|
if value.strip().isdigit()
|
||||||
|
]
|
||||||
|
|
||||||
LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper()
|
LOG_LEVEL = os.getenv("DJANGO_LOG_LEVEL", "INFO").upper()
|
||||||
LOG_SQL = env_bool("DJANGO_LOG_SQL", False)
|
LOG_SQL = env_bool("DJANGO_LOG_SQL", False)
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
from django.conf import settings
|
||||||
from django.urls import include, path
|
from django.urls import include, path
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
@ -11,6 +12,8 @@ urlpatterns = [
|
|||||||
path("teams/", include("apps.teams.urls")),
|
path("teams/", include("apps.teams.urls")),
|
||||||
path("stats/", include("apps.stats.urls")),
|
path("stats/", include("apps.stats.urls")),
|
||||||
path("scouting/", include("apps.scouting.urls")),
|
path("scouting/", include("apps.scouting.urls")),
|
||||||
path("providers/", include("apps.providers.urls")),
|
|
||||||
path("ingestion/", include("apps.ingestion.urls")),
|
path("ingestion/", include("apps.ingestion.urls")),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if settings.LEGACY_PROVIDER_STACK_ENABLED:
|
||||||
|
urlpatterns.append(path("providers/", include("apps.providers.urls")))
|
||||||
|
|||||||
@ -3,25 +3,28 @@ services:
|
|||||||
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- .:/app
|
||||||
- node_modules_data:/app/node_modules
|
- static_data_dev:/app/staticfiles
|
||||||
- static_data:/app/staticfiles
|
- media_data_dev:/app/media
|
||||||
- media_data:/app/media
|
- snapshots_incoming_dev:/app/snapshots/incoming
|
||||||
- runtime_data:/app/runtime
|
- snapshots_archive_dev:/app/snapshots/archive
|
||||||
|
- snapshots_failed_dev:/app/snapshots/failed
|
||||||
|
|
||||||
celery_worker:
|
nginx:
|
||||||
|
volumes:
|
||||||
|
- static_data_dev:/var/www/static:ro
|
||||||
|
- media_data_dev:/var/www/media:ro
|
||||||
|
|
||||||
|
scheduler:
|
||||||
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- .:/app
|
||||||
- runtime_data:/app/runtime
|
- snapshots_incoming_dev:/app/snapshots/incoming
|
||||||
|
- snapshots_archive_dev:/app/snapshots/archive
|
||||||
|
- snapshots_failed_dev:/app/snapshots/failed
|
||||||
|
|
||||||
celery_beat:
|
volumes:
|
||||||
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
static_data_dev:
|
||||||
volumes:
|
media_data_dev:
|
||||||
- .:/app
|
snapshots_incoming_dev:
|
||||||
- runtime_data:/app/runtime
|
snapshots_archive_dev:
|
||||||
|
snapshots_failed_dev:
|
||||||
tailwind:
|
|
||||||
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
|
|
||||||
volumes:
|
|
||||||
- .:/app
|
|
||||||
- node_modules_data:/app/node_modules
|
|
||||||
|
|||||||
@ -2,14 +2,5 @@ services:
|
|||||||
web:
|
web:
|
||||||
environment:
|
environment:
|
||||||
DJANGO_SETTINGS_MODULE: config.settings.production
|
DJANGO_SETTINGS_MODULE: config.settings.production
|
||||||
DJANGO_DEBUG: "0"
|
DJANGO_ENV: production
|
||||||
|
|
||||||
celery_worker:
|
|
||||||
environment:
|
|
||||||
DJANGO_SETTINGS_MODULE: config.settings.production
|
|
||||||
DJANGO_DEBUG: "0"
|
|
||||||
|
|
||||||
celery_beat:
|
|
||||||
environment:
|
|
||||||
DJANGO_SETTINGS_MODULE: config.settings.production
|
|
||||||
DJANGO_DEBUG: "0"
|
DJANGO_DEBUG: "0"
|
||||||
|
|||||||
@ -1,13 +1,43 @@
|
|||||||
services:
|
services:
|
||||||
|
web:
|
||||||
|
image: registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
user: "10001:10001"
|
||||||
|
volumes:
|
||||||
|
- static_data:/app/staticfiles
|
||||||
|
- media_data:/app/media
|
||||||
|
- snapshots_incoming:/app/snapshots/incoming
|
||||||
|
- snapshots_archive:/app/snapshots/archive
|
||||||
|
- snapshots_failed:/app/snapshots/failed
|
||||||
|
expose:
|
||||||
|
- "8000"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/health/ || exit 1"]
|
||||||
|
interval: 15s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 8
|
||||||
|
start_period: 25s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
nginx:
|
nginx:
|
||||||
image: nginx:1.27-alpine
|
image: registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: nginx/Dockerfile
|
||||||
depends_on:
|
depends_on:
|
||||||
web:
|
web:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
ports:
|
ports:
|
||||||
- "80:80"
|
- "80:80"
|
||||||
volumes:
|
volumes:
|
||||||
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
|
|
||||||
- static_data:/var/www/static:ro
|
- static_data:/var/www/static:ro
|
||||||
- media_data:/var/www/media:ro
|
- media_data:/var/www/media:ro
|
||||||
read_only: true
|
read_only: true
|
||||||
@ -22,91 +52,6 @@ services:
|
|||||||
start_period: 10s
|
start_period: 10s
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
web:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
redis:
|
|
||||||
condition: service_healthy
|
|
||||||
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
|
|
||||||
user: "10001:10001"
|
|
||||||
volumes:
|
|
||||||
- static_data:/app/staticfiles
|
|
||||||
- media_data:/app/media
|
|
||||||
- runtime_data:/app/runtime
|
|
||||||
expose:
|
|
||||||
- "8000"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "curl -f http://127.0.0.1:8000/health/ || exit 1"]
|
|
||||||
interval: 15s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 8
|
|
||||||
start_period: 20s
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
tailwind:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
command: npm run dev
|
|
||||||
user: "10001:10001"
|
|
||||||
profiles:
|
|
||||||
- dev
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
celery_worker:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
redis:
|
|
||||||
condition: service_healthy
|
|
||||||
command: celery -A config worker -l info
|
|
||||||
user: "10001:10001"
|
|
||||||
volumes:
|
|
||||||
- runtime_data:/app/runtime
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "celery -A config inspect ping -d celery@$$HOSTNAME | grep -q pong || exit 1"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 5
|
|
||||||
start_period: 30s
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
celery_beat:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
depends_on:
|
|
||||||
postgres:
|
|
||||||
condition: service_healthy
|
|
||||||
redis:
|
|
||||||
condition: service_healthy
|
|
||||||
command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule
|
|
||||||
user: "10001:10001"
|
|
||||||
volumes:
|
|
||||||
- runtime_data:/app/runtime
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "test -f /app/runtime/celerybeat-schedule || exit 1"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 10
|
|
||||||
start_period: 20s
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
postgres:
|
postgres:
|
||||||
image: postgres:16-alpine
|
image: postgres:16-alpine
|
||||||
environment:
|
environment:
|
||||||
@ -122,22 +67,39 @@ services:
|
|||||||
retries: 5
|
retries: 5
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
redis:
|
scheduler:
|
||||||
image: redis:7-alpine
|
profiles: ["scheduler"]
|
||||||
command: redis-server --save 60 1 --loglevel warning
|
image: registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
SCHEDULER_ENABLED: ${SCHEDULER_ENABLED:-0}
|
||||||
|
SCHEDULER_DISABLED_SLEEP_SECONDS: ${SCHEDULER_DISABLED_SLEEP_SECONDS:-300}
|
||||||
|
DAILY_ORCHESTRATION_INTERVAL_SECONDS: ${DAILY_ORCHESTRATION_INTERVAL_SECONDS:-86400}
|
||||||
|
command: /app/scripts/scheduler.sh
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
user: "10001:10001"
|
||||||
volumes:
|
volumes:
|
||||||
- redis_data:/data
|
- snapshots_incoming:/app/snapshots/incoming
|
||||||
|
- snapshots_archive:/app/snapshots/archive
|
||||||
|
- snapshots_failed:/app/snapshots/failed
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "redis-cli", "ping"]
|
test: ["CMD-SHELL", "grep -qa 'scheduler.sh' /proc/1/cmdline || exit 1"]
|
||||||
interval: 10s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 3
|
||||||
|
start_period: 20s
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
static_data:
|
static_data:
|
||||||
media_data:
|
media_data:
|
||||||
runtime_data:
|
snapshots_incoming:
|
||||||
redis_data:
|
snapshots_archive:
|
||||||
node_modules_data:
|
snapshots_failed:
|
||||||
|
|||||||
58
docs/runtime-consistency-checklist.md
Normal file
58
docs/runtime-consistency-checklist.md
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# Runtime Consistency Checklist (v2)
|
||||||
|
|
||||||
|
Use this checklist when runtime/docs changes are made.
|
||||||
|
|
||||||
|
## Compose and Runtime
|
||||||
|
|
||||||
|
- `docker-compose.yml` contains only v2 default runtime services:
|
||||||
|
- `web`, `nginx`, `postgres`
|
||||||
|
- optional `scheduler` profile service
|
||||||
|
- `docker-compose.dev.yml` is mutable (source bind mounts allowed for dev only).
|
||||||
|
- `docker-compose.release.yml` is settings-focused and keeps release runtime immutable.
|
||||||
|
|
||||||
|
## Image/Registry Strategy
|
||||||
|
|
||||||
|
- `web` image: `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}`
|
||||||
|
- `nginx` image: `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}`
|
||||||
|
- optional scheduler image: `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
|
||||||
|
|
||||||
|
## Entrypoints
|
||||||
|
|
||||||
|
- `entrypoint.sh`:
|
||||||
|
- waits for PostgreSQL
|
||||||
|
- creates snapshot directories
|
||||||
|
- optionally runs `migrate` and `collectstatic` when booting gunicorn
|
||||||
|
- `scripts/scheduler.sh`:
|
||||||
|
- runs `run_daily_orchestration` loop
|
||||||
|
- idle-sleeps when `SCHEDULER_ENABLED=0`
|
||||||
|
|
||||||
|
## Snapshot Lifecycle
|
||||||
|
|
||||||
|
1. Extractor writes snapshots to `incoming`.
|
||||||
|
2. `import_snapshots` validates + upserts into PostgreSQL.
|
||||||
|
3. Success => file moved to `archive`.
|
||||||
|
4. Failure => file moved to `failed`.
|
||||||
|
|
||||||
|
## Source Identity Rule
|
||||||
|
|
||||||
|
Raw IDs are not global. Imported identities are namespaced by source:
|
||||||
|
|
||||||
|
- `Competition`: `(source_name, source_uid)`
|
||||||
|
- `Team`: `(source_name, source_uid)`
|
||||||
|
- `Player`: `(source_name, source_uid)`
|
||||||
|
|
||||||
|
## Legacy Isolation
|
||||||
|
|
||||||
|
- `LEGACY_PROVIDER_STACK_ENABLED=0` by default.
|
||||||
|
- With default setting:
|
||||||
|
- `apps.providers` is not installed
|
||||||
|
- `/providers/` routes are not mounted
|
||||||
|
- legacy provider settings are not required
|
||||||
|
|
||||||
|
## Verification Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.release.yml config
|
||||||
|
./scripts/verify_release_topology.sh
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml run --rm web sh -lc "export PYTHONUSERBASE=/tmp/pyuser && python -m pip install --user -r requirements/dev.txt && python -m pytest -q"
|
||||||
|
```
|
||||||
@ -8,6 +8,10 @@ done
|
|||||||
|
|
||||||
echo "PostgreSQL is available."
|
echo "PostgreSQL is available."
|
||||||
|
|
||||||
|
mkdir -p "${STATIC_DATASET_INCOMING_DIR:-${SNAPSHOT_INCOMING_DIR:-/app/snapshots/incoming}}" \
|
||||||
|
"${STATIC_DATASET_ARCHIVE_DIR:-${SNAPSHOT_ARCHIVE_DIR:-/app/snapshots/archive}}" \
|
||||||
|
"${STATIC_DATASET_FAILED_DIR:-${SNAPSHOT_FAILED_DIR:-/app/snapshots/failed}}"
|
||||||
|
|
||||||
if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then
|
if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then
|
||||||
echo "Running Django deployment checks..."
|
echo "Running Django deployment checks..."
|
||||||
python manage.py check --deploy --fail-level WARNING
|
python manage.py check --deploy --fail-level WARNING
|
||||||
@ -19,15 +23,6 @@ if [ "${AUTO_APPLY_MIGRATIONS:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
|
if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
|
||||||
if [ "${AUTO_BUILD_TAILWIND:-1}" = "1" ] && [ -f /app/package.json ]; then
|
|
||||||
if [ -x /app/node_modules/.bin/tailwindcss ]; then
|
|
||||||
echo "Building Tailwind assets..."
|
|
||||||
npm run build
|
|
||||||
else
|
|
||||||
echo "Tailwind dependencies missing; skipping AUTO_BUILD_TAILWIND."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Collecting static files..."
|
echo "Collecting static files..."
|
||||||
python manage.py collectstatic --noinput
|
python manage.py collectstatic --noinput
|
||||||
fi
|
fi
|
||||||
|
|||||||
8
nginx/Dockerfile
Normal file
8
nginx/Dockerfile
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
FROM nginx:1.27-alpine
|
||||||
|
|
||||||
|
COPY nginx/nginx.conf /etc/nginx/nginx.conf
|
||||||
|
COPY nginx/entrypoint.sh /entrypoint.sh
|
||||||
|
RUN chmod +x /entrypoint.sh
|
||||||
|
|
||||||
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
|
CMD ["nginx", "-g", "daemon off;"]
|
||||||
4
nginx/entrypoint.sh
Normal file
4
nginx/entrypoint.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -e
|
||||||
|
|
||||||
|
exec "$@"
|
||||||
35
scripts/scheduler.sh
Normal file
35
scripts/scheduler.sh
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [ "${SCHEDULER_ENABLED:-0}" != "1" ]; then
|
||||||
|
DISABLED_SLEEP="${SCHEDULER_DISABLED_SLEEP_SECONDS:-300}"
|
||||||
|
if [ "${DISABLED_SLEEP}" -lt 30 ]; then
|
||||||
|
echo "SCHEDULER_DISABLED_SLEEP_SECONDS must be >= 30"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Scheduler disabled (SCHEDULER_ENABLED=${SCHEDULER_ENABLED:-0}). Entering idle mode with ${DISABLED_SLEEP}s sleep."
|
||||||
|
while true; do
|
||||||
|
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Scheduler disabled; sleeping for ${DISABLED_SLEEP}s."
|
||||||
|
sleep "${DISABLED_SLEEP}"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
INTERVAL="${DAILY_ORCHESTRATION_INTERVAL_SECONDS:-${SCHEDULER_INTERVAL_SECONDS:-86400}}"
|
||||||
|
if [ "${INTERVAL}" -lt 60 ]; then
|
||||||
|
echo "DAILY_ORCHESTRATION_INTERVAL_SECONDS/SCHEDULER_INTERVAL_SECONDS must be >= 60"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Starting HoopScout scheduler loop interval=${INTERVAL}s"
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Running daily orchestration..."
|
||||||
|
if python manage.py run_daily_orchestration; then
|
||||||
|
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Daily orchestration completed successfully."
|
||||||
|
else
|
||||||
|
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Daily orchestration failed."
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[$(date -u '+%Y-%m-%dT%H:%M:%SZ')] Sleeping for ${INTERVAL}s."
|
||||||
|
sleep "${INTERVAL}"
|
||||||
|
done
|
||||||
@ -30,7 +30,6 @@ check_service_bind_mount() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
check_service_bind_mount "web"
|
check_service_bind_mount "web"
|
||||||
check_service_bind_mount "celery_worker"
|
check_service_bind_mount "scheduler"
|
||||||
check_service_bind_mount "celery_beat"
|
|
||||||
|
|
||||||
echo "Release topology verification passed."
|
echo "Release topology verification passed."
|
||||||
|
|||||||
@ -22,8 +22,6 @@
|
|||||||
<h2 class="text-base">Summary</h2>
|
<h2 class="text-base">Summary</h2>
|
||||||
<dl class="mt-2 space-y-1 text-sm">
|
<dl class="mt-2 space-y-1 text-sm">
|
||||||
<div><dt class="inline font-semibold">Nationality:</dt> <dd class="inline">{{ player.nationality.name|default:"-" }}</dd></div>
|
<div><dt class="inline font-semibold">Nationality:</dt> <dd class="inline">{{ player.nationality.name|default:"-" }}</dd></div>
|
||||||
<div><dt class="inline font-semibold">Origin competition:</dt> <dd class="inline">{{ player.origin_competition.name|default:"-" }}</dd></div>
|
|
||||||
<div><dt class="inline font-semibold">Origin team:</dt> <dd class="inline">{{ player.origin_team.name|default:"-" }}</dd></div>
|
|
||||||
<div><dt class="inline font-semibold">Birth date:</dt> <dd class="inline">{{ player.birth_date|date:"Y-m-d"|default:"-" }}</dd></div>
|
<div><dt class="inline font-semibold">Birth date:</dt> <dd class="inline">{{ player.birth_date|date:"Y-m-d"|default:"-" }}</dd></div>
|
||||||
<div><dt class="inline font-semibold">Age:</dt> <dd class="inline">{{ age|default:"-" }}</dd></div>
|
<div><dt class="inline font-semibold">Age:</dt> <dd class="inline">{{ age|default:"-" }}</dd></div>
|
||||||
<div><dt class="inline font-semibold">Height:</dt> <dd class="inline">{{ player.height_cm|default:"-" }} cm</dd></div>
|
<div><dt class="inline font-semibold">Height:</dt> <dd class="inline">{{ player.height_cm|default:"-" }} cm</dd></div>
|
||||||
@ -47,14 +45,11 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="rounded-lg border border-slate-200 p-4">
|
<div class="rounded-lg border border-slate-200 p-4">
|
||||||
<h2 class="text-base">Aliases</h2>
|
<h2 class="text-base">Snapshot Coverage</h2>
|
||||||
<ul class="mt-2 list-inside list-disc text-sm text-slate-700">
|
<dl class="mt-2 space-y-1 text-sm">
|
||||||
{% for alias in player.aliases.all %}
|
<div><dt class="inline font-semibold">Seasons imported:</dt> <dd class="inline">{{ season_rows|length }}</dd></div>
|
||||||
<li>{{ alias.alias }}{% if alias.source %} ({{ alias.source }}){% endif %}</li>
|
<div><dt class="inline font-semibold">Latest season:</dt> <dd class="inline">{% if season_rows %}{{ season_rows.0.season.label|default:"-" }}{% else %}-{% endif %}</dd></div>
|
||||||
{% empty %}
|
</dl>
|
||||||
<li>No aliases recorded.</li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
@ -77,33 +72,6 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section class="panel mt-4">
|
|
||||||
<h2>Career History</h2>
|
|
||||||
{% if career_entries %}
|
|
||||||
<div class="table-wrap mt-3">
|
|
||||||
<table class="data-table">
|
|
||||||
<thead>
|
|
||||||
<tr><th>Season</th><th>Team</th><th>Competition</th><th>Role</th><th>From</th><th>To</th></tr>
|
|
||||||
</thead>
|
|
||||||
<tbody class="divide-y divide-slate-100 bg-white">
|
|
||||||
{% for entry in career_entries %}
|
|
||||||
<tr>
|
|
||||||
<td>{{ entry.season.label|default:"-" }}</td>
|
|
||||||
<td>{{ entry.team.name|default:"-" }}</td>
|
|
||||||
<td>{{ entry.competition.name|default:"-" }}</td>
|
|
||||||
<td>{{ entry.role_snapshot.name|default:"-" }}</td>
|
|
||||||
<td>{{ entry.start_date|date:"Y-m-d"|default:"-" }}</td>
|
|
||||||
<td>{{ entry.end_date|date:"Y-m-d"|default:"-" }}</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
{% else %}
|
|
||||||
<div class="empty-state mt-3">No career entries available.</div>
|
|
||||||
{% endif %}
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section class="panel mt-4">
|
<section class="panel mt-4">
|
||||||
<h2>Season-by-Season Stats</h2>
|
<h2>Season-by-Season Stats</h2>
|
||||||
{% if season_rows %}
|
{% if season_rows %}
|
||||||
|
|||||||
@ -5,7 +5,7 @@
|
|||||||
{% block content %}
|
{% block content %}
|
||||||
<section class="panel">
|
<section class="panel">
|
||||||
<h1>Player Search</h1>
|
<h1>Player Search</h1>
|
||||||
<p class="mt-1 text-sm text-slate-600">Filter players by profile, origin, context, and production metrics.</p>
|
<p class="mt-1 text-sm text-slate-600">Filter players by profile, team-season context, and production metrics.</p>
|
||||||
{% if search_has_errors %}
|
{% if search_has_errors %}
|
||||||
<div class="mt-4 rounded-md border border-rose-200 bg-rose-50 p-3 text-sm text-rose-800">
|
<div class="mt-4 rounded-md border border-rose-200 bg-rose-50 p-3 text-sm text-rose-800">
|
||||||
<p class="font-medium">Please correct the highlighted filters.</p>
|
<p class="font-medium">Please correct the highlighted filters.</p>
|
||||||
@ -56,8 +56,6 @@
|
|||||||
<div><label for="id_competition">Competition</label>{{ search_form.competition }}</div>
|
<div><label for="id_competition">Competition</label>{{ search_form.competition }}</div>
|
||||||
<div><label for="id_team">Team</label>{{ search_form.team }}</div>
|
<div><label for="id_team">Team</label>{{ search_form.team }}</div>
|
||||||
<div><label for="id_season">Season</label>{{ search_form.season }}</div>
|
<div><label for="id_season">Season</label>{{ search_form.season }}</div>
|
||||||
<div><label for="id_origin_competition">Origin competition</label>{{ search_form.origin_competition }}</div>
|
|
||||||
<div><label for="id_origin_team">Origin team</label>{{ search_form.origin_team }}</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<details class="rounded-lg border border-slate-200 bg-slate-50 p-3">
|
<details class="rounded-lg border border-slate-200 bg-slate-50 p-3">
|
||||||
@ -97,8 +95,6 @@
|
|||||||
<div><label for="id_three_pct_max">3P% max</label>{{ search_form.three_pct_max }}</div>
|
<div><label for="id_three_pct_max">3P% max</label>{{ search_form.three_pct_max }}</div>
|
||||||
<div><label for="id_ft_pct_min">FT% min</label>{{ search_form.ft_pct_min }}</div>
|
<div><label for="id_ft_pct_min">FT% min</label>{{ search_form.ft_pct_min }}</div>
|
||||||
<div><label for="id_ft_pct_max">FT% max</label>{{ search_form.ft_pct_max }}</div>
|
<div><label for="id_ft_pct_max">FT% max</label>{{ search_form.ft_pct_max }}</div>
|
||||||
<div><label for="id_efficiency_metric_min">Impact min</label>{{ search_form.efficiency_metric_min }}</div>
|
|
||||||
<div><label for="id_efficiency_metric_max">Impact max</label>{{ search_form.efficiency_metric_max }}</div>
|
|
||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
</form>
|
</form>
|
||||||
|
|||||||
@ -36,13 +36,18 @@
|
|||||||
<th>Player</th>
|
<th>Player</th>
|
||||||
<th>Nationality</th>
|
<th>Nationality</th>
|
||||||
<th>Pos / Role</th>
|
<th>Pos / Role</th>
|
||||||
<th>Origin</th>
|
|
||||||
<th>Height / Weight</th>
|
<th>Height / Weight</th>
|
||||||
<th>Best Eligible Games</th>
|
<th>Best Eligible Games</th>
|
||||||
<th>Best Eligible MPG</th>
|
<th>Best Eligible MPG</th>
|
||||||
<th>Best Eligible PPG</th>
|
<th>Best Eligible PPG</th>
|
||||||
<th>Best Eligible RPG</th>
|
<th>Best Eligible RPG</th>
|
||||||
<th>Best Eligible APG</th>
|
<th>Best Eligible APG</th>
|
||||||
|
<th>Best Eligible SPG</th>
|
||||||
|
<th>Best Eligible BPG</th>
|
||||||
|
<th>Best Eligible TOV</th>
|
||||||
|
<th>Best Eligible FG%</th>
|
||||||
|
<th>Best Eligible 3P%</th>
|
||||||
|
<th>Best Eligible FT%</th>
|
||||||
{% if request.user.is_authenticated %}<th>Watchlist</th>{% endif %}
|
{% if request.user.is_authenticated %}<th>Watchlist</th>{% endif %}
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@ -52,16 +57,18 @@
|
|||||||
<td><a class="font-medium" href="{% url 'players:detail' player.pk %}">{{ player.full_name }}</a></td>
|
<td><a class="font-medium" href="{% url 'players:detail' player.pk %}">{{ player.full_name }}</a></td>
|
||||||
<td>{{ player.nationality.name|default:"-" }}</td>
|
<td>{{ player.nationality.name|default:"-" }}</td>
|
||||||
<td>{{ player.nominal_position.code|default:"-" }} / {{ player.inferred_role.name|default:"-" }}</td>
|
<td>{{ player.nominal_position.code|default:"-" }} / {{ player.inferred_role.name|default:"-" }}</td>
|
||||||
<td>
|
|
||||||
{{ player.origin_competition.name|default:"-" }}
|
|
||||||
{% if player.origin_team %}<div class="text-xs text-slate-500">{{ player.origin_team.name }}</div>{% endif %}
|
|
||||||
</td>
|
|
||||||
<td>{{ player.height_cm|default:"-" }} / {{ player.weight_kg|default:"-" }}</td>
|
<td>{{ player.height_cm|default:"-" }} / {{ player.weight_kg|default:"-" }}</td>
|
||||||
<td>{{ player.games_played_value|floatformat:0 }}</td>
|
<td>{% if player.games_played_value is not None %}{{ player.games_played_value|floatformat:0 }}{% else %}-{% endif %}</td>
|
||||||
<td>{{ player.mpg_value|floatformat:1 }}</td>
|
<td>{% if player.mpg_value is not None %}{{ player.mpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
<td>{{ player.ppg_value|floatformat:1 }}</td>
|
<td>{% if player.ppg_value is not None %}{{ player.ppg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
<td>{{ player.rpg_value|floatformat:1 }}</td>
|
<td>{% if player.rpg_value is not None %}{{ player.rpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
<td>{{ player.apg_value|floatformat:1 }}</td>
|
<td>{% if player.apg_value is not None %}{{ player.apg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.spg_value is not None %}{{ player.spg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.bpg_value is not None %}{{ player.bpg_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.tov_value is not None %}{{ player.tov_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.fg_pct_value is not None %}{{ player.fg_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.three_pct_value is not None %}{{ player.three_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
|
<td>{% if player.ft_pct_value is not None %}{{ player.ft_pct_value|floatformat:1 }}{% else %}-{% endif %}</td>
|
||||||
{% if request.user.is_authenticated %}
|
{% if request.user.is_authenticated %}
|
||||||
<td>
|
<td>
|
||||||
{% if player.id in favorite_player_ids %}
|
{% if player.id in favorite_player_ids %}
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
<div id="saved-search-table">
|
||||||
{% if saved_searches %}
|
{% if saved_searches %}
|
||||||
<div class="table-wrap mt-4">
|
<div class="table-wrap mt-4">
|
||||||
<table class="data-table">
|
<table class="data-table">
|
||||||
@ -21,7 +22,14 @@
|
|||||||
<div class="flex flex-wrap gap-2">
|
<div class="flex flex-wrap gap-2">
|
||||||
<a class="btn-secondary" href="{% url 'scouting:saved_search_run' saved_search.pk %}">Run</a>
|
<a class="btn-secondary" href="{% url 'scouting:saved_search_run' saved_search.pk %}">Run</a>
|
||||||
<a class="btn-secondary" href="{% url 'scouting:saved_search_edit' saved_search.pk %}">Edit</a>
|
<a class="btn-secondary" href="{% url 'scouting:saved_search_edit' saved_search.pk %}">Edit</a>
|
||||||
<form method="post" action="{% url 'scouting:saved_search_delete' saved_search.pk %}">
|
<form
|
||||||
|
method="post"
|
||||||
|
action="{% url 'scouting:saved_search_delete' saved_search.pk %}"
|
||||||
|
hx-post="{% url 'scouting:saved_search_delete' saved_search.pk %}"
|
||||||
|
hx-target="#saved-search-table"
|
||||||
|
hx-swap="outerHTML"
|
||||||
|
hx-indicator="#htmx-loading"
|
||||||
|
>
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<button class="btn-secondary" type="submit">Delete</button>
|
<button class="btn-secondary" type="submit">Delete</button>
|
||||||
</form>
|
</form>
|
||||||
@ -35,3 +43,4 @@
|
|||||||
{% else %}
|
{% else %}
|
||||||
<div class="empty-state mt-4">No saved searches yet.</div>
|
<div class="empty-state mt-4">No saved searches yet.</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|||||||
@ -7,7 +7,23 @@
|
|||||||
<h1>Edit Saved Search</h1>
|
<h1>Edit Saved Search</h1>
|
||||||
<form method="post" class="mt-4 space-y-4">
|
<form method="post" class="mt-4 space-y-4">
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
{{ form.as_p }}
|
<div>
|
||||||
|
<label for="{{ form.name.id_for_label }}">{{ form.name.label }}</label>
|
||||||
|
{{ form.name }}
|
||||||
|
{% for error in form.name.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center gap-2">
|
||||||
|
{{ form.is_public }}
|
||||||
|
<label for="{{ form.is_public.id_for_label }}">{{ form.is_public.label }}</label>
|
||||||
|
{% for error in form.is_public.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="{{ form.filters_json.id_for_label }}">{{ form.filters_json.label }}</label>
|
||||||
|
{{ form.filters_json }}
|
||||||
|
<p class="mt-1 text-xs text-slate-500">{{ form.filters_json.help_text }}</p>
|
||||||
|
{% for error in form.filters_json.errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% for error in form.non_field_errors %}<p class="text-sm text-rose-700">{{ error }}</p>{% endfor %}
|
||||||
<div class="flex flex-wrap gap-2">
|
<div class="flex flex-wrap gap-2">
|
||||||
<button type="submit" class="btn">Update</button>
|
<button type="submit" class="btn">Update</button>
|
||||||
<a class="btn-secondary" href="{% url 'scouting:index' %}">Cancel</a>
|
<a class="btn-secondary" href="{% url 'scouting:index' %}">Cancel</a>
|
||||||
|
|||||||
32
tests/fixtures/bcl/bcl_players_stats.json
vendored
Normal file
32
tests/fixtures/bcl/bcl_players_stats.json
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"player": {
|
||||||
|
"id": "bcl-player-42",
|
||||||
|
"name": "John Carter",
|
||||||
|
"first_name": "John",
|
||||||
|
"last_name": "Carter",
|
||||||
|
"birth_date": "1999-07-14",
|
||||||
|
"nationality": "US",
|
||||||
|
"height_cm": 198,
|
||||||
|
"weight_kg": 95,
|
||||||
|
"position": "SF"
|
||||||
|
},
|
||||||
|
"team": {
|
||||||
|
"id": "bcl-team-murcia",
|
||||||
|
"name": "UCAM Murcia"
|
||||||
|
},
|
||||||
|
"gp": 12,
|
||||||
|
"mpg": 29.1,
|
||||||
|
"ppg": 16.4,
|
||||||
|
"rpg": 5.8,
|
||||||
|
"apg": 2.7,
|
||||||
|
"spg": 1.5,
|
||||||
|
"bpg": 0.6,
|
||||||
|
"tov": 2.3,
|
||||||
|
"fg_pct": 48.1,
|
||||||
|
"three_pct": 37.2,
|
||||||
|
"ft_pct": 81.4
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
25
tests/fixtures/bcl/bcl_players_stats_partial_public.json
vendored
Normal file
25
tests/fixtures/bcl/bcl_players_stats_partial_public.json
vendored
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"player": {
|
||||||
|
"id": "bcl-player-99",
|
||||||
|
"name": "Alex Novak"
|
||||||
|
},
|
||||||
|
"team": {
|
||||||
|
"id": "bcl-team-tenerife",
|
||||||
|
"name": "Lenovo Tenerife"
|
||||||
|
},
|
||||||
|
"gp": 10,
|
||||||
|
"mpg": 27.2,
|
||||||
|
"ppg": 14.8,
|
||||||
|
"rpg": 4.1,
|
||||||
|
"apg": 3.3,
|
||||||
|
"spg": 1.2,
|
||||||
|
"bpg": 0.4,
|
||||||
|
"tov": 2.0,
|
||||||
|
"fg_pct": 47.3,
|
||||||
|
"three_pct": 38.0,
|
||||||
|
"ft_pct": 79.1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
32
tests/fixtures/lba/lba_players_stats.json
vendored
Normal file
32
tests/fixtures/lba/lba_players_stats.json
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"player": {
|
||||||
|
"id": "p-001",
|
||||||
|
"name": "Marco Rossi",
|
||||||
|
"first_name": "Marco",
|
||||||
|
"last_name": "Rossi",
|
||||||
|
"birth_date": "2000-01-05",
|
||||||
|
"nationality": "IT",
|
||||||
|
"height_cm": 190,
|
||||||
|
"weight_kg": 84,
|
||||||
|
"position": "PG"
|
||||||
|
},
|
||||||
|
"team": {
|
||||||
|
"id": "team-virtus-bologna",
|
||||||
|
"name": "Virtus Bologna"
|
||||||
|
},
|
||||||
|
"gp": 20,
|
||||||
|
"mpg": 28.3,
|
||||||
|
"ppg": 15.8,
|
||||||
|
"rpg": 3.4,
|
||||||
|
"apg": 5.9,
|
||||||
|
"spg": 1.4,
|
||||||
|
"bpg": 0.2,
|
||||||
|
"tov": 2.1,
|
||||||
|
"fg_pct": 47.6,
|
||||||
|
"three_pct": 36.5,
|
||||||
|
"ft_pct": 84.2
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
25
tests/fixtures/lba/lba_players_stats_partial_public.json
vendored
Normal file
25
tests/fixtures/lba/lba_players_stats_partial_public.json
vendored
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"player": {
|
||||||
|
"id": "p-002",
|
||||||
|
"name": "Andrea Bianchi"
|
||||||
|
},
|
||||||
|
"team": {
|
||||||
|
"id": "team-olimpia-milano",
|
||||||
|
"name": "Olimpia Milano"
|
||||||
|
},
|
||||||
|
"gp": 18,
|
||||||
|
"mpg": 24.7,
|
||||||
|
"ppg": 12.3,
|
||||||
|
"rpg": 2.9,
|
||||||
|
"apg": 4.2,
|
||||||
|
"spg": 1.1,
|
||||||
|
"bpg": 0.1,
|
||||||
|
"tov": 1.8,
|
||||||
|
"fg_pct": 45.0,
|
||||||
|
"three_pct": 35.4,
|
||||||
|
"ft_pct": 82.7
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@ -30,6 +30,12 @@ def test_players_api_list_and_detail(client):
|
|||||||
list_response = client.get(reverse("api:players"), data={"q": "rossi"})
|
list_response = client.get(reverse("api:players"), data={"q": "rossi"})
|
||||||
assert list_response.status_code == 200
|
assert list_response.status_code == 200
|
||||||
assert list_response.json()["count"] == 1
|
assert list_response.json()["count"] == 1
|
||||||
|
list_payload = list_response.json()
|
||||||
|
assert "sort" in list_payload
|
||||||
|
assert "metric_semantics" in list_payload
|
||||||
|
assert "metric_sort_keys" in list_payload
|
||||||
|
assert "ppg_value" in list_payload["results"][0]
|
||||||
|
assert "mpg_value" in list_payload["results"][0]
|
||||||
|
|
||||||
detail_response = client.get(reverse("api:player_detail", kwargs={"pk": player.pk}))
|
detail_response = client.get(reverse("api:player_detail", kwargs={"pk": player.pk}))
|
||||||
assert detail_response.status_code == 200
|
assert detail_response.status_code == 200
|
||||||
@ -83,8 +89,6 @@ def test_players_api_search_consistent_with_ui_filters(client):
|
|||||||
nationality=nationality,
|
nationality=nationality,
|
||||||
nominal_position=position,
|
nominal_position=position,
|
||||||
inferred_role=role,
|
inferred_role=role,
|
||||||
origin_competition=competition,
|
|
||||||
origin_team=team,
|
|
||||||
)
|
)
|
||||||
ps = PlayerSeason.objects.create(
|
ps = PlayerSeason.objects.create(
|
||||||
player=matching,
|
player=matching,
|
||||||
@ -113,7 +117,7 @@ def test_players_api_search_consistent_with_ui_filters(client):
|
|||||||
)
|
)
|
||||||
|
|
||||||
params = {
|
params = {
|
||||||
"origin_competition": competition.id,
|
"competition": competition.id,
|
||||||
"nominal_position": position.id,
|
"nominal_position": position.id,
|
||||||
"points_per_game_min": "10",
|
"points_per_game_min": "10",
|
||||||
"sort": "ppg_desc",
|
"sort": "ppg_desc",
|
||||||
@ -175,8 +179,33 @@ def test_players_api_metric_sort_uses_best_eligible_values(client):
|
|||||||
|
|
||||||
response = client.get(reverse("api:players"), data={"sort": "ppg_desc"})
|
response = client.get(reverse("api:players"), data={"sort": "ppg_desc"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
names = [row["full_name"] for row in response.json()["results"]]
|
payload = response.json()
|
||||||
|
names = [row["full_name"] for row in payload["results"]]
|
||||||
assert names.index("Dan High") < names.index("Ion Low")
|
assert names.index("Dan High") < names.index("Ion Low")
|
||||||
|
assert payload["sort"] == "ppg_desc"
|
||||||
|
assert "best eligible values per player" in payload["metric_semantics"]
|
||||||
|
dan = next(row for row in payload["results"] if row["full_name"] == "Dan High")
|
||||||
|
ion = next(row for row in payload["results"] if row["full_name"] == "Ion Low")
|
||||||
|
assert float(dan["ppg_value"]) > float(ion["ppg_value"])
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_players_api_metric_fields_are_exposed_and_nullable(client):
|
||||||
|
nationality = Nationality.objects.create(name="Sweden", iso2_code="SE", iso3_code="SWE")
|
||||||
|
Player.objects.create(
|
||||||
|
first_name="No",
|
||||||
|
last_name="Stats",
|
||||||
|
full_name="No Stats",
|
||||||
|
birth_date=date(2002, 1, 1),
|
||||||
|
nationality=nationality,
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.get(reverse("api:players"), data={"sort": "name_asc"})
|
||||||
|
assert response.status_code == 200
|
||||||
|
payload = response.json()
|
||||||
|
row = next(item for item in payload["results"] if item["full_name"] == "No Stats")
|
||||||
|
assert row["ppg_value"] is None
|
||||||
|
assert row["mpg_value"] is None
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
|
|||||||
143
tests/test_bcl_extractor.py
Normal file
143
tests/test_bcl_extractor.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from apps.ingestion.extractors.bcl import BCLSnapshotExtractor
|
||||||
|
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||||
|
from apps.ingestion.extractors.registry import create_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def _load_fixture(path: str) -> dict:
|
||||||
|
fixture_path = Path(__file__).parent / "fixtures" / path
|
||||||
|
return json.loads(fixture_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_bcl_extractor_normalizes_fixture_payload(tmp_path, settings):
|
||||||
|
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||||
|
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("bcl/bcl_players_stats.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = BCLSnapshotExtractor(http_client=FakeClient())
|
||||||
|
output_path = tmp_path / "bcl.json"
|
||||||
|
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.extractor_name == "bcl"
|
||||||
|
assert result.source_name == "bcl"
|
||||||
|
assert result.records_count == 1
|
||||||
|
|
||||||
|
payload = json.loads(output_path.read_text(encoding="utf-8"))
|
||||||
|
assert payload["source_name"] == "bcl"
|
||||||
|
assert payload["snapshot_date"] == "2026-03-13"
|
||||||
|
row = payload["records"][0]
|
||||||
|
assert row["competition_external_id"] == "bcl"
|
||||||
|
assert row["competition_name"] == "Basketball Champions League"
|
||||||
|
assert row["team_external_id"] == "bcl-team-murcia"
|
||||||
|
assert row["team_name"] == "UCAM Murcia"
|
||||||
|
assert row["player_external_id"] == "bcl-player-42"
|
||||||
|
assert row["full_name"] == "John Carter"
|
||||||
|
assert row["minutes_per_game"] == 29.1
|
||||||
|
assert row["three_pt_pct"] == 37.2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_bcl_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
|
||||||
|
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||||
|
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = BCLSnapshotExtractor(http_client=FakeClient())
|
||||||
|
output_path = tmp_path / "bcl-partial.json"
|
||||||
|
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.records_count == 1
|
||||||
|
payload = json.loads(output_path.read_text(encoding="utf-8"))
|
||||||
|
row = payload["records"][0]
|
||||||
|
assert row["full_name"] == "Alex Novak"
|
||||||
|
assert row["first_name"] is None
|
||||||
|
assert row["last_name"] is None
|
||||||
|
assert row["birth_date"] is None
|
||||||
|
assert row["nationality"] is None
|
||||||
|
assert row["height_cm"] is None
|
||||||
|
assert row["weight_kg"] is None
|
||||||
|
assert row["position"] is None
|
||||||
|
assert row["games_played"] == 10
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_bcl_extractor_still_fails_when_required_stats_are_missing(settings):
|
||||||
|
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||||
|
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("bcl/bcl_players_stats_partial_public.json")
|
||||||
|
fixture_payload["data"][0].pop("ppg")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = BCLSnapshotExtractor(http_client=FakeClient())
|
||||||
|
with pytest.raises(ExtractorNormalizationError):
|
||||||
|
extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_bcl_extractor_registry_selection(settings):
|
||||||
|
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||||
|
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
|
||||||
|
extractor = create_extractor("bcl")
|
||||||
|
assert isinstance(extractor, BCLSnapshotExtractor)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_run_bcl_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
|
||||||
|
settings.EXTRACTOR_BCL_STATS_URL = "https://www.championsleague.basketball/public/stats.json"
|
||||||
|
settings.EXTRACTOR_BCL_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_EXTERNAL_ID = "bcl"
|
||||||
|
settings.EXTRACTOR_BCL_COMPETITION_NAME = "Basketball Champions League"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("bcl/bcl_players_stats.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.extractors.bcl.ResponsibleHttpClient",
|
||||||
|
lambda **_kwargs: FakeClient(),
|
||||||
|
)
|
||||||
|
|
||||||
|
call_command(
|
||||||
|
"run_bcl_extractor",
|
||||||
|
"--output-path",
|
||||||
|
str(tmp_path),
|
||||||
|
"--snapshot-date",
|
||||||
|
"2026-03-13",
|
||||||
|
)
|
||||||
|
|
||||||
|
files = list(tmp_path.glob("bcl-2026-03-13.json"))
|
||||||
|
assert len(files) == 1
|
||||||
|
payload = json.loads(files[0].read_text(encoding="utf-8"))
|
||||||
|
assert payload["source_name"] == "bcl"
|
||||||
|
assert len(payload["records"]) == 1
|
||||||
@ -1,38 +0,0 @@
|
|||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
|
|
||||||
env = os.environ.copy()
|
|
||||||
env.update(env_overrides)
|
|
||||||
return subprocess.run(
|
|
||||||
[sys.executable, "-c", code],
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
env=env,
|
|
||||||
check=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_invalid_cron_does_not_crash_config_import_path():
|
|
||||||
result = _run_python_import(
|
|
||||||
(
|
|
||||||
"import config; "
|
|
||||||
"from config.celery import app; "
|
|
||||||
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
|
|
||||||
),
|
|
||||||
{
|
|
||||||
"DJANGO_SETTINGS_MODULE": "config.settings.development",
|
|
||||||
"DJANGO_ENV": "development",
|
|
||||||
"DJANGO_DEBUG": "1",
|
|
||||||
"INGESTION_SCHEDULE_ENABLED": "1",
|
|
||||||
"INGESTION_SCHEDULE_CRON": "bad cron value",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result.returncode == 0
|
|
||||||
assert "beat_schedule_size=0" in result.stdout
|
|
||||||
95
tests/test_daily_orchestration.py
Normal file
95
tests/test_daily_orchestration.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from apps.ingestion.services.daily_orchestration import parse_enabled_extractors, run_daily_orchestration
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _FakeExtractorResult:
|
||||||
|
records_count: int
|
||||||
|
output_path: str
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeExtractor:
|
||||||
|
def __init__(self, name: str):
|
||||||
|
self.name = name
|
||||||
|
|
||||||
|
def run(self, *, snapshot_date=None):
|
||||||
|
if snapshot_date:
|
||||||
|
return _FakeExtractorResult(records_count=3, output_path=f"/tmp/{self.name}-{snapshot_date}.json")
|
||||||
|
return _FakeExtractorResult(records_count=3, output_path=f"/tmp/{self.name}.json")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _FakeImportRun:
|
||||||
|
id: int = 11
|
||||||
|
status: str = "success"
|
||||||
|
files_processed: int = 2
|
||||||
|
files_total: int = 2
|
||||||
|
rows_upserted: int = 20
|
||||||
|
rows_failed: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeImporter:
|
||||||
|
def __init__(self, **_kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
return _FakeImportRun()
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_enabled_extractors():
|
||||||
|
assert parse_enabled_extractors("lba,bcl") == ["lba", "bcl"]
|
||||||
|
assert parse_enabled_extractors(" lba , , bcl ") == ["lba", "bcl"]
|
||||||
|
assert parse_enabled_extractors("") == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_daily_orchestration_runs_extractors_then_import(settings, monkeypatch):
|
||||||
|
settings.DAILY_ORCHESTRATION_EXTRACTORS = "lba,bcl"
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.services.daily_orchestration.create_extractor",
|
||||||
|
lambda name: _FakeExtractor(name),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.services.daily_orchestration.SnapshotImporter",
|
||||||
|
_FakeImporter,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = run_daily_orchestration(snapshot_date=date(2026, 3, 13))
|
||||||
|
assert [row.extractor_name for row in result.extractors_run] == ["lba", "bcl"]
|
||||||
|
assert result.import_run_id == 11
|
||||||
|
assert result.import_status == "success"
|
||||||
|
assert result.rows_upserted == 20
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_daily_orchestration_raises_when_no_extractors_configured(settings):
|
||||||
|
settings.DAILY_ORCHESTRATION_EXTRACTORS = ""
|
||||||
|
with pytest.raises(ValueError, match="cannot be empty"):
|
||||||
|
run_daily_orchestration()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_run_daily_orchestration_command(settings, monkeypatch, capsys):
|
||||||
|
settings.DAILY_ORCHESTRATION_EXTRACTORS = "lba,bcl"
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.services.daily_orchestration.create_extractor",
|
||||||
|
lambda name: _FakeExtractor(name),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.services.daily_orchestration.SnapshotImporter",
|
||||||
|
_FakeImporter,
|
||||||
|
)
|
||||||
|
|
||||||
|
call_command("run_daily_orchestration", "--snapshot-date", "2026-03-13")
|
||||||
|
captured = capsys.readouterr()
|
||||||
|
assert "Daily orchestration completed" in captured.out
|
||||||
|
assert "extractors=[lba:3, bcl:3]" in captured.out
|
||||||
312
tests/test_extractors_framework.py
Normal file
312
tests/test_extractors_framework.py
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from apps.ingestion.extractors.base import BaseSnapshotExtractor
|
||||||
|
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||||
|
from apps.ingestion.extractors.http import ResponsibleHttpClient
|
||||||
|
from apps.ingestion.extractors.public_json import PublicJsonSnapshotExtractor
|
||||||
|
from apps.ingestion.snapshots.schema import REQUIRED_RECORD_FIELDS
|
||||||
|
|
||||||
|
|
||||||
|
class DummyExtractor(BaseSnapshotExtractor):
|
||||||
|
extractor_name = "dummy"
|
||||||
|
source_name = "dummy_source"
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
return {"rows": [{"name": "Jane Doe"}]}
|
||||||
|
|
||||||
|
def parse(self, payload):
|
||||||
|
return payload["rows"]
|
||||||
|
|
||||||
|
def normalize_record(self, source_record):
|
||||||
|
return {
|
||||||
|
"competition_external_id": "comp-1",
|
||||||
|
"competition_name": "League One",
|
||||||
|
"season": "2025-2026",
|
||||||
|
"team_external_id": "team-1",
|
||||||
|
"team_name": "Team One",
|
||||||
|
"player_external_id": "player-1",
|
||||||
|
"full_name": source_record["name"],
|
||||||
|
"first_name": "Jane",
|
||||||
|
"last_name": "Doe",
|
||||||
|
"birth_date": "2000-01-01",
|
||||||
|
"nationality": "US",
|
||||||
|
"height_cm": 180,
|
||||||
|
"weight_kg": 75,
|
||||||
|
"position": "SG",
|
||||||
|
"games_played": 10,
|
||||||
|
"minutes_per_game": 30.0,
|
||||||
|
"points_per_game": 15.0,
|
||||||
|
"rebounds_per_game": 4.0,
|
||||||
|
"assists_per_game": 3.0,
|
||||||
|
"steals_per_game": 1.2,
|
||||||
|
"blocks_per_game": 0.4,
|
||||||
|
"turnovers_per_game": 2.0,
|
||||||
|
"fg_pct": 45.0,
|
||||||
|
"three_pt_pct": 35.0,
|
||||||
|
"ft_pct": 82.0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResponse:
|
||||||
|
def __init__(self, payload, status_code=200):
|
||||||
|
self._payload = payload
|
||||||
|
self.status_code = status_code
|
||||||
|
|
||||||
|
def raise_for_status(self):
|
||||||
|
if self.status_code >= 400:
|
||||||
|
raise RuntimeError(f"status={self.status_code}")
|
||||||
|
|
||||||
|
def json(self):
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
def _minimal_public_json_record() -> dict:
|
||||||
|
return {
|
||||||
|
"competition_external_id": "comp-1",
|
||||||
|
"competition_name": "League One",
|
||||||
|
"season": "2025-2026",
|
||||||
|
"team_external_id": "team-1",
|
||||||
|
"team_name": "Team One",
|
||||||
|
"player_external_id": "player-1",
|
||||||
|
"full_name": "Jane Doe",
|
||||||
|
"games_played": 12,
|
||||||
|
"minutes_per_game": 27.2,
|
||||||
|
"points_per_game": 13.0,
|
||||||
|
"rebounds_per_game": 4.4,
|
||||||
|
"assists_per_game": 3.1,
|
||||||
|
"steals_per_game": 1.0,
|
||||||
|
"blocks_per_game": 0.3,
|
||||||
|
"turnovers_per_game": 1.8,
|
||||||
|
"fg_pct": 46.2,
|
||||||
|
"three_pt_pct": 35.5,
|
||||||
|
"ft_pct": 82.1,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_base_extractor_run_writes_snapshot_file(tmp_path, settings):
|
||||||
|
settings.STATIC_DATASET_INCOMING_DIR = str(tmp_path / "incoming")
|
||||||
|
extractor = DummyExtractor()
|
||||||
|
result = extractor.run(snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.records_count == 1
|
||||||
|
assert result.source_name == "dummy_source"
|
||||||
|
assert result.output_path is not None
|
||||||
|
assert result.output_path.exists()
|
||||||
|
|
||||||
|
payload = json.loads(result.output_path.read_text(encoding="utf-8"))
|
||||||
|
assert payload["source_name"] == "dummy_source"
|
||||||
|
assert payload["snapshot_date"] == "2026-03-13"
|
||||||
|
assert payload["records"][0]["full_name"] == "Jane Doe"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_public_json_extractor_normalizes_common_field_aliases(tmp_path):
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return {
|
||||||
|
"records": [
|
||||||
|
{
|
||||||
|
"competition_id": 99,
|
||||||
|
"competition_name": "National League",
|
||||||
|
"season": 2025,
|
||||||
|
"team_id": 10,
|
||||||
|
"team_name": "Blue Team",
|
||||||
|
"player_id": 123,
|
||||||
|
"player_name": "John Smith",
|
||||||
|
"first_name": "John",
|
||||||
|
"last_name": "Smith",
|
||||||
|
"birth_date": "2001-05-12",
|
||||||
|
"nationality": "US",
|
||||||
|
"height_cm": 198,
|
||||||
|
"weight_kg": 96,
|
||||||
|
"position": "SF",
|
||||||
|
"gp": 20,
|
||||||
|
"mpg": 28.5,
|
||||||
|
"ppg": 14.2,
|
||||||
|
"rpg": 5.1,
|
||||||
|
"apg": 3.2,
|
||||||
|
"spg": 1.1,
|
||||||
|
"bpg": 0.5,
|
||||||
|
"tov": 1.9,
|
||||||
|
"fg_pct": 47.3,
|
||||||
|
"three_pct": 36.1,
|
||||||
|
"ft_pct": 80.0,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
extractor = PublicJsonSnapshotExtractor(
|
||||||
|
url="https://example.com/public-feed.json",
|
||||||
|
source_name="test_public_feed",
|
||||||
|
http_client=FakeClient(),
|
||||||
|
)
|
||||||
|
output_file = tmp_path / "public.json"
|
||||||
|
result = extractor.run(output_path=output_file, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.records_count == 1
|
||||||
|
payload = json.loads(output_file.read_text(encoding="utf-8"))
|
||||||
|
row = payload["records"][0]
|
||||||
|
assert row["competition_external_id"] == "99"
|
||||||
|
assert row["team_external_id"] == "10"
|
||||||
|
assert row["player_external_id"] == "123"
|
||||||
|
assert row["full_name"] == "John Smith"
|
||||||
|
assert row["three_pt_pct"] == 36.1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_public_json_extractor_accepts_missing_optional_bio_and_physical_fields(tmp_path):
    """A record with only the required fields normalizes; optional bio fields become None."""

    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [_minimal_public_json_record()]}

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    snapshot_path = tmp_path / "public-optional.json"
    result = extractor.run(output_path=snapshot_path, snapshot_date=date(2026, 3, 13))

    assert result.records_count == 1
    written = json.loads(snapshot_path.read_text(encoding="utf-8"))
    row = written["records"][0]
    assert row["full_name"] == "Jane Doe"
    # All optional bio/physical fields are explicitly null in the output.
    for optional_key in (
        "first_name",
        "last_name",
        "birth_date",
        "nationality",
        "height_cm",
        "weight_kg",
        "position",
    ):
        assert row[optional_key] is None
    assert row.get("role") is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_public_json_extractor_fails_when_required_stat_missing():
    """Removing a required stat (points_per_game) makes normalization raise."""
    incomplete = _minimal_public_json_record()
    del incomplete["points_per_game"]

    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [incomplete]}

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
@pytest.mark.parametrize("required_field", sorted(REQUIRED_RECORD_FIELDS))
def test_public_json_required_fields_follow_snapshot_schema(required_field):
    """Every entry in REQUIRED_RECORD_FIELDS is individually enforced by the extractor."""
    record = _minimal_public_json_record()
    del record[required_field]

    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {"records": [record]}

    extractor = PublicJsonSnapshotExtractor(
        url="https://example.com/public-feed.json",
        source_name="test_public_feed",
        http_client=FakeClient(),
    )
    with pytest.raises(ExtractorNormalizationError, match="missing required fields"):
        extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_run_extractor_management_command_writes_snapshot(tmp_path, settings):
    """run_extractor writes a dated snapshot file tagged with the configured source.

    The HTTP layer is replaced with a stub so no network I/O happens.  The
    patch is applied through ``pytest.MonkeyPatch.context()`` instead of a
    hand-rolled ``MonkeyPatch()`` with try/finally: the context manager
    guarantees undo even if ``call_command`` raises, with less bookkeeping.
    """
    settings.EXTRACTOR_PUBLIC_JSON_URL = "https://example.com/feed.json"
    settings.EXTRACTOR_PUBLIC_SOURCE_NAME = "cmd_test_source"
    output_dir = tmp_path / "snapshots"

    class FakeClient:
        def get_json(self, *_args, **_kwargs):
            return {
                "records": [
                    {
                        "competition_external_id": "comp-a",
                        "competition_name": "Alpha League",
                        "season": "2025-2026",
                        "team_external_id": "team-a",
                        "team_name": "Alpha Team",
                        "player_external_id": "player-a",
                        "full_name": "Alpha Player",
                        "first_name": "Alpha",
                        "last_name": "Player",
                        "birth_date": "2000-04-01",
                        "nationality": "US",
                        "height_cm": 190,
                        "weight_kg": 88,
                        "position": "PG",
                        "games_played": 12,
                        "minutes_per_game": 31.0,
                        "points_per_game": 17.0,
                        "rebounds_per_game": 4.0,
                        "assists_per_game": 6.0,
                        "steals_per_game": 1.3,
                        "blocks_per_game": 0.1,
                        "turnovers_per_game": 2.4,
                        "fg_pct": 44.0,
                        "three_pt_pct": 37.0,
                        "ft_pct": 79.0,
                    }
                ]
            }

    with pytest.MonkeyPatch.context() as mp:
        mp.setattr(
            "apps.ingestion.extractors.public_json.ResponsibleHttpClient",
            lambda **_kwargs: FakeClient(),
        )
        call_command(
            "run_extractor",
            "public_json_snapshot",
            "--output-path",
            str(output_dir),
            "--snapshot-date",
            "2026-03-13",
        )

    # Exactly one snapshot file, named <extractor>-<snapshot-date>.json.
    files = list(output_dir.glob("public_json_snapshot-2026-03-13.json"))
    assert len(files) == 1
    payload = json.loads(files[0].read_text(encoding="utf-8"))
    assert payload["source_name"] == "cmd_test_source"
    assert payload["records"][0]["full_name"] == "Alpha Player"
|
||||||
|
|
||||||
|
|
||||||
|
def test_http_client_retries_on_retryable_status(monkeypatch):
    """A 429 on the first attempt is retried; the second (200) body is returned."""

    class FakeSession:
        def __init__(self):
            self.calls = 0

        def get(self, *_args, **_kwargs):
            self.calls += 1
            first_attempt = self.calls == 1
            if first_attempt:
                return _FakeResponse({"error": "busy"}, status_code=429)
            return _FakeResponse({"records": []}, status_code=200)

    client = ResponsibleHttpClient(
        user_agent="test-agent",
        timeout_seconds=5,
        retries=1,
        retry_sleep_seconds=0,
        request_delay_seconds=0,
        session=FakeSession(),
    )
    assert client.get_json("https://example.com/feed.json") == {"records": []}
|
||||||
363
tests/test_import_snapshots_command.py
Normal file
363
tests/test_import_snapshots_command.py
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from apps.competitions.models import Competition, Season
|
||||||
|
from apps.ingestion.models import ImportFile, ImportRun
|
||||||
|
from apps.players.models import Player
|
||||||
|
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
||||||
|
from apps.teams.models import Team
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_payload() -> dict:
    """Return one fully populated, schema-valid snapshot payload (single NBA record)."""
    record = {
        "competition_external_id": "comp-nba",
        "competition_name": "NBA",
        "season": "2025-2026",
        "team_external_id": "team-lal",
        "team_name": "Los Angeles Lakers",
        "player_external_id": "player-23",
        "full_name": "LeBron James",
        "first_name": "LeBron",
        "last_name": "James",
        "birth_date": "1984-12-30",
        "nationality": "US",
        "height_cm": 206,
        "weight_kg": 113,
        "position": "SF",
        "role": "Primary Creator",
        "games_played": 60,
        "minutes_per_game": 34.5,
        "points_per_game": 25.4,
        "rebounds_per_game": 7.2,
        "assists_per_game": 8.1,
        "steals_per_game": 1.3,
        "blocks_per_game": 0.7,
        "turnovers_per_game": 3.2,
        "fg_pct": 51.1,
        "three_pt_pct": 38.4,
        "ft_pct": 79.8,
    }
    return {
        "source_name": "official_site_feed",
        "snapshot_date": "2026-03-13",
        "records": [record],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_payload_for_source(source_name: str, *, competition_name: str = "NBA", team_name: str = "Los Angeles Lakers") -> dict:
    """Variant of _valid_payload with source, competition and team names overridden."""
    payload = _valid_payload()
    payload["source_name"] = source_name
    record = payload["records"][0]
    record["competition_name"] = competition_name
    record["team_name"] = team_name
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def _write_json(path: Path, payload: dict) -> None:
    """Serialize *payload* as JSON and write it to *path* as UTF-8 text."""
    serialized = json.dumps(payload)
    path.write_text(serialized, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_valid_snapshot_import(tmp_path, settings):
    """A well-formed snapshot is imported, archived, and materialized as domain rows."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    _write_json(incoming / "nba-2026-03-13.json", _valid_payload())

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.SUCCESS
    assert run.files_processed == 1
    assert run.rows_upserted == 1

    import_file = ImportFile.objects.get(import_run=run)
    assert import_file.status == ImportFile.FileStatus.SUCCESS
    assert import_file.source_name == "official_site_feed"
    assert import_file.snapshot_date == date(2026, 3, 13)

    # File lifecycle: moved out of incoming/ into archive/.
    assert (archive / "nba-2026-03-13.json").exists()
    assert not (incoming / "nba-2026-03-13.json").exists()

    assert Competition.objects.filter(source_name="official_site_feed", source_uid="comp-nba").exists()
    assert Team.objects.filter(source_name="official_site_feed", source_uid="team-lal").exists()
    assert Player.objects.filter(source_name="official_site_feed", source_uid="player-23").exists()
    assert Season.objects.filter(source_uid="season:2025-2026").exists()
    assert PlayerSeason.objects.count() == 1
    assert PlayerSeasonStats.objects.count() == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_snapshot_import_succeeds_with_optional_bio_and_physical_fields_missing(tmp_path, settings):
    """Optional bio/physical fields may be absent; import succeeds with null attributes."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"):
        payload["records"][0].pop(optional_field, None)

    _write_json(incoming / "optional-missing.json", payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    assert ImportRun.objects.get().status == ImportRun.RunStatus.SUCCESS
    player = Player.objects.get(source_uid="player-23")
    # Name parts are still populated even though first/last were dropped from
    # the feed — presumably derived from full_name; the rest stay null.
    assert player.first_name == "LeBron"
    assert player.last_name == "James"
    for null_attr in ("birth_date", "nationality", "nominal_position", "height_cm", "weight_kg"):
        assert getattr(player, null_attr) is None
    assert PlayerSeasonStats.objects.count() == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_snapshot_import_preserves_single_name_part_without_forced_split(tmp_path, settings):
    """A record with only first_name keeps it as-is; last_name stays empty."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    record = payload["records"][0]
    record["first_name"] = "LeBron"
    del record["last_name"]

    _write_json(incoming / "single-name-part.json", payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    assert ImportRun.objects.get().status == ImportRun.RunStatus.SUCCESS
    player = Player.objects.get(source_uid="player-23")
    assert (player.first_name, player.last_name) == ("LeBron", "")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
@pytest.mark.parametrize(
    ("source_name", "competition_id", "competition_name"),
    [
        ("lba", "lba-serie-a", "Lega Basket Serie A"),
        ("bcl", "bcl", "Basketball Champions League"),
    ],
)
def test_partial_public_source_snapshot_imports_for_lba_and_bcl(
    tmp_path,
    settings,
    source_name,
    competition_id,
    competition_name,
):
    """Public-source snapshots carrying only required fields import cleanly."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    payload["source_name"] = source_name
    record = payload["records"][0]
    record["competition_external_id"] = competition_id
    record["competition_name"] = competition_name
    # Strip every optional bio/physical field to simulate a sparse public feed.
    for optional_field in ("first_name", "last_name", "birth_date", "nationality", "height_cm", "weight_kg", "position", "role"):
        record.pop(optional_field, None)

    _write_json(incoming / f"{source_name}.json", payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    assert ImportRun.objects.get().status == ImportRun.RunStatus.SUCCESS
    assert Competition.objects.filter(source_uid=competition_id, name=competition_name).exists()
    assert Player.objects.filter(source_uid="player-23").exists()
    assert PlayerSeasonStats.objects.count() == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_invalid_snapshot_rejected_and_moved_to_failed(tmp_path, settings):
    """A snapshot missing a required stat fails the run, lands in failed/, writes nothing."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    payload["records"][0].pop("points_per_game")
    _write_json(incoming / "broken.json", payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    run = ImportRun.objects.get()
    assert run.status == ImportRun.RunStatus.FAILED

    import_file = ImportFile.objects.get(import_run=run)
    assert import_file.status == ImportFile.FileStatus.FAILED
    assert "missing required fields" in import_file.error_message

    # Rejected files go to failed/, never to archive/, and nothing is persisted.
    assert (failed / "broken.json").exists()
    assert not (archive / "broken.json").exists()
    assert not Competition.objects.exists()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_idempotent_reimport_uses_checksum_and_skips_duplicate(tmp_path, settings):
    """Re-dropping identical content under a new filename is skipped via checksum."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    _write_json(incoming / "first.json", payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")
    assert Competition.objects.count() == 1
    assert Player.objects.count() == 1

    # Same bytes, different filename: must be detected as a duplicate.
    _write_json(incoming / "first-duplicate.json", payload)
    call_command("import_snapshots")

    assert Competition.objects.count() == 1
    assert Player.objects.count() == 1
    assert PlayerSeason.objects.count() == 1

    duplicate_file = ImportFile.objects.filter(relative_path="first-duplicate.json").order_by("-id").first()
    assert duplicate_file is not None
    assert duplicate_file.status == ImportFile.FileStatus.SKIPPED
    assert duplicate_file.checksum
    assert "duplicate checksum" in duplicate_file.error_message.lower()

    # Skipped duplicates are still archived, not left behind in incoming/.
    assert (archive / "first-duplicate.json").exists()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_same_run_second_file_same_checksum_is_skipped(tmp_path, settings):
    """Two identical files in one run: the first succeeds, the second is skipped."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    payload = _valid_payload()
    for file_name in ("a.json", "b.json"):
        _write_json(incoming / file_name, payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    by_path = {row.relative_path: row for row in ImportFile.objects.order_by("relative_path")}
    assert by_path["a.json"].status == ImportFile.FileStatus.SUCCESS
    assert by_path["b.json"].status == ImportFile.FileStatus.SKIPPED
    assert by_path["a.json"].checksum == by_path["b.json"].checksum
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_same_raw_external_ids_from_different_sources_do_not_collide(tmp_path, settings):
    """Identical raw external ids are namespaced per source, so two sources coexist."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    lba_payload = _valid_payload_for_source("lba", competition_name="Lega Basket Serie A", team_name="Virtus Bologna")
    bcl_payload = _valid_payload_for_source("bcl", competition_name="Basketball Champions League", team_name="AEK Athens")
    _write_json(incoming / "lba.json", lba_payload)
    _write_json(incoming / "bcl.json", bcl_payload)

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    call_command("import_snapshots")

    # One row per source for every shared raw id.
    assert Competition.objects.filter(source_uid="comp-nba").count() == 2
    assert Team.objects.filter(source_uid="team-lal").count() == 2
    assert Player.objects.filter(source_uid="player-23").count() == 2
    assert Competition.objects.filter(source_name="lba", source_uid="comp-nba", name="Lega Basket Serie A").exists()
    assert Competition.objects.filter(source_name="bcl", source_uid="comp-nba", name="Basketball Champions League").exists()
    assert Team.objects.filter(source_name="lba", source_uid="team-lal", name="Virtus Bologna").exists()
    assert Team.objects.filter(source_name="bcl", source_uid="team-lal", name="AEK Athens").exists()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_reimport_same_source_payload_remains_idempotent(tmp_path, settings):
    """Importing the same source payload twice creates each entity exactly once."""
    incoming, archive, failed = (tmp_path / part for part in ("incoming", "archive", "failed"))
    for folder in (incoming, archive, failed):
        folder.mkdir()

    settings.STATIC_DATASET_INCOMING_DIR = str(incoming)
    settings.STATIC_DATASET_ARCHIVE_DIR = str(archive)
    settings.STATIC_DATASET_FAILED_DIR = str(failed)

    payload = _valid_payload_for_source("lba")
    # Drop and import the same payload twice under different filenames.
    for drop_name in ("lba-1.json", "lba-2.json"):
        _write_json(incoming / drop_name, payload)
        call_command("import_snapshots")

    for model, uid in ((Competition, "comp-nba"), (Team, "team-lal"), (Player, "player-23")):
        assert model.objects.filter(source_name="lba", source_uid=uid).count() == 1
|
||||||
@ -1,251 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from apps.competitions.models import Competition, Season
|
|
||||||
from apps.ingestion.models import IngestionError, IngestionRun
|
|
||||||
from apps.ingestion.services.sync import run_sync_job
|
|
||||||
from apps.players.models import Nationality, Player
|
|
||||||
from apps.providers.exceptions import ProviderRateLimitError
|
|
||||||
from apps.providers.models import ExternalMapping
|
|
||||||
from apps.stats.models import PlayerSeason, PlayerSeasonStats
|
|
||||||
from apps.teams.models import Team
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_run_full_sync_creates_domain_objects(settings):
    """A full sync against the demo provider populates every domain table."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    assert run.status == IngestionRun.RunStatus.SUCCESS
    for model in (Competition, Team, Season, Player, PlayerSeason, PlayerSeasonStats):
        assert model.objects.count() >= 1
    assert Player.objects.filter(origin_competition__isnull=False).exists()
    expected_steps = [
        "competitions",
        "teams",
        "seasons",
        "players",
        "player_stats",
        "player_careers",
    ]
    assert run.context.get("completed_steps") == expected_steps
    assert run.context.get("source_counts", {}).get("players", 0) >= 1
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_full_sync_is_idempotent(settings):
    """Running the same full sync twice leaves every entity count unchanged."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    def entity_counts() -> dict:
        # Snapshot of all domain-table row counts.
        return {
            "competition": Competition.objects.count(),
            "team": Team.objects.count(),
            "season": Season.objects.count(),
            "player": Player.objects.count(),
            "player_season": PlayerSeason.objects.count(),
            "player_stats": PlayerSeasonStats.objects.count(),
        }

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    counts_after_first = entity_counts()

    run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
    assert entity_counts() == counts_after_first
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_incremental_sync_runs_successfully(settings):
    """An incremental sync with a cursor succeeds, records work, and stamps timing."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    run = run_sync_job(
        provider_namespace="mvp_demo",
        job_type=IngestionRun.JobType.INCREMENTAL,
        cursor="demo-cursor",
    )

    assert run.status == IngestionRun.RunStatus.SUCCESS
    assert run.records_processed > 0
    assert run.started_at is not None
    assert run.finished_at is not None
    assert run.finished_at >= run.started_at
    assert run.error_summary == ""
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_run_sync_handles_rate_limit(settings):
    """A provider rate limit aborts the run, marks it FAILED, and records the error.

    The PROVIDER_MVP_FORCE_RATE_LIMIT env flag is removed in a ``finally``
    block: the original popped it only after the assertions, so any failing
    assertion leaked the flag into every subsequent test in the process.
    """
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
    os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1"
    try:
        with pytest.raises(ProviderRateLimitError):
            run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

        run = IngestionRun.objects.order_by("-id").first()
        assert run is not None
        assert run.status == IngestionRun.RunStatus.FAILED
        assert run.started_at is not None
        assert run.finished_at is not None
        assert "Rate limit" in run.error_summary
        assert IngestionError.objects.filter(ingestion_run=run).exists()
    finally:
        os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_sync_idempotency_with_stable_payload(monkeypatch):
    """Syncing an unchanging balldontlie payload twice must not duplicate rows.

    Also verifies that a null nationality in the feed stays null on the player
    (no placeholder "ZZ" Nationality row is invented).
    """

    class StableProvider:
        # Deterministic provider double: returns the same payload on every
        # call, so the second sync exercises the pure upsert path.
        def sync_all(self):
            return {
                "competitions": [
                    {
                        "external_id": "competition-nba",
                        "name": "NBA",
                        "slug": "nba",
                        "competition_type": "league",
                        "gender": "men",
                        "level": 1,
                        "country": None,
                        "is_active": True,
                    }
                ],
                "teams": [
                    {
                        "external_id": "team-14",
                        "name": "Los Angeles Lakers",
                        "short_name": "LAL",
                        "slug": "los-angeles-lakers",
                        "country": None,
                        "is_national_team": False,
                    }
                ],
                "seasons": [
                    {
                        "external_id": "season-2024",
                        "label": "2024-2025",
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "is_current": False,
                    }
                ],
                "players": [
                    {
                        "external_id": "player-237",
                        "first_name": "LeBron",
                        "last_name": "James",
                        "full_name": "LeBron James",
                        "birth_date": None,
                        # Null nationality: the sync must not fabricate one.
                        "nationality": None,
                        "nominal_position": {"code": "SF", "name": "Small Forward"},
                        "inferred_role": {"code": "wing", "name": "Wing"},
                        "height_cm": None,
                        "weight_kg": None,
                        "dominant_hand": "unknown",
                        "is_active": True,
                        "aliases": [],
                    }
                ],
                "player_stats": [
                    {
                        "external_id": "ps-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "games_played": 2,
                        "games_started": 0,
                        "minutes_played": 68,
                        "points": 25,
                        "rebounds": 9,
                        "assists": 8,
                        "steals": 1.5,
                        "blocks": 0.5,
                        "turnovers": 3.5,
                        "fg_pct": 55.0,
                        "three_pct": 45.0,
                        "ft_pct": 95.0,
                        "usage_rate": None,
                        "true_shooting_pct": None,
                        "player_efficiency_rating": None,
                    }
                ],
                "player_careers": [
                    {
                        "external_id": "career-2024-237-14",
                        "player_external_id": "player-237",
                        "team_external_id": "team-14",
                        "competition_external_id": "competition-nba",
                        "season_external_id": "season-2024",
                        "role_code": "",
                        "shirt_number": None,
                        "start_date": "2024-10-01",
                        "end_date": "2025-06-30",
                        "notes": "Imported from balldontlie aggregated box scores",
                    }
                ],
            }

        def sync_incremental(self, *, cursor: str | None = None):
            # Incremental simply echoes the full payload plus the cursor.
            payload = self.sync_all()
            payload["cursor"] = cursor
            return payload

    monkeypatch.setattr("apps.ingestion.services.sync.get_provider", lambda namespace: StableProvider())

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    lebron = Player.objects.get(full_name="LeBron James")
    assert lebron.nationality is None
    assert not Nationality.objects.filter(iso2_code="ZZ").exists()

    # Entity counts (including provider mappings) after the first sync...
    counts_first = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }

    run_sync_job(provider_namespace="balldontlie", job_type=IngestionRun.JobType.FULL_SYNC)
    # ...must be identical after the second sync of the same payload.
    counts_second = {
        "competition": Competition.objects.count(),
        "team": Team.objects.count(),
        "season": Season.objects.count(),
        "player": Player.objects.count(),
        "player_season": PlayerSeason.objects.count(),
        "player_stats": PlayerSeasonStats.objects.count(),
        "mapping": ExternalMapping.objects.filter(provider_namespace="balldontlie").count(),
    }

    assert counts_first == counts_second
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_batch_transactions_preserve_prior_step_progress_on_failure(settings, monkeypatch):
    """A crash in the teams step keeps the competitions step committed and fails the run."""
    settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"

    def raise_team_failure(*args, **kwargs):
        raise RuntimeError("teams-sync-failed")

    monkeypatch.setattr("apps.ingestion.services.sync._sync_teams", raise_team_failure)

    with pytest.raises(RuntimeError):
        run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)

    failed_run = IngestionRun.objects.order_by("-id").first()
    assert failed_run is not None
    assert failed_run.status == IngestionRun.RunStatus.FAILED
    # Competitions committed before the crash survive; teams never landed.
    assert Competition.objects.exists()
    assert Team.objects.count() == 0
    assert failed_run.context.get("completed_steps") == ["competitions"]
    assert "Unhandled ingestion error" in failed_run.error_summary
|
|
||||||
@ -1,112 +0,0 @@
|
|||||||
import pytest
|
|
||||||
from contextlib import contextmanager
|
|
||||||
from celery.schedules import crontab
|
|
||||||
import psycopg
|
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
from apps.ingestion.models import IngestionRun
|
|
||||||
from apps.ingestion.services.runs import _build_ingestion_lock_key, release_ingestion_lock, try_acquire_ingestion_lock
|
|
||||||
from apps.ingestion.tasks import scheduled_provider_sync, trigger_incremental_sync
|
|
||||||
from config.celery import app as celery_app, build_periodic_schedule
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_periodic_task_registered():
    """The scheduled provider sync task is registered with the Celery app."""
    task_name = "apps.ingestion.tasks.scheduled_provider_sync"
    assert task_name in celery_app.tasks
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_build_periodic_schedule_enabled(settings):
|
|
||||||
settings.INGESTION_SCHEDULE_ENABLED = True
|
|
||||||
settings.INGESTION_SCHEDULE_CRON = "15 * * * *"
|
|
||||||
|
|
||||||
schedule = build_periodic_schedule()
|
|
||||||
assert "ingestion.scheduled_provider_sync" in schedule
|
|
||||||
entry = schedule["ingestion.scheduled_provider_sync"]
|
|
||||||
assert entry["task"] == "apps.ingestion.tasks.scheduled_provider_sync"
|
|
||||||
assert isinstance(entry["schedule"], crontab)
|
|
||||||
assert entry["schedule"]._orig_minute == "15"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_build_periodic_schedule_disabled(settings):
|
|
||||||
settings.INGESTION_SCHEDULE_ENABLED = False
|
|
||||||
assert build_periodic_schedule() == {}
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
|
|
||||||
settings.INGESTION_SCHEDULE_ENABLED = True
|
|
||||||
settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
|
|
||||||
|
|
||||||
with caplog.at_level("ERROR"):
|
|
||||||
schedule = build_periodic_schedule()
|
|
||||||
|
|
||||||
assert schedule == {}
|
|
||||||
assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_trigger_incremental_sync_skips_when_advisory_lock_not_acquired(settings, monkeypatch):
|
|
||||||
settings.INGESTION_PREVENT_OVERLAP = True
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def fake_lock(**kwargs):
|
|
||||||
yield False
|
|
||||||
|
|
||||||
monkeypatch.setattr("apps.ingestion.tasks.ingestion_advisory_lock", fake_lock)
|
|
||||||
run_id = trigger_incremental_sync.apply(
|
|
||||||
kwargs={"provider_namespace": "mvp_demo"},
|
|
||||||
).get()
|
|
||||||
skipped_run = IngestionRun.objects.get(id=run_id)
|
|
||||||
assert skipped_run.status == IngestionRun.RunStatus.CANCELED
|
|
||||||
assert "advisory lock" in skipped_run.error_summary
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_advisory_lock_prevents_concurrent_acquisition():
|
|
||||||
provider_namespace = "mvp_demo"
|
|
||||||
job_type = IngestionRun.JobType.INCREMENTAL
|
|
||||||
lock_key = _build_ingestion_lock_key(provider_namespace=provider_namespace, job_type=job_type)
|
|
||||||
|
|
||||||
conninfo = (
|
|
||||||
f"dbname={settings.DATABASES['default']['NAME']} "
|
|
||||||
f"user={settings.DATABASES['default']['USER']} "
|
|
||||||
f"password={settings.DATABASES['default']['PASSWORD']} "
|
|
||||||
f"host={settings.DATABASES['default']['HOST']} "
|
|
||||||
f"port={settings.DATABASES['default']['PORT']}"
|
|
||||||
)
|
|
||||||
with psycopg.connect(conninfo) as external_conn:
|
|
||||||
with external_conn.cursor() as cursor:
|
|
||||||
cursor.execute("SELECT pg_advisory_lock(%s);", [lock_key])
|
|
||||||
acquired, _ = try_acquire_ingestion_lock(
|
|
||||||
provider_namespace=provider_namespace,
|
|
||||||
job_type=job_type,
|
|
||||||
)
|
|
||||||
assert acquired is False
|
|
||||||
cursor.execute("SELECT pg_advisory_unlock(%s);", [lock_key])
|
|
||||||
|
|
||||||
acquired, django_key = try_acquire_ingestion_lock(
|
|
||||||
provider_namespace=provider_namespace,
|
|
||||||
job_type=job_type,
|
|
||||||
)
|
|
||||||
assert acquired is True
|
|
||||||
release_ingestion_lock(lock_key=django_key)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_scheduled_provider_sync_uses_configured_job_type(settings, monkeypatch):
|
|
||||||
settings.INGESTION_SCHEDULE_JOB_TYPE = IngestionRun.JobType.FULL_SYNC
|
|
||||||
settings.INGESTION_SCHEDULE_PROVIDER_NAMESPACE = "mvp_demo"
|
|
||||||
captured = {}
|
|
||||||
|
|
||||||
def fake_runner(**kwargs):
|
|
||||||
captured.update(kwargs)
|
|
||||||
return 99
|
|
||||||
|
|
||||||
monkeypatch.setattr("apps.ingestion.tasks._run_sync_with_overlap_guard", fake_runner)
|
|
||||||
|
|
||||||
result = scheduled_provider_sync.apply().get()
|
|
||||||
assert result == 99
|
|
||||||
assert captured["provider_namespace"] == "mvp_demo"
|
|
||||||
assert captured["job_type"] == IngestionRun.JobType.FULL_SYNC
|
|
||||||
@ -4,8 +4,6 @@ import pytest
|
|||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
from apps.ingestion.models import IngestionRun
|
|
||||||
from apps.ingestion.services.sync import run_sync_job
|
|
||||||
from apps.players.models import Nationality, Player, Position, Role
|
from apps.players.models import Nationality, Player, Position, Role
|
||||||
from apps.scouting.models import SavedSearch
|
from apps.scouting.models import SavedSearch
|
||||||
|
|
||||||
@ -49,25 +47,3 @@ def test_saved_search_run_filters_player_results(client):
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "Marco Rossi" in response.content.decode()
|
assert "Marco Rossi" in response.content.decode()
|
||||||
assert "Luca Bianchi" not in response.content.decode()
|
assert "Luca Bianchi" not in response.content.decode()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_ingestion_output_is_searchable_in_ui_and_api(settings, client):
|
|
||||||
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
|
|
||||||
run = run_sync_job(provider_namespace="mvp_demo", job_type=IngestionRun.JobType.FULL_SYNC)
|
|
||||||
assert run.status == IngestionRun.RunStatus.SUCCESS
|
|
||||||
|
|
||||||
player = Player.objects.filter(origin_competition__isnull=False).order_by("id").first()
|
|
||||||
assert player is not None
|
|
||||||
assert player.origin_competition_id is not None
|
|
||||||
|
|
||||||
params = {"origin_competition": player.origin_competition_id}
|
|
||||||
ui_response = client.get(reverse("players:index"), data=params)
|
|
||||||
api_response = client.get(reverse("api:players"), data=params)
|
|
||||||
|
|
||||||
assert ui_response.status_code == 200
|
|
||||||
assert api_response.status_code == 200
|
|
||||||
ui_ids = {item.id for item in ui_response.context["players"]}
|
|
||||||
api_ids = {item["id"] for item in api_response.json()["results"]}
|
|
||||||
assert player.id in ui_ids
|
|
||||||
assert player.id in api_ids
|
|
||||||
|
|||||||
143
tests/test_lba_extractor.py
Normal file
143
tests/test_lba_extractor.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from apps.ingestion.extractors.lba import LBASnapshotExtractor
|
||||||
|
from apps.ingestion.extractors.base import ExtractorNormalizationError
|
||||||
|
from apps.ingestion.extractors.registry import create_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def _load_fixture(path: str) -> dict:
|
||||||
|
fixture_path = Path(__file__).parent / "fixtures" / path
|
||||||
|
return json.loads(fixture_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_lba_extractor_normalizes_fixture_payload(tmp_path, settings):
|
||||||
|
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||||
|
settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("lba/lba_players_stats.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = LBASnapshotExtractor(http_client=FakeClient())
|
||||||
|
output_path = tmp_path / "lba.json"
|
||||||
|
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.extractor_name == "lba"
|
||||||
|
assert result.source_name == "lba"
|
||||||
|
assert result.records_count == 1
|
||||||
|
|
||||||
|
payload = json.loads(output_path.read_text(encoding="utf-8"))
|
||||||
|
assert payload["source_name"] == "lba"
|
||||||
|
assert payload["snapshot_date"] == "2026-03-13"
|
||||||
|
row = payload["records"][0]
|
||||||
|
assert row["competition_external_id"] == "lba-serie-a"
|
||||||
|
assert row["competition_name"] == "Lega Basket Serie A"
|
||||||
|
assert row["team_external_id"] == "team-virtus-bologna"
|
||||||
|
assert row["team_name"] == "Virtus Bologna"
|
||||||
|
assert row["player_external_id"] == "p-001"
|
||||||
|
assert row["full_name"] == "Marco Rossi"
|
||||||
|
assert row["minutes_per_game"] == 28.3
|
||||||
|
assert row["three_pt_pct"] == 36.5
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_lba_extractor_accepts_partial_public_player_bio_fields(tmp_path, settings):
|
||||||
|
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||||
|
settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("lba/lba_players_stats_partial_public.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = LBASnapshotExtractor(http_client=FakeClient())
|
||||||
|
output_path = tmp_path / "lba-partial.json"
|
||||||
|
result = extractor.run(output_path=output_path, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
assert result.records_count == 1
|
||||||
|
payload = json.loads(output_path.read_text(encoding="utf-8"))
|
||||||
|
row = payload["records"][0]
|
||||||
|
assert row["full_name"] == "Andrea Bianchi"
|
||||||
|
assert row["first_name"] is None
|
||||||
|
assert row["last_name"] is None
|
||||||
|
assert row["birth_date"] is None
|
||||||
|
assert row["nationality"] is None
|
||||||
|
assert row["height_cm"] is None
|
||||||
|
assert row["weight_kg"] is None
|
||||||
|
assert row["position"] is None
|
||||||
|
assert row["games_played"] == 18
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_lba_extractor_still_fails_when_required_stats_are_missing(settings):
|
||||||
|
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||||
|
settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("lba/lba_players_stats_partial_public.json")
|
||||||
|
fixture_payload["data"][0].pop("ppg")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
extractor = LBASnapshotExtractor(http_client=FakeClient())
|
||||||
|
with pytest.raises(ExtractorNormalizationError):
|
||||||
|
extractor.run(write_output=False, snapshot_date=date(2026, 3, 13))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_lba_extractor_registry_selection(settings):
|
||||||
|
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||||
|
settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
|
||||||
|
extractor = create_extractor("lba")
|
||||||
|
assert isinstance(extractor, LBASnapshotExtractor)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_run_lba_extractor_command_writes_snapshot(tmp_path, settings, monkeypatch):
|
||||||
|
settings.EXTRACTOR_LBA_STATS_URL = "https://www.legabasket.it/public/stats.json"
|
||||||
|
settings.EXTRACTOR_LBA_SEASON_LABEL = "2025-2026"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_EXTERNAL_ID = "lba-serie-a"
|
||||||
|
settings.EXTRACTOR_LBA_COMPETITION_NAME = "Lega Basket Serie A"
|
||||||
|
|
||||||
|
fixture_payload = _load_fixture("lba/lba_players_stats.json")
|
||||||
|
|
||||||
|
class FakeClient:
|
||||||
|
def get_json(self, *_args, **_kwargs):
|
||||||
|
return fixture_payload
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"apps.ingestion.extractors.lba.ResponsibleHttpClient",
|
||||||
|
lambda **_kwargs: FakeClient(),
|
||||||
|
)
|
||||||
|
|
||||||
|
call_command(
|
||||||
|
"run_lba_extractor",
|
||||||
|
"--output-path",
|
||||||
|
str(tmp_path),
|
||||||
|
"--snapshot-date",
|
||||||
|
"2026-03-13",
|
||||||
|
)
|
||||||
|
|
||||||
|
files = list(tmp_path.glob("lba-2026-03-13.json"))
|
||||||
|
assert len(files) == 1
|
||||||
|
payload = json.loads(files[0].read_text(encoding="utf-8"))
|
||||||
|
assert payload["source_name"] == "lba"
|
||||||
|
assert len(payload["records"]) == 1
|
||||||
@ -4,38 +4,71 @@ import pytest
|
|||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
|
|
||||||
from apps.competitions.models import Competition
|
from apps.competitions.models import Competition, Season
|
||||||
|
from apps.ingestion.models import ImportFile, ImportRun
|
||||||
from apps.players.models import Nationality, Player, Position, Role
|
from apps.players.models import Nationality, Player, Position, Role
|
||||||
from apps.providers.models import ExternalMapping
|
|
||||||
from apps.scouting.models import FavoritePlayer, SavedSearch
|
from apps.scouting.models import FavoritePlayer, SavedSearch
|
||||||
|
from apps.teams.models import Team
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_player_unique_full_name_birth_date_constraint():
|
def test_source_uid_uniqueness_is_scoped_by_source_name():
|
||||||
nationality = Nationality.objects.create(name="Italy", iso2_code="IT", iso3_code="ITA")
|
Season.objects.create(
|
||||||
position = Position.objects.create(code="PG", name="Point Guard")
|
source_uid="season-2024",
|
||||||
role = Role.objects.create(code="playmaker", name="Playmaker")
|
label="2024-2025",
|
||||||
|
start_date=date(2024, 10, 1),
|
||||||
|
end_date=date(2025, 6, 30),
|
||||||
|
)
|
||||||
|
Competition.objects.create(
|
||||||
|
source_name="lba",
|
||||||
|
source_uid="comp-001",
|
||||||
|
name="Serie A",
|
||||||
|
slug="serie-a",
|
||||||
|
competition_type=Competition.CompetitionType.LEAGUE,
|
||||||
|
)
|
||||||
|
Team.objects.create(source_name="lba", source_uid="team-001", name="Virtus Bologna", slug="virtus-bologna")
|
||||||
|
|
||||||
|
nationality = Nationality.objects.create(name="Spain", iso2_code="ES", iso3_code="ESP")
|
||||||
|
position = Position.objects.create(code="SF", name="Small Forward")
|
||||||
|
role = Role.objects.create(code="wing", name="Wing")
|
||||||
Player.objects.create(
|
Player.objects.create(
|
||||||
first_name="Marco",
|
source_name="lba",
|
||||||
last_name="Rossi",
|
source_uid="player-001",
|
||||||
full_name="Marco Rossi",
|
first_name="Juan",
|
||||||
birth_date=date(2001, 1, 1),
|
last_name="Perez",
|
||||||
|
full_name="Juan Perez",
|
||||||
|
birth_date=date(2000, 5, 1),
|
||||||
nationality=nationality,
|
nationality=nationality,
|
||||||
nominal_position=position,
|
nominal_position=position,
|
||||||
inferred_role=role,
|
inferred_role=role,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Competition.objects.create(
|
||||||
|
source_name="bcl",
|
||||||
|
source_uid="comp-001",
|
||||||
|
name="BCL",
|
||||||
|
slug="bcl",
|
||||||
|
competition_type=Competition.CompetitionType.INTERNATIONAL,
|
||||||
|
)
|
||||||
|
Team.objects.create(source_name="bcl", source_uid="team-001", name="AEK", slug="aek")
|
||||||
|
Player.objects.create(
|
||||||
|
source_name="bcl",
|
||||||
|
source_uid="player-001",
|
||||||
|
first_name="Juan",
|
||||||
|
last_name="Perez",
|
||||||
|
full_name="Juan Perez",
|
||||||
|
birth_date=date(2000, 5, 1),
|
||||||
|
nationality=nationality,
|
||||||
|
nominal_position=position,
|
||||||
|
inferred_role=role,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert Competition.objects.filter(source_uid="comp-001").count() == 2
|
||||||
|
assert Team.objects.filter(source_uid="team-001").count() == 2
|
||||||
|
assert Player.objects.filter(source_uid="player-001").count() == 2
|
||||||
|
|
||||||
with pytest.raises(IntegrityError):
|
with pytest.raises(IntegrityError):
|
||||||
Player.objects.create(
|
Team.objects.create(source_name="lba", source_uid="team-001", name="Another Team", slug="another-team")
|
||||||
first_name="Marco",
|
|
||||||
last_name="Rossi",
|
|
||||||
full_name="Marco Rossi",
|
|
||||||
birth_date=date(2001, 1, 1),
|
|
||||||
nationality=nationality,
|
|
||||||
nominal_position=position,
|
|
||||||
inferred_role=role,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
@ -50,14 +83,14 @@ def test_saved_search_unique_name_per_user_constraint():
|
|||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_favorite_unique_player_per_user_constraint():
|
def test_favorite_unique_player_per_user_constraint():
|
||||||
user = User.objects.create_user(username="u2", password="pass12345")
|
user = User.objects.create_user(username="u2", password="pass12345")
|
||||||
nationality = Nationality.objects.create(name="Spain", iso2_code="ES", iso3_code="ESP")
|
nationality = Nationality.objects.create(name="France", iso2_code="FR", iso3_code="FRA")
|
||||||
position = Position.objects.create(code="SF", name="Small Forward")
|
position = Position.objects.create(code="PF", name="Power Forward")
|
||||||
role = Role.objects.create(code="wing", name="Wing")
|
role = Role.objects.create(code="big", name="Big")
|
||||||
player = Player.objects.create(
|
player = Player.objects.create(
|
||||||
first_name="Juan",
|
first_name="Pierre",
|
||||||
last_name="Perez",
|
last_name="Durand",
|
||||||
full_name="Juan Perez",
|
full_name="Pierre Durand",
|
||||||
birth_date=date(2000, 5, 1),
|
birth_date=date(2001, 3, 3),
|
||||||
nationality=nationality,
|
nationality=nationality,
|
||||||
nominal_position=position,
|
nominal_position=position,
|
||||||
inferred_role=role,
|
inferred_role=role,
|
||||||
@ -69,24 +102,9 @@ def test_favorite_unique_player_per_user_constraint():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_external_mapping_unique_provider_external_id_constraint():
|
def test_import_file_unique_path_within_import_run():
|
||||||
competition = Competition.objects.create(
|
run = ImportRun.objects.create(source="daily_snapshot")
|
||||||
name="Liga ACB",
|
ImportFile.objects.create(import_run=run, relative_path="players/2026-03-13.json")
|
||||||
slug="liga-acb",
|
|
||||||
competition_type=Competition.CompetitionType.LEAGUE,
|
|
||||||
gender=Competition.Gender.MEN,
|
|
||||||
level=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
ExternalMapping.objects.create(
|
|
||||||
provider_namespace="mvp_demo",
|
|
||||||
external_id="comp-001",
|
|
||||||
content_object=competition,
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(IntegrityError):
|
with pytest.raises(IntegrityError):
|
||||||
ExternalMapping.objects.create(
|
ImportFile.objects.create(import_run=run, relative_path="players/2026-03-13.json")
|
||||||
provider_namespace="mvp_demo",
|
|
||||||
external_id="comp-001",
|
|
||||||
content_object=competition,
|
|
||||||
)
|
|
||||||
|
|||||||
@ -110,8 +110,6 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
|
|||||||
nationality=nationality,
|
nationality=nationality,
|
||||||
nominal_position=position,
|
nominal_position=position,
|
||||||
inferred_role=role,
|
inferred_role=role,
|
||||||
origin_competition=competition,
|
|
||||||
origin_team=team,
|
|
||||||
)
|
)
|
||||||
player_season = PlayerSeason.objects.create(
|
player_season = PlayerSeason.objects.create(
|
||||||
player=player,
|
player=player,
|
||||||
@ -135,7 +133,7 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
|
|||||||
response = client.get(
|
response = client.get(
|
||||||
reverse("players:index"),
|
reverse("players:index"),
|
||||||
data={
|
data={
|
||||||
"origin_competition": competition.id,
|
"competition": competition.id,
|
||||||
"nominal_position": position.id,
|
"nominal_position": position.id,
|
||||||
"sort": "ppg_desc",
|
"sort": "ppg_desc",
|
||||||
"page_size": 20,
|
"page_size": 20,
|
||||||
@ -152,7 +150,7 @@ def test_player_search_combined_filters_sorting_and_pagination(client):
|
|||||||
page2 = client.get(
|
page2 = client.get(
|
||||||
reverse("players:index"),
|
reverse("players:index"),
|
||||||
data={
|
data={
|
||||||
"origin_competition": competition.id,
|
"competition": competition.id,
|
||||||
"nominal_position": position.id,
|
"nominal_position": position.id,
|
||||||
"sort": "ppg_desc",
|
"sort": "ppg_desc",
|
||||||
"page_size": 20,
|
"page_size": 20,
|
||||||
|
|||||||
@ -127,14 +127,13 @@ def test_player_detail_page_loads(client):
|
|||||||
height_cm=201,
|
height_cm=201,
|
||||||
weight_kg=95,
|
weight_kg=95,
|
||||||
)
|
)
|
||||||
PlayerAlias.objects.create(player=player, alias="P. Martin")
|
|
||||||
|
|
||||||
response = client.get(reverse("players:detail", kwargs={"pk": player.pk}))
|
response = client.get(reverse("players:detail", kwargs={"pk": player.pk}))
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
body = response.content.decode()
|
body = response.content.decode()
|
||||||
assert "Paul Martin" in body
|
assert "Paul Martin" in body
|
||||||
assert "P. Martin" in body
|
assert "Summary" in body
|
||||||
|
assert "Season-by-Season Stats" in body
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
@ -242,3 +241,44 @@ def test_player_search_results_render_best_eligible_metric_labels(client):
|
|||||||
assert "Best Eligible PPG" in body
|
assert "Best Eligible PPG" in body
|
||||||
assert "Best Eligible MPG" in body
|
assert "Best Eligible MPG" in body
|
||||||
assert "best eligible values per player" in body.lower()
|
assert "best eligible values per player" in body.lower()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_player_search_results_render_dash_for_missing_eligible_metrics(client):
|
||||||
|
nationality = Nationality.objects.create(name="Norway", iso2_code="NO", iso3_code="NOR")
|
||||||
|
position = Position.objects.create(code="PF", name="Power Forward")
|
||||||
|
role = Role.objects.create(code="big", name="Big")
|
||||||
|
season = Season.objects.create(label="2025-2026", start_date=date(2025, 9, 1), end_date=date(2026, 6, 30))
|
||||||
|
competition = Competition.objects.create(
|
||||||
|
name="BLNO",
|
||||||
|
slug="blno",
|
||||||
|
competition_type=Competition.CompetitionType.LEAGUE,
|
||||||
|
gender=Competition.Gender.MEN,
|
||||||
|
country=nationality,
|
||||||
|
)
|
||||||
|
team = Team.objects.create(name="Oslo", slug="oslo", country=nationality)
|
||||||
|
|
||||||
|
player = Player.objects.create(
|
||||||
|
first_name="Ole",
|
||||||
|
last_name="NoStats",
|
||||||
|
full_name="Ole NoStats",
|
||||||
|
birth_date=date(2001, 1, 1),
|
||||||
|
nationality=nationality,
|
||||||
|
nominal_position=position,
|
||||||
|
inferred_role=role,
|
||||||
|
)
|
||||||
|
PlayerSeason.objects.create(
|
||||||
|
player=player,
|
||||||
|
season=season,
|
||||||
|
team=team,
|
||||||
|
competition=competition,
|
||||||
|
games_played=0,
|
||||||
|
minutes_played=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.get(reverse("players:index"), data={"season": season.id})
|
||||||
|
assert response.status_code == 200
|
||||||
|
body = response.content.decode()
|
||||||
|
assert "Ole NoStats" in body
|
||||||
|
# Missing eligible values are rendered as '-' rather than misleading zeros.
|
||||||
|
assert body.count(">-") > 0
|
||||||
|
|||||||
@ -1,77 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
|
|
||||||
from apps.providers.exceptions import ProviderNotFoundError, ProviderRateLimitError
|
|
||||||
from apps.providers.registry import get_provider
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_mvp_provider_fetch_and_search_players():
|
|
||||||
adapter = MvpDemoProviderAdapter()
|
|
||||||
|
|
||||||
players = adapter.fetch_players()
|
|
||||||
assert len(players) >= 2
|
|
||||||
|
|
||||||
results = adapter.search_players(query="luca")
|
|
||||||
assert any("Luca" in item["full_name"] for item in results)
|
|
||||||
|
|
||||||
detail = adapter.fetch_player(external_player_id="player-001")
|
|
||||||
assert detail is not None
|
|
||||||
assert detail["full_name"] == "Luca Rinaldi"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_mvp_provider_rate_limit_signal():
|
|
||||||
os.environ["PROVIDER_MVP_FORCE_RATE_LIMIT"] = "1"
|
|
||||||
adapter = MvpDemoProviderAdapter()
|
|
||||||
|
|
||||||
with pytest.raises(ProviderRateLimitError):
|
|
||||||
adapter.fetch_players()
|
|
||||||
|
|
||||||
os.environ.pop("PROVIDER_MVP_FORCE_RATE_LIMIT", None)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_provider_registry_resolution(settings):
|
|
||||||
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
|
|
||||||
provider = get_provider()
|
|
||||||
assert isinstance(provider, MvpDemoProviderAdapter)
|
|
||||||
|
|
||||||
with pytest.raises(ProviderNotFoundError):
|
|
||||||
get_provider("does-not-exist")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_demo_provider_sync_payload_uses_normalized_shape():
|
|
||||||
adapter = MvpDemoProviderAdapter()
|
|
||||||
payload = adapter.sync_all()
|
|
||||||
|
|
||||||
assert set(payload.keys()) == {
|
|
||||||
"players",
|
|
||||||
"competitions",
|
|
||||||
"teams",
|
|
||||||
"seasons",
|
|
||||||
"player_stats",
|
|
||||||
"player_careers",
|
|
||||||
"cursor",
|
|
||||||
}
|
|
||||||
assert payload["cursor"] is None
|
|
||||||
|
|
||||||
player = payload["players"][0]
|
|
||||||
assert set(player.keys()) == {
|
|
||||||
"external_id",
|
|
||||||
"first_name",
|
|
||||||
"last_name",
|
|
||||||
"full_name",
|
|
||||||
"birth_date",
|
|
||||||
"nationality",
|
|
||||||
"nominal_position",
|
|
||||||
"inferred_role",
|
|
||||||
"height_cm",
|
|
||||||
"weight_kg",
|
|
||||||
"dominant_hand",
|
|
||||||
"is_active",
|
|
||||||
"aliases",
|
|
||||||
}
|
|
||||||
@ -1,263 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from apps.providers.adapters.balldontlie_provider import BalldontlieProviderAdapter
|
|
||||||
from apps.providers.adapters.mvp_provider import MvpDemoProviderAdapter
|
|
||||||
from apps.providers.clients.balldontlie import BalldontlieClient
|
|
||||||
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError, ProviderUnauthorizedError
|
|
||||||
from apps.providers.registry import get_default_provider_namespace, get_provider
|
|
||||||
from apps.providers.services.balldontlie_mappings import map_seasons
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeResponse:
|
|
||||||
def __init__(self, *, status_code: int, payload: dict[str, Any] | None = None, headers: dict[str, str] | None = None, text: str = ""):
|
|
||||||
self.status_code = status_code
|
|
||||||
self._payload = payload or {}
|
|
||||||
self.headers = headers or {}
|
|
||||||
self.text = text
|
|
||||||
|
|
||||||
def json(self):
|
|
||||||
return self._payload
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeSession:
|
|
||||||
def __init__(self, responses: list[Any]):
|
|
||||||
self._responses = responses
|
|
||||||
self.calls: list[dict[str, Any]] = []
|
|
||||||
|
|
||||||
def get(self, *args, **kwargs):
|
|
||||||
self.calls.append(kwargs)
|
|
||||||
item = self._responses.pop(0)
|
|
||||||
if isinstance(item, Exception):
|
|
||||||
raise item
|
|
||||||
return item
|
|
||||||
|
|
||||||
|
|
||||||
class _FakeBalldontlieClient:
|
|
||||||
def get_json(self, path: str, *, params: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
||||||
if path == "/nba/v1/teams":
|
|
||||||
return {
|
|
||||||
"data": [
|
|
||||||
{
|
|
||||||
"id": 14,
|
|
||||||
"full_name": "Los Angeles Lakers",
|
|
||||||
"abbreviation": "LAL",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
return {"data": []}
|
|
||||||
|
|
||||||
def list_paginated(
|
|
||||||
self,
|
|
||||||
path: str,
|
|
||||||
*,
|
|
||||||
params: dict[str, Any] | None = None,
|
|
||||||
per_page: int = 100,
|
|
||||||
page_limit: int = 1,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
if path == "/nba/v1/players":
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
"id": 237,
|
|
||||||
"first_name": "LeBron",
|
|
||||||
"last_name": "James",
|
|
||||||
"position": "F",
|
|
||||||
"team": {"id": 14},
|
|
||||||
}
|
|
||||||
]
|
|
||||||
if path == "/nba/v1/stats":
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
"pts": 20,
|
|
||||||
"reb": 8,
|
|
||||||
"ast": 7,
|
|
||||||
"stl": 1,
|
|
||||||
"blk": 1,
|
|
||||||
"turnover": 3,
|
|
||||||
"fg_pct": 0.5,
|
|
||||||
"fg3_pct": 0.4,
|
|
||||||
"ft_pct": 0.9,
|
|
||||||
"min": "35:12",
|
|
||||||
"player": {"id": 237},
|
|
||||||
"team": {"id": 14},
|
|
||||||
"game": {"season": 2024},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"pts": 30,
|
|
||||||
"reb": 10,
|
|
||||||
"ast": 9,
|
|
||||||
"stl": 2,
|
|
||||||
"blk": 0,
|
|
||||||
"turnover": 4,
|
|
||||||
"fg_pct": 0.6,
|
|
||||||
"fg3_pct": 0.5,
|
|
||||||
"ft_pct": 1.0,
|
|
||||||
"min": "33:00",
|
|
||||||
"player": {"id": 237},
|
|
||||||
"team": {"id": 14},
|
|
||||||
"game": {"season": 2024},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_provider_registry_backend_selection(settings):
|
|
||||||
settings.PROVIDER_DEFAULT_NAMESPACE = ""
|
|
||||||
settings.PROVIDER_BACKEND = "demo"
|
|
||||||
assert get_default_provider_namespace() == "mvp_demo"
|
|
||||||
assert isinstance(get_provider(), MvpDemoProviderAdapter)
|
|
||||||
|
|
||||||
settings.PROVIDER_BACKEND = "balldontlie"
|
|
||||||
assert get_default_provider_namespace() == "balldontlie"
|
|
||||||
assert isinstance(get_provider(), BalldontlieProviderAdapter)
|
|
||||||
|
|
||||||
settings.PROVIDER_DEFAULT_NAMESPACE = "mvp_demo"
|
|
||||||
assert get_default_provider_namespace() == "mvp_demo"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
|
||||||
def test_balldontlie_adapter_maps_payloads(settings):
|
|
||||||
settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
|
|
||||||
adapter = BalldontlieProviderAdapter(client=_FakeBalldontlieClient())
|
|
||||||
|
|
||||||
payload = adapter.sync_all()
|
|
||||||
|
|
||||||
assert payload["competitions"][0]["external_id"] == "competition-nba"
|
|
||||||
assert payload["teams"][0]["external_id"] == "team-14"
|
|
||||||
assert payload["players"][0]["external_id"] == "player-237"
|
|
||||||
assert payload["seasons"][0]["external_id"] == "season-2024"
|
|
||||||
assert payload["player_stats"][0]["games_played"] == 2
|
|
||||||
assert payload["player_stats"][0]["points"] == 25.0
|
|
||||||
assert payload["player_stats"][0]["fg_pct"] == 55.0
|
|
||||||
|
|
||||||
player = payload["players"][0]
|
|
||||||
assert player["nationality"] is None
|
|
||||||
assert "current_team_external_id" not in player
|
|
||||||
|
|
||||||
expected_keys = {
|
|
||||||
"external_id",
|
|
||||||
"first_name",
|
|
||||||
"last_name",
|
|
||||||
"full_name",
|
|
||||||
"birth_date",
|
|
||||||
"nationality",
|
|
||||||
"nominal_position",
|
|
||||||
"inferred_role",
|
|
||||||
"height_cm",
|
|
||||||
"weight_kg",
|
|
||||||
"dominant_hand",
|
|
||||||
"is_active",
|
|
||||||
"aliases",
|
|
||||||
}
|
|
||||||
assert set(player.keys()) == expected_keys
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_map_seasons_marks_latest_as_current():
    """Duplicates collapse, output is ordered, and only the newest season is current."""
    mapped = map_seasons([2022, 2024, 2023, 2024])

    flagged = [season for season in mapped if season["is_current"]]
    assert len(flagged) == 1
    assert flagged[0]["external_id"] == "season-2024"

    ordered_ids = [season["external_id"] for season in mapped]
    assert ordered_ids == ["season-2022", "season-2023", "season-2024"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_adapter_degrades_when_stats_unauthorized(settings):
    """With strict stats disabled, a 401 on the stats path yields empty stat sections."""

    class _UnauthorizedStatsClient(_FakeBalldontlieClient):
        def list_paginated(self, path: str, *, params=None, per_page=100, page_limit=1):
            # Every path except stats behaves like the normal fake client.
            if path != "/nba/v1/stats":
                return super().list_paginated(
                    path, params=params, per_page=per_page, page_limit=page_limit
                )
            raise ProviderUnauthorizedError(
                provider="balldontlie",
                path="stats",
                status_code=401,
                detail="Unauthorized",
            )

    settings.PROVIDER_BALLDONTLIE_SEASONS = [2024]
    settings.PROVIDER_BALLDONTLIE_STATS_STRICT = False
    result = BalldontlieProviderAdapter(client=_UnauthorizedStatsClient()).sync_all()

    # Core entity sections still sync; stats-derived sections degrade to empty.
    assert result["players"]
    assert result["teams"]
    assert result["player_stats"] == []
    assert result["player_careers"] == []
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_client_retries_after_rate_limit(monkeypatch, settings):
    """A single 429 is retried and the follow-up 200 payload is returned."""
    monkeypatch.setattr(time, "sleep", lambda _: None)  # never really wait in tests
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    rate_limited = _FakeResponse(status_code=429, headers={"Retry-After": "0"})
    success = _FakeResponse(status_code=200, payload={"data": []})
    client = BalldontlieClient(session=_FakeSession(responses=[rate_limited, success]))

    assert client.get_json("players") == {"data": []}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_client_timeout_retries_then_fails(monkeypatch, settings):
    """Exhausting every retry on timeouts surfaces a ProviderTransientError."""
    monkeypatch.setattr(time, "sleep", lambda _: None)  # never really wait in tests
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    # One timeout per configured attempt, so the client can never recover.
    timeouts = [requests.Timeout("slow") for _ in range(2)]
    client = BalldontlieClient(session=_FakeSession(responses=timeouts))

    with pytest.raises(ProviderTransientError):
        client.get_json("players")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_client_raises_rate_limit_after_max_retries(monkeypatch, settings):
    """Persistent 429 responses eventually raise ProviderRateLimitError."""
    monkeypatch.setattr(time, "sleep", lambda _: None)  # never really wait in tests
    settings.PROVIDER_REQUEST_RETRIES = 2
    settings.PROVIDER_REQUEST_RETRY_SLEEP = 0

    # Enough throttled responses to consume every configured retry.
    throttled = [
        _FakeResponse(status_code=429, headers={"Retry-After": "1"}) for _ in range(2)
    ]
    client = BalldontlieClient(session=_FakeSession(responses=throttled))

    with pytest.raises(ProviderRateLimitError):
        client.get_json("players")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
def test_balldontlie_client_cursor_pagination(settings):
    """Pages are followed via meta.next_cursor until the cursor comes back None."""
    first_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 1}], "meta": {"next_cursor": 101}},
    )
    last_page = _FakeResponse(
        status_code=200,
        payload={"data": [{"id": 2}], "meta": {"next_cursor": None}},
    )
    session = _FakeSession(responses=[first_page, last_page])

    rows = BalldontlieClient(session=session).list_paginated("players", per_page=1, page_limit=5)

    assert rows == [{"id": 1}, {"id": 2}]
    # The opening request must not carry any pagination marker at all...
    opening_params = session.calls[0]["params"]
    assert "page" not in opening_params
    assert "cursor" not in opening_params
    # ...while the follow-up request echoes the cursor from the first response.
    assert session.calls[1]["params"]["cursor"] == 101
|
||||||
43
tests/test_scheduler_operational_safety.py
Normal file
43
tests/test_scheduler_operational_safety.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _repo_root() -> Path:
    """Absolute path of the repository checkout containing this test file."""
    # tests/ lives directly under the repo root, so one level above the parent.
    return Path(__file__).resolve().parents[1]
||||||
|
|
||||||
|
|
||||||
|
def test_scheduler_disabled_mode_stays_alive_without_exit_loop():
    """scheduler.sh with SCHEDULER_ENABLED=0 must idle instead of exiting.

    Fix: the previous cleanup could leak the child process -- if the script
    ignored SIGTERM, ``process.wait(timeout=5)`` raised TimeoutExpired inside
    the ``finally`` block and the scheduler kept running after the test.
    We now escalate to SIGKILL before waiting again.
    """
    env = os.environ.copy()
    env["SCHEDULER_ENABLED"] = "0"
    env["SCHEDULER_DISABLED_SLEEP_SECONDS"] = "30"

    process = subprocess.Popen(
        ["sh", "scripts/scheduler.sh"],
        cwd=_repo_root(),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    try:
        # Give the script a moment to start; it must still be alive afterwards.
        time.sleep(1.0)
        assert process.poll() is None
    finally:
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Escalate so the test never leaks a background scheduler process.
            process.kill()
            process.wait(timeout=5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_scheduler_compose_service_is_profile_gated():
    """The compose scheduler service must be opt-in (profile) and self-restarting."""
    compose_source = (_repo_root() / "docker-compose.yml").read_text(encoding="utf-8")
    for marker in ('profiles: ["scheduler"]', "restart: unless-stopped"):
        assert marker in compose_source
||||||
|
|
||||||
|
|
||||||
|
def test_scheduler_script_declares_idle_disabled_behavior():
    """scheduler.sh advertises its idle mode and honors the disabled-sleep knob."""
    script_source = (_repo_root() / "scripts/scheduler.sh").read_text(encoding="utf-8")
    for marker in ("Entering idle mode", "SCHEDULER_DISABLED_SLEEP_SECONDS"):
        assert marker in script_source
||||||
@ -15,6 +15,20 @@ def test_scouting_index_requires_login(client):
|
|||||||
assert reverse("users:login") in response.url
|
assert reverse("users:login") in response.url
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_saved_search_list_requires_login(client):
    """Anonymous visitors to the saved-search list are redirected to login."""
    redirect = client.get(reverse("scouting:saved_search_list"))
    assert redirect.status_code == 302
    assert reverse("users:login") in redirect.url
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_watchlist_requires_login(client):
    """Anonymous visitors to the watchlist are redirected to login."""
    redirect = client.get(reverse("scouting:watchlist"))
    assert redirect.status_code == 302
    assert reverse("users:login") in redirect.url
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_create_saved_search_from_filters(client):
|
def test_create_saved_search_from_filters(client):
|
||||||
user = User.objects.create_user(username="scout", password="pass12345")
|
user = User.objects.create_user(username="scout", password="pass12345")
|
||||||
@ -60,6 +74,60 @@ def test_saved_search_run_redirects_to_players(client):
|
|||||||
assert "q=rossi" in response.url
|
assert "q=rossi" in response.url
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_saved_search_update_renames_and_updates_filters(client):
    """Editing a saved search updates its name, visibility, and filter payload.

    Fix: the ``filters_json`` form value is now built with ``json.dumps``
    instead of %-formatting a raw string, so the payload stays valid JSON
    regardless of the interpolated value's type.
    """
    import json  # local import: this file's import block is outside this view

    user = User.objects.create_user(username="scout-update", password="pass12345")
    client.force_login(user)
    nationality = Nationality.objects.create(name="Germany", iso2_code="DE", iso3_code="DEU")

    saved = SavedSearch.objects.create(
        user=user,
        name="Old Name",
        filters={"q": "old", "sort": "name_asc"},
        is_public=False,
    )

    response = client.post(
        reverse("scouting:saved_search_edit", kwargs={"pk": saved.pk}),
        data={
            "name": "Updated Name",
            "is_public": "on",
            "filters_json": json.dumps(
                {"q": "new", "nationality": nationality.id, "sort": "ppg_desc"}
            ),
        },
    )

    assert response.status_code == 302
    saved.refresh_from_db()
    assert saved.name == "Updated Name"
    assert saved.is_public is True
    assert saved.filters["q"] == "new"
    assert saved.filters["nationality"] == nationality.id
    assert saved.filters["sort"] == "ppg_desc"
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_saved_search_delete_removes_entry(client):
    """POSTing the delete endpoint removes the saved search and redirects."""
    owner = User.objects.create_user(username="scout-delete", password="pass12345")
    client.force_login(owner)
    doomed = SavedSearch.objects.create(user=owner, name="Delete Me", filters={"q": "x"})

    response = client.post(reverse("scouting:saved_search_delete", kwargs={"pk": doomed.pk}))

    assert response.status_code == 302
    assert not SavedSearch.objects.filter(pk=doomed.pk).exists()
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_saved_search_delete_htmx_renders_table(client):
    """An HTMX delete responds with the refreshed (now empty) saved-search table."""
    owner = User.objects.create_user(username="scout-delete-htmx", password="pass12345")
    client.force_login(owner)
    entry = SavedSearch.objects.create(user=owner, name="Delete HTMX", filters={"q": "x"})

    response = client.post(
        reverse("scouting:saved_search_delete", kwargs={"pk": entry.pk}),
        HTTP_HX_REQUEST="true",
    )

    assert response.status_code == 200
    rendered = response.content.decode().lower()
    assert "no saved searches yet" in rendered
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_favorite_toggle_adds_and_removes(client):
|
def test_favorite_toggle_adds_and_removes(client):
|
||||||
user = User.objects.create_user(username="scout3", password="pass12345")
|
user = User.objects.create_user(username="scout3", password="pass12345")
|
||||||
@ -128,3 +196,26 @@ def test_save_search_htmx_feedback(client):
|
|||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "created" in response.content.decode().lower()
|
assert "created" in response.content.decode().lower()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_watchlist_page_renders_favorite_player(client):
    """The watchlist page lists a player the logged-in user marked as favorite."""
    viewer = User.objects.create_user(username="watch-user", password="pass12345")
    client.force_login(viewer)

    # Minimal related records a player row needs.
    poland = Nationality.objects.create(name="Poland", iso2_code="PL", iso3_code="POL")
    center = Position.objects.create(code="C", name="Center")
    rim_protector = Role.objects.create(code="rim", name="Rim Protector")

    favorite = Player.objects.create(
        first_name="Adam",
        last_name="Big",
        full_name="Adam Big",
        birth_date=date(2001, 1, 1),
        nationality=poland,
        nominal_position=center,
        inferred_role=rim_protector,
    )
    FavoritePlayer.objects.create(user=viewer, player=favorite)

    response = client.get(reverse("scouting:watchlist"))

    assert response.status_code == 200
    assert "Adam Big" in response.content.decode()
||||||
|
|||||||
15
tests/test_v2_runtime_boundaries.py
Normal file
15
tests/test_v2_runtime_boundaries.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import pytest
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_legacy_provider_stack_disabled_by_default():
    """The v2 runtime ships with the legacy provider stack switched off."""
    assert settings.LEGACY_PROVIDER_STACK_ENABLED is False
    installed = settings.INSTALLED_APPS
    assert "apps.providers" not in installed
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_providers_route_not_mounted_by_default(client):
    """With the legacy stack disabled, the /providers/ route must 404."""
    assert client.get("/providers/").status_code == 404
||||||
|
|
||||||
Reference in New Issue
Block a user