Reset to HoopScout v2 runtime foundation and simplified topology

This commit is contained in:
Alfredo Di Stasio
2026-03-13 10:31:29 +01:00
parent 3b5f1f37dd
commit bb033222e3
13 changed files with 247 additions and 748 deletions

View File

@ -1,87 +1,53 @@
# Django
# HoopScout v2 runtime profile
DJANGO_SETTINGS_MODULE=config.settings.development
DJANGO_ENV=development
# Required to be a strong, unique value outside development.
DJANGO_SECRET_KEY=change-me-in-production
DJANGO_DEBUG=1
DJANGO_SECRET_KEY=change-me-in-production
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1
DJANGO_TIME_ZONE=UTC
DJANGO_LOG_LEVEL=INFO
DJANGO_LOG_SQL=0
DJANGO_SUPERUSER_USERNAME=admin
DJANGO_SUPERUSER_EMAIL=admin@example.com
DJANGO_SUPERUSER_PASSWORD=adminpass
# Database (PostgreSQL only)
# Container image tags
APP_IMAGE_TAG=latest
NGINX_IMAGE_TAG=latest
# Reserved for future optional scheduler image:
# SCHEDULER_IMAGE_TAG=latest
# Web runtime behavior
GUNICORN_WORKERS=3
AUTO_APPLY_MIGRATIONS=1
AUTO_COLLECTSTATIC=1
# PostgreSQL (primary and only main database)
POSTGRES_DB=hoopscout
POSTGRES_USER=hoopscout
POSTGRES_PASSWORD=hoopscout
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
# Redis / Celery
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_DB=0
CELERY_BROKER_URL=redis://redis:6379/0
CELERY_RESULT_BACKEND=redis://redis:6379/0
# Runtime behavior
AUTO_APPLY_MIGRATIONS=1
AUTO_COLLECTSTATIC=1
AUTO_BUILD_TAILWIND=1
GUNICORN_WORKERS=3
# Development container UID/GID for bind-mounted source write permissions.
# Development UID/GID for bind-mounted source write permissions
LOCAL_UID=1000
LOCAL_GID=1000
# Production-minded security toggles
DJANGO_SECURE_SSL_REDIRECT=1
DJANGO_SECURE_HSTS_SECONDS=31536000
DJANGO_SESSION_COOKIE_SAMESITE=Lax
DJANGO_CSRF_COOKIE_SAMESITE=Lax
# Snapshot storage (volume-backed directories)
SNAPSHOT_INCOMING_DIR=/app/snapshots/incoming
SNAPSHOT_ARCHIVE_DIR=/app/snapshots/archive
SNAPSHOT_FAILED_DIR=/app/snapshots/failed
# Mandatory production variables (example values):
# Future optional scheduler loop settings (not enabled in base v2 runtime)
SCHEDULER_ENABLED=0
SCHEDULER_INTERVAL_SECONDS=900
# API safeguards (read-only API is optional)
API_THROTTLE_ANON=100/hour
API_THROTTLE_USER=1000/hour
# Production profile reminders:
# DJANGO_SETTINGS_MODULE=config.settings.production
# DJANGO_ENV=production
# DJANGO_DEBUG=0
# DJANGO_SECRET_KEY=<strong-unique-secret-at-least-32-chars>
# DJANGO_ALLOWED_HOSTS=app.example.com
# DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com
# Providers / ingestion
PROVIDER_BACKEND=demo
PROVIDER_NAMESPACE_DEMO=mvp_demo
PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie
PROVIDER_DEFAULT_NAMESPACE=
PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json
PROVIDER_REQUEST_RETRIES=3
PROVIDER_REQUEST_RETRY_SLEEP=1
PROVIDER_HTTP_TIMEOUT_SECONDS=10
PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io
PROVIDER_BALLDONTLIE_API_KEY=
# NBA-centric MVP provider seasons to ingest (comma-separated years).
PROVIDER_BALLDONTLIE_SEASONS=2024
PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5
PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100
PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10
PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
# When 0, a 401 on stats endpoint degrades to players/teams-only sync.
PROVIDER_BALLDONTLIE_STATS_STRICT=0
CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500
INGESTION_SCHEDULE_ENABLED=0
# 5-field cron: minute hour day_of_month month day_of_week
# Example hourly: 0 * * * *
INGESTION_SCHEDULE_CRON=*/30 * * * *
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
INGESTION_SCHEDULE_JOB_TYPE=incremental
INGESTION_PREVENT_OVERLAP=1
INGESTION_OVERLAP_WINDOW_MINUTES=180
API_THROTTLE_ANON=100/hour
API_THROTTLE_USER=1000/hour
# Testing (used with pytest-django)
# Keep development settings for local tests unless explicitly validating production settings.
PYTEST_ADDOPTS=-q

View File

@ -1,137 +1,87 @@
# Contributing to HoopScout
# Contributing to HoopScout v2
This repository follows a pragmatic GitFlow model.
The goal is predictable releases with low process overhead.
HoopScout uses GitFlow and a pragmatic, production-minded workflow.
## Branch Roles
- `main`: production-only, always releasable
- `develop`: integration branch for upcoming release
- `feature/*`: feature work, branched from `develop`, merged into `develop`
- `release/*`: stabilization branch, branched from `develop`, merged into `main` and back into `develop`
- `hotfix/*`: urgent production fixes, branched from `main`, merged into `main` and back into `develop`
- `develop`: integration branch
- `feature/*`: feature branches from `develop`
- `release/*`: release hardening branches from `develop`
- `hotfix/*`: urgent production fixes from `main`
## Branch Naming Convention
Use lowercase kebab-case.
## Branch Naming
Use lowercase kebab-case:
- `feature/<scope>-<short-description>`
- `release/<major>.<minor>.<patch>`
- `hotfix/<scope>-<short-description>`
Examples:
- `feature/hoopscout-v2-static-architecture`
- `feature/v2-snapshot-import-command`
- `release/2.0.0`
- `hotfix/nginx-proxy-timeout`
- `feature/search-age-height-filters`
- `feature/providers-mvp-retry-logic`
- `release/0.2.0`
- `hotfix/redis-volume-permissions`
## v2 Development Runtime
The v2 default runtime is intentionally simple:
- `web`
- `postgres`
- `nginx`
No Redis/Celery runtime services in the default v2 foundation.
### Start dev stack
```bash
cp .env.example .env
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
```
### Start release-style stack
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
```
## Day-to-Day Feature Workflow
1. Sync `develop`.
1. Sync `develop`
```bash
git checkout develop
git pull origin develop
```
2. Create branch.
2. Create feature branch
```bash
git checkout -b feature/your-feature-name
```
3. Implement, test, commit in small logical steps.
3. Implement with focused commits and tests.
4. Open PR: `feature/*` -> `develop`.
4. Rebase or merge latest `develop` before PR if needed.
## PR Checklist
```bash
git checkout develop
git pull origin develop
git checkout feature/your-feature-name
git rebase develop
```
- [ ] Target branch is correct
- [ ] Scope is focused (no unrelated refactor)
- [ ] Runtime still starts with docker compose
- [ ] Tests updated/passing for changed scope
- [ ] Docs updated (`README.md`, `.env.example`, this file) when config/runtime changes
- [ ] No secrets committed
5. Open PR: `feature/*` -> `develop`.
## v2 Foundation Rules
## Recommended Release Workflow
1. Create release branch from `develop`.
```bash
git checkout develop
git pull origin develop
git checkout -b release/0.1.0
```
2. On `release/*` allow only:
- bug fixes
- docs/changelog updates
- release metadata/version updates
3. Validate release candidate in Docker.
```bash
docker compose up -d --build
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
```
4. Merge `release/*` into `main`.
5. Tag release on `main` (`v0.1.0`).
6. Merge the same `release/*` back into `develop`.
7. Delete release branch after both merges.
## Recommended Hotfix Workflow
1. Create hotfix branch from `main`.
```bash
git checkout main
git pull origin main
git checkout -b hotfix/your-hotfix-name
```
2. Implement minimal fix and tests.
3. Open PR: `hotfix/*` -> `main`.
4. After merge to `main`, back-merge to `develop`.
5. Tag patch release (`vX.Y.Z`).
## Pull Request Checklist
Before requesting review, confirm:
- [ ] Branch target is correct (`develop`, `main`, or release back-merge)
- [ ] Scope is focused (no unrelated refactors)
- [ ] Docker stack still starts (`docker compose up -d`)
- [ ] Tests updated and passing
- [ ] Migrations included if models changed
- [ ] Docs updated (`README`, `CONTRIBUTING`, `.env.example`) when needed
- [ ] No secrets or credentials committed
- [ ] Changelog entry added under `Unreleased`
## Issue and Feature Templates
Use repository templates in `.github/ISSUE_TEMPLATE/`:
- `bug_report.md`
- `feature_request.md`
Use `.github/PULL_REQUEST_TEMPLATE.md` for PR descriptions.
## Changelog / Release Note Convention
- Single changelog file: `CHANGELOG.md`
- Keep `Unreleased` at top
- Categorize entries under:
- `Added`
- `Changed`
- `Fixed`
- Release format:
- `## [0.1.0] - 2026-03-10`
- Prefer management commands over distributed orchestration unless clearly justified.
- Keep PostgreSQL as source of truth.
- Keep snapshot storage file-based and volume-backed.
- Do not introduce MongoDB or Elasticsearch as source of truth.
## Repository Bootstrap Commands
Maintainers should run these once to start GitFlow from current `main`:
If `develop` is missing in a clone:
```bash
git checkout main
@ -139,39 +89,3 @@ git pull origin main
git checkout -b develop
git push -u origin develop
```
Then start regular feature work:
```bash
git checkout develop
git pull origin develop
git checkout -b feature/first-team-task
```
## Local Development Setup
```bash
cp .env.example .env
docker compose up --build
```
If needed:
```bash
docker compose exec web python manage.py migrate
docker compose exec web python manage.py createsuperuser
```
## Testing Commands
Run full suite:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
```
Run targeted modules while developing:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_players_views.py'
```

View File

@ -32,23 +32,19 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
WORKDIR /app
RUN apt-get update \
&& apt-get install -y --no-install-recommends libpq5 postgresql-client curl nodejs npm \
&& apt-get install -y --no-install-recommends libpq5 postgresql-client curl \
&& rm -rf /var/lib/apt/lists/*
RUN groupadd --gid "${APP_GID}" "${APP_USER}" \
&& useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}"
RUN printf '%s\n' 'export PATH="/opt/venv/bin:/home/app/.local/bin:$PATH"' > /etc/profile.d/hoopscout-path.sh
COPY --from=builder /opt/venv /opt/venv
COPY . /app
RUN if [ -f package.json ]; then npm install --no-audit --no-fund; fi
RUN if [ -f package.json ]; then npm run build; fi
RUN chmod +x /app/entrypoint.sh
RUN mkdir -p /app/staticfiles /app/media /app/runtime /app/node_modules /app/static/vendor \
RUN chmod +x /app/entrypoint.sh \
&& mkdir -p /app/staticfiles /app/media /app/snapshots/incoming /app/snapshots/archive /app/snapshots/failed \
&& chown -R "${APP_UID}:${APP_GID}" /app /opt/venv
USER ${APP_UID}:${APP_GID}
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000"]
CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "3", "--access-logfile", "-", "--error-logfile", "-"]

461
README.md
View File

@ -1,422 +1,121 @@
# HoopScout
# HoopScout v2 (Foundation Reset)
HoopScout is a production-minded basketball scouting and player search platform.
The main product experience is server-rendered Django Templates with HTMX enhancements.
A minimal read-only API is included as a secondary integration surface.
HoopScout v2 is a controlled greenfield rebuild inside the existing repository.
## Core Stack
Current v2 foundation scope in this branch:
- Django + HTMX server-rendered app
- PostgreSQL as the only primary database
- nginx reverse proxy
- management-command-driven runtime operations
- static snapshot directories persisted via Docker named volumes
- Python 3.12+
- Django
- Django Templates + HTMX
- Tailwind CSS (CLI build pipeline)
- PostgreSQL
- Redis
- Celery + Celery Beat
- Django REST Framework (read-only API)
- pytest
- Docker / Docker Compose
- nginx
Out of scope in this step:
- domain model redesign
- snapshot importer implementation
- extractor implementation
## Architecture Summary
## Runtime Architecture (v2)
- Main UI: Django + HTMX (not SPA)
- Data layer: normalized domain models for players, seasons, competitions, teams, stats, scouting state
- Provider integration: adapter-based abstraction in `apps/providers`
- Ingestion orchestration: `apps/ingestion` with run/error logs and Celery task execution
- Optional API: read-only DRF endpoints under `/api/`
Runtime services are intentionally small:
- `web` (Django/Gunicorn)
- `postgres` (primary DB)
- `nginx` (reverse proxy + static/media serving)
## Repository Structure
No Redis/Celery services are part of the v2 default runtime topology.
Legacy Celery/provider code is still in repository history/codebase but de-emphasized for v2.
```text
.
├── apps/
│ ├── api/
│ ├── competitions/
│ ├── core/
│ ├── ingestion/
│ ├── players/
│ ├── providers/
│ ├── scouting/
│ ├── stats/
│ ├── teams/
│ └── users/
├── config/
│ └── settings/
├── docs/
├── nginx/
├── requirements/
├── package.json
├── tailwind.config.js
├── static/
├── templates/
├── tests/
├── .github/
├── CHANGELOG.md
├── docker-compose.yml
├── Dockerfile
└── entrypoint.sh
```
## Image Strategy
## Quick Start
Compose builds and tags images as:
- `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}`
- `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}`
1. Create local env file:
Reserved for future optional scheduler use:
- `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}`
## Entrypoint Strategy
- `web`: `entrypoint.sh`
- waits for PostgreSQL
- optionally runs migrations/collectstatic
- ensures snapshot directories exist
- `nginx`: `nginx/entrypoint.sh`
- simple runtime entrypoint wrapper
## Compose Files
- `docker-compose.yml`: production-minded baseline runtime (immutable image filesystem)
- `docker-compose.dev.yml`: development override with source bind mount for `web`
- `docker-compose.release.yml`: production settings override (`DJANGO_SETTINGS_MODULE=config.settings.production`)
### Start development runtime
```bash
cp .env.example .env
```
2. Build and run services:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
```
This starts the development-oriented topology (source bind mounts enabled).
In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs).
3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually:
```bash
docker compose exec web python manage.py migrate
```
4. Create a superuser:
```bash
docker compose exec web python manage.py createsuperuser
```
5. Open the app:
- Web: http://localhost
- Admin: http://localhost/admin/
- Health: http://localhost/health/
- API root endpoints: `/api/players/`, `/api/competitions/`, `/api/teams/`, `/api/seasons/`
## Development vs Release Compose
Base compose (`docker-compose.yml`) is release-oriented and immutable for runtime services.
Development mutability is enabled via `docker-compose.dev.yml`.
Development startup (mutable source bind mounts for `web`/`celery_*`):
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build
```
Development startup with Tailwind watch:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build
```
Release-style startup (immutable runtime services):
### Start release-style runtime
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build
```
Optional release-style stop:
## Named Volumes
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml down
```
v2 runtime uses named volumes for persistence:
- `postgres_data`
- `static_data`
- `media_data`
- `snapshots_incoming`
- `snapshots_archive`
- `snapshots_failed`
Notes:
Development override uses separate dev-prefixed volumes to avoid ownership collisions.
- In release-style mode, `web`, `celery_worker`, and `celery_beat` run from built image filesystem with no repository source bind mount.
- In development mode (with `docker-compose.dev.yml`), `web`, `celery_worker`, and `celery_beat` are mutable and bind-mount `.:/app`.
- `tailwind` is a dev-profile service and is not required for release runtime.
- `nginx`, `postgres`, and `redis` service naming remains unchanged.
- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`.
## Environment Variables
## Release Topology Verification
Use `.env.example` as the source of truth.
Inspect merged release config:
Core groups:
- Django runtime/security vars
- PostgreSQL connection vars
- image tag vars (`APP_IMAGE_TAG`, `NGINX_IMAGE_TAG`)
- snapshot directory vars (`SNAPSHOT_*`)
- optional future scheduler vars (`SCHEDULER_*`)
```bash
docker compose -f docker-compose.yml -f docker-compose.release.yml config
```
## Snapshot Storage Convention
What to verify:
Snapshot files are expected under:
- incoming: `/app/snapshots/incoming`
- archive: `/app/snapshots/archive`
- failed: `/app/snapshots/failed`
- `services.web.volumes` does not include a bind mount from repository path to `/app`
- `services.celery_worker.volumes` does not include a bind mount from repository path to `/app`
- `services.celery_beat.volumes` does not include a bind mount from repository path to `/app`
- persistent named volumes still exist for `postgres_data`, `static_data`, `media_data`, `runtime_data`, and `redis_data`
In this foundation step, directories are created and persisted but no importer/extractor is implemented yet.
Automated local/CI-friendly check:
```bash
./scripts/verify_release_topology.sh
```
## Setup and Run Notes
- `web` service starts through `entrypoint.sh` and waits for PostgreSQL readiness.
- `web` service also builds Tailwind CSS before `collectstatic` when `AUTO_BUILD_TAILWIND=1`.
- `web`, `celery_worker`, `celery_beat`, and `tailwind` run as a non-root user inside the image.
- `celery_worker` executes background sync work.
- `celery_beat` triggers periodic provider sync (`apps.ingestion.tasks.scheduled_provider_sync`).
- `tailwind` service runs watch mode for development (`npm run dev`).
- nginx proxies web traffic and serves static/media volume mounts.
## Search Consistency Notes
- The server-rendered player search page (`/players/`) and read-only players API (`/api/players/`) use the same search form and ORM filter service.
- Sorting/filter semantics are aligned across UI, HTMX partial refreshes, and API responses.
- Search result metrics in the UI table use **best eligible semantics**:
- each metric (Games, MPG, PPG, RPG, APG) is the maximum value across eligible player-season rows
- eligibility is scoped by the active season/team/competition/stat filters
- different displayed metrics for one player can come from different eligible rows
- Metric-based API sorting (`ppg_*`, `mpg_*`) uses the same best-eligible semantics as UI search.
## Docker Volumes and Persistence
`docker-compose.yml` uses named volumes:
- `postgres_data`: PostgreSQL persistent database
- `static_data`: collected static assets
- `media_data`: user/provider media artifacts
- `runtime_data`: app runtime files (e.g., celery beat schedule)
- `redis_data`: Redis persistence (`/data` for RDB/AOF files)
- `node_modules_data`: Node modules cache for Tailwind builds in development override
This keeps persistent state outside container lifecycles.
In release-style mode, these volumes remain the persistence layer:
- `postgres_data` for database state
- `static_data` for collected static assets served by nginx
- `media_data` for uploaded/provider media
- `runtime_data` for Celery beat schedule/runtime files
- `redis_data` for Redis persistence
## Migrations
Create migration files:
```bash
docker compose exec web python manage.py makemigrations
```
Apply migrations:
## Migration and Superuser Commands
```bash
docker compose exec web python manage.py migrate
```
## Testing
Run all tests:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q'
```
Run a focused module:
```bash
docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_api.py'
```
## Frontend Assets (Tailwind)
Build Tailwind once:
```bash
docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build'
```
If you see `Permission denied` writing `static/vendor` or `static/css` in development, fix local file ownership once:
```bash
sudo chown -R "$(id -u):$(id -g)" static
```
Run Tailwind in watch mode during development:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up tailwind
```
Source CSS lives in `static/src/tailwind.css` and compiles to `static/css/main.css`.
HTMX is served from local static assets (`static/vendor/htmx.min.js`) instead of a CDN dependency.
## Production Configuration
Use production settings in deployed environments:
```bash
DJANGO_SETTINGS_MODULE=config.settings.production
DJANGO_DEBUG=0
DJANGO_ENV=production
```
When `DJANGO_DEBUG=0`, startup fails fast unless:
- `DJANGO_SECRET_KEY` is a real non-default value
- `DJANGO_ALLOWED_HOSTS` is set
- `DJANGO_CSRF_TRUSTED_ORIGINS` is set (for production settings)
Additional production safety checks:
- `DJANGO_SECRET_KEY` must be strong and non-default in non-development environments
- `DJANGO_ALLOWED_HOSTS` must not contain localhost-style values
- `DJANGO_CSRF_TRUSTED_ORIGINS` must be explicit HTTPS origins only (no localhost/http)
Production settings enable hardened defaults such as:
- secure cookies
- HSTS
- security headers
- `ManifestStaticFilesStorage` for static asset integrity/versioning
### Production Configuration Checklist
- `DJANGO_SETTINGS_MODULE=config.settings.production`
- `DJANGO_ENV=production`
- `DJANGO_DEBUG=0`
- strong `DJANGO_SECRET_KEY` (unique, non-default, >= 32 chars)
- explicit `DJANGO_ALLOWED_HOSTS` (no localhost values)
- explicit `DJANGO_CSRF_TRUSTED_ORIGINS` with HTTPS origins only
- `DJANGO_SECURE_SSL_REDIRECT=1` and `DJANGO_SECURE_HSTS_SECONDS` set appropriately
## Superuser and Auth
Create superuser:
```bash
docker compose exec web python manage.py createsuperuser
```
Default auth routes:
## Health Endpoints
- Signup: `/users/signup/`
- Login: `/users/login/`
- Logout: `/users/logout/`
- app health: `/health/`
- nginx healthcheck proxies `/health/` to `web`
## Ingestion and Manual Sync
## GitFlow
### Trigger via Django Admin
Required branch model:
- `main`: production
- `develop`: integration
- `feature/*`, `release/*`, `hotfix/*`
- Open `/admin/` -> `IngestionRun`
- Use admin actions:
- `Queue full sync (default provider)`
- `Queue incremental sync (default provider)`
- `Retry selected ingestion runs`
This v2 work branch is:
- `feature/hoopscout-v2-static-architecture`
### Trigger from shell (manual)
## Notes on Legacy Layers
```bash
docker compose exec web python manage.py shell
```
```python
from apps.ingestion.tasks import trigger_full_sync
trigger_full_sync.delay(provider_namespace="balldontlie")
```
### Logs and diagnostics
- Run-level status/counters: `IngestionRun`
- Structured error records: `IngestionError`
- Provider entity mappings + diagnostic payload snippets: `ExternalMapping`
- `IngestionRun.error_summary` captures top-level failure/partial-failure context
### Scheduled sync via Celery Beat
Configure scheduled sync through environment variables:
- `INGESTION_SCHEDULE_ENABLED` (`0`/`1`)
- `INGESTION_SCHEDULE_CRON` (5-field cron expression, default `*/30 * * * *`)
- `INGESTION_SCHEDULE_PROVIDER_NAMESPACE` (optional; falls back to default provider namespace)
- `INGESTION_SCHEDULE_JOB_TYPE` (`incremental` or `full_sync`)
- `INGESTION_PREVENT_OVERLAP` (`0`/`1`) to skip obvious overlapping runs
- `INGESTION_OVERLAP_WINDOW_MINUTES` overlap guard window
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
Valid cron examples:
- `*/30 * * * *` every 30 minutes
- `0 * * * *` hourly
- `15 2 * * *` daily at 02:15
Failure behavior for invalid cron values:
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
- periodic ingestion task is disabled until cron is fixed
- an error is logged at startup indicating the invalid schedule value
## Provider Backend Selection
Provider backend is selected via environment variables:
- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`)
- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`)
- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly
The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP).
The adapter follows the published balldontlie OpenAPI contract: server `https://api.balldontlie.io`, NBA endpoints under `/nba/v1/*`, cursor pagination via `meta.next_cursor`, and `stats` ingestion filtered by `seasons[]`.
Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when stats are unauthorized.
Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md).
## GitFlow Workflow
GitFlow is required in this repository:
- `main`: production branch
- `develop`: integration branch
- `feature/*`: new feature branches from `develop`
- `release/*`: release hardening branches from `develop`
- `hotfix/*`: urgent production fixes from `main`
Read full details in [CONTRIBUTING.md](CONTRIBUTING.md) and [docs/workflow.md](docs/workflow.md).
### Repository Bootstrap Commands
Run these from the current `main` branch to initialize local GitFlow usage:
```bash
git checkout main
git pull origin main
git checkout -b develop
git push -u origin develop
```
Start a feature branch:
```bash
git checkout develop
git pull origin develop
git checkout -b feature/player-search-tuning
```
Start a release branch:
```bash
git checkout develop
git pull origin develop
git checkout -b release/0.1.0
```
Start a hotfix branch:
```bash
git checkout main
git pull origin main
git checkout -b hotfix/fix-redis-persistence
```
## Release Notes / Changelog Convention
- Use [CHANGELOG.md](CHANGELOG.md) with an `Unreleased` section.
- For each merged PR, add short entries under:
- `Added`
- `Changed`
- `Fixed`
- On release, move `Unreleased` items to a dated version section (`[x.y.z] - YYYY-MM-DD`).
Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation.
They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks.

View File

@ -1,3 +1,8 @@
from .celery import app as celery_app
"""
HoopScout v2 runtime package.
__all__ = ("celery_app",)
Celery is intentionally not auto-loaded at import time in v2 foundation runtime.
Legacy task modules remain in-repo and can be loaded explicitly if needed.
"""
__all__ = ()

View File

@ -28,12 +28,12 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
def build_periodic_schedule() -> dict:
if not settings.INGESTION_SCHEDULE_ENABLED:
if not getattr(settings, "INGESTION_SCHEDULE_ENABLED", False):
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
return {}
try:
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
schedule_kwargs = _parse_cron_expression(getattr(settings, "INGESTION_SCHEDULE_CRON", "*/30 * * * *"))
return {
"ingestion.scheduled_provider_sync": {
"task": "apps.ingestion.tasks.scheduled_provider_sync",
@ -44,7 +44,7 @@ def build_periodic_schedule() -> dict:
logger.error(
"Invalid periodic ingestion schedule config. Task disabled. "
"INGESTION_SCHEDULE_CRON=%r error=%s",
settings.INGESTION_SCHEDULE_CRON,
getattr(settings, "INGESTION_SCHEDULE_CRON", ""),
exc,
)
return {}

View File

@ -142,23 +142,16 @@ LOGIN_URL = "users:login"
LOGIN_REDIRECT_URL = "core:dashboard"
LOGOUT_REDIRECT_URL = "core:home"
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0")
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/0")
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
CELERY_RESULT_SERIALIZER = "json"
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
INGESTION_SCHEDULE_ENABLED = env_bool("INGESTION_SCHEDULE_ENABLED", False)
INGESTION_SCHEDULE_CRON = os.getenv("INGESTION_SCHEDULE_CRON", "*/30 * * * *").strip()
INGESTION_SCHEDULE_PROVIDER_NAMESPACE = os.getenv("INGESTION_SCHEDULE_PROVIDER_NAMESPACE", "").strip()
INGESTION_SCHEDULE_JOB_TYPE = os.getenv("INGESTION_SCHEDULE_JOB_TYPE", "incremental").strip().lower()
INGESTION_PREVENT_OVERLAP = env_bool("INGESTION_PREVENT_OVERLAP", True)
INGESTION_OVERLAP_WINDOW_MINUTES = int(os.getenv("INGESTION_OVERLAP_WINDOW_MINUTES", "180"))
# HoopScout v2 snapshot storage (volume-backed directories).
SNAPSHOT_INCOMING_DIR = os.getenv("SNAPSHOT_INCOMING_DIR", str(BASE_DIR / "snapshots" / "incoming"))
SNAPSHOT_ARCHIVE_DIR = os.getenv("SNAPSHOT_ARCHIVE_DIR", str(BASE_DIR / "snapshots" / "archive"))
SNAPSHOT_FAILED_DIR = os.getenv("SNAPSHOT_FAILED_DIR", str(BASE_DIR / "snapshots" / "failed"))
if INGESTION_SCHEDULE_JOB_TYPE not in {"incremental", "full_sync"}:
raise ImproperlyConfigured("INGESTION_SCHEDULE_JOB_TYPE must be either 'incremental' or 'full_sync'.")
# Optional scheduler command settings for future v2 snapshot jobs.
SCHEDULER_ENABLED = env_bool("SCHEDULER_ENABLED", False)
SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))
if SCHEDULER_INTERVAL_SECONDS < 30:
raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.")
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")

View File

@ -3,25 +3,20 @@ services:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- node_modules_data:/app/node_modules
- static_data:/app/staticfiles
- media_data:/app/media
- runtime_data:/app/runtime
- static_data_dev:/app/staticfiles
- media_data_dev:/app/media
- snapshots_incoming_dev:/app/snapshots/incoming
- snapshots_archive_dev:/app/snapshots/archive
- snapshots_failed_dev:/app/snapshots/failed
celery_worker:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
nginx:
volumes:
- .:/app
- runtime_data:/app/runtime
- static_data_dev:/var/www/static:ro
- media_data_dev:/var/www/media:ro
celery_beat:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- runtime_data:/app/runtime
tailwind:
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- .:/app
- node_modules_data:/app/node_modules
volumes:
static_data_dev:
media_data_dev:
snapshots_incoming_dev:
snapshots_archive_dev:
snapshots_failed_dev:

View File

@ -2,14 +2,5 @@ services:
web:
environment:
DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_DEBUG: "0"
celery_worker:
environment:
DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_DEBUG: "0"
celery_beat:
environment:
DJANGO_SETTINGS_MODULE: config.settings.production
DJANGO_ENV: production
DJANGO_DEBUG: "0"

View File

@ -1,13 +1,43 @@
services:
web:
image: registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
depends_on:
postgres:
condition: service_healthy
user: "10001:10001"
volumes:
- static_data:/app/staticfiles
- media_data:/app/media
- snapshots_incoming:/app/snapshots/incoming
- snapshots_archive:/app/snapshots/archive
- snapshots_failed:/app/snapshots/failed
expose:
- "8000"
healthcheck:
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/health/ || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 25s
restart: unless-stopped
nginx:
image: nginx:1.27-alpine
image: registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}
build:
context: .
dockerfile: nginx/Dockerfile
depends_on:
web:
condition: service_healthy
ports:
- "80:80"
volumes:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- static_data:/var/www/static:ro
- media_data:/var/www/media:ro
read_only: true
@ -22,91 +52,6 @@ services:
start_period: 10s
restart: unless-stopped
web:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile -
user: "10001:10001"
volumes:
- static_data:/app/staticfiles
- media_data:/app/media
- runtime_data:/app/runtime
expose:
- "8000"
healthcheck:
test: ["CMD-SHELL", "curl -f http://127.0.0.1:8000/health/ || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 20s
restart: unless-stopped
tailwind:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
command: npm run dev
user: "10001:10001"
profiles:
- dev
restart: unless-stopped
celery_worker:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: celery -A config worker -l info
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
healthcheck:
test: ["CMD-SHELL", "celery -A config inspect ping -d celery@$$HOSTNAME | grep -q pong || exit 1"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: unless-stopped
celery_beat:
build:
context: .
dockerfile: Dockerfile
env_file:
- .env
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule
user: "10001:10001"
volumes:
- runtime_data:/app/runtime
healthcheck:
test: ["CMD-SHELL", "test -f /app/runtime/celerybeat-schedule || exit 1"]
interval: 30s
timeout: 5s
retries: 10
start_period: 20s
restart: unless-stopped
postgres:
image: postgres:16-alpine
environment:
@ -122,22 +67,10 @@ services:
retries: 5
restart: unless-stopped
redis:
image: redis:7-alpine
command: redis-server --save 60 1 --loglevel warning
volumes:
- redis_data:/data
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
restart: unless-stopped
volumes:
postgres_data:
static_data:
media_data:
runtime_data:
redis_data:
node_modules_data:
snapshots_incoming:
snapshots_archive:
snapshots_failed:

View File

@ -8,6 +8,10 @@ done
echo "PostgreSQL is available."
mkdir -p "${SNAPSHOT_INCOMING_DIR:-/app/snapshots/incoming}" \
"${SNAPSHOT_ARCHIVE_DIR:-/app/snapshots/archive}" \
"${SNAPSHOT_FAILED_DIR:-/app/snapshots/failed}"
if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then
echo "Running Django deployment checks..."
python manage.py check --deploy --fail-level WARNING
@ -19,15 +23,6 @@ if [ "${AUTO_APPLY_MIGRATIONS:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
fi
if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then
if [ "${AUTO_BUILD_TAILWIND:-1}" = "1" ] && [ -f /app/package.json ]; then
if [ -x /app/node_modules/.bin/tailwindcss ]; then
echo "Building Tailwind assets..."
npm run build
else
echo "Tailwind dependencies missing; skipping AUTO_BUILD_TAILWIND."
fi
fi
echo "Collecting static files..."
python manage.py collectstatic --noinput
fi

8
nginx/Dockerfile Normal file
View File

@ -0,0 +1,8 @@
# syntax=docker/dockerfile:1
# Custom nginx image for HoopScout v2: bakes the reverse-proxy config and a
# minimal entrypoint wrapper into the image so runtime needs no bind mounts.
FROM nginx:1.27-alpine

# Proxy configuration is part of the image (self-contained release artifact).
COPY nginx/nginx.conf /etc/nginx/nginx.conf

# COPY --chmod sets the execute bit in the same layer; a follow-up RUN chmod
# would duplicate the file into an extra layer for no benefit.
COPY --chmod=755 nginx/entrypoint.sh /entrypoint.sh

# Entrypoint execs the CMD so nginx runs as PID 1 and receives stop signals.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["nginx", "-g", "daemon off;"]

4
nginx/entrypoint.sh Normal file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Minimal nginx entrypoint wrapper: abort on any command failure, then replace
# this shell with the container CMD via exec so nginx becomes PID 1 and
# receives SIGTERM/SIGQUIT directly from `docker stop`.
set -e
exec "$@"