diff --git a/.env.example b/.env.example index 0ca5098..b3a1dbb 100644 --- a/.env.example +++ b/.env.example @@ -1,87 +1,53 @@ -# Django +# HoopScout v2 runtime profile DJANGO_SETTINGS_MODULE=config.settings.development DJANGO_ENV=development -# Required to be a strong, unique value outside development. -DJANGO_SECRET_KEY=change-me-in-production DJANGO_DEBUG=1 +DJANGO_SECRET_KEY=change-me-in-production DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1 DJANGO_CSRF_TRUSTED_ORIGINS=http://localhost,http://127.0.0.1 DJANGO_TIME_ZONE=UTC DJANGO_LOG_LEVEL=INFO DJANGO_LOG_SQL=0 -DJANGO_SUPERUSER_USERNAME=admin -DJANGO_SUPERUSER_EMAIL=admin@example.com -DJANGO_SUPERUSER_PASSWORD=adminpass -# Database (PostgreSQL only) +# Container image tags +APP_IMAGE_TAG=latest +NGINX_IMAGE_TAG=latest +# Reserved for future optional scheduler image: +# SCHEDULER_IMAGE_TAG=latest + +# Web runtime behavior +GUNICORN_WORKERS=3 +AUTO_APPLY_MIGRATIONS=1 +AUTO_COLLECTSTATIC=1 + +# PostgreSQL (primary and only main database) POSTGRES_DB=hoopscout POSTGRES_USER=hoopscout POSTGRES_PASSWORD=hoopscout POSTGRES_HOST=postgres POSTGRES_PORT=5432 -# Redis / Celery -REDIS_HOST=redis -REDIS_PORT=6379 -REDIS_DB=0 -CELERY_BROKER_URL=redis://redis:6379/0 -CELERY_RESULT_BACKEND=redis://redis:6379/0 - -# Runtime behavior -AUTO_APPLY_MIGRATIONS=1 -AUTO_COLLECTSTATIC=1 -AUTO_BUILD_TAILWIND=1 -GUNICORN_WORKERS=3 -# Development container UID/GID for bind-mounted source write permissions. 
+# Development UID/GID for bind-mounted source write permissions LOCAL_UID=1000 LOCAL_GID=1000 -# Production-minded security toggles -DJANGO_SECURE_SSL_REDIRECT=1 -DJANGO_SECURE_HSTS_SECONDS=31536000 -DJANGO_SESSION_COOKIE_SAMESITE=Lax -DJANGO_CSRF_COOKIE_SAMESITE=Lax +# Snapshot storage (volume-backed directories) +SNAPSHOT_INCOMING_DIR=/app/snapshots/incoming +SNAPSHOT_ARCHIVE_DIR=/app/snapshots/archive +SNAPSHOT_FAILED_DIR=/app/snapshots/failed -# Mandatory production variables (example values): +# Future optional scheduler loop settings (not enabled in base v2 runtime) +SCHEDULER_ENABLED=0 +SCHEDULER_INTERVAL_SECONDS=900 + +# API safeguards (read-only API is optional) +API_THROTTLE_ANON=100/hour +API_THROTTLE_USER=1000/hour + +# Production profile reminders: # DJANGO_SETTINGS_MODULE=config.settings.production # DJANGO_ENV=production # DJANGO_DEBUG=0 # DJANGO_SECRET_KEY= # DJANGO_ALLOWED_HOSTS=app.example.com # DJANGO_CSRF_TRUSTED_ORIGINS=https://app.example.com - -# Providers / ingestion -PROVIDER_BACKEND=demo -PROVIDER_NAMESPACE_DEMO=mvp_demo -PROVIDER_NAMESPACE_BALLDONTLIE=balldontlie -PROVIDER_DEFAULT_NAMESPACE= -PROVIDER_MVP_DATA_FILE=/app/apps/providers/data/mvp_provider.json -PROVIDER_REQUEST_RETRIES=3 -PROVIDER_REQUEST_RETRY_SLEEP=1 -PROVIDER_HTTP_TIMEOUT_SECONDS=10 -PROVIDER_BALLDONTLIE_BASE_URL=https://api.balldontlie.io -PROVIDER_BALLDONTLIE_API_KEY= -# NBA-centric MVP provider seasons to ingest (comma-separated years). -PROVIDER_BALLDONTLIE_SEASONS=2024 -PROVIDER_BALLDONTLIE_PLAYERS_PAGE_LIMIT=5 -PROVIDER_BALLDONTLIE_PLAYERS_PER_PAGE=100 -PROVIDER_BALLDONTLIE_STATS_PAGE_LIMIT=10 -PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100 -# When 0, a 401 on stats endpoint degrades to players/teams-only sync. 
-PROVIDER_BALLDONTLIE_STATS_STRICT=0 -CELERY_TASK_TIME_LIMIT=1800 -CELERY_TASK_SOFT_TIME_LIMIT=1500 -INGESTION_SCHEDULE_ENABLED=0 -# 5-field cron: minute hour day_of_month month day_of_week -# Example hourly: 0 * * * * -INGESTION_SCHEDULE_CRON=*/30 * * * * -INGESTION_SCHEDULE_PROVIDER_NAMESPACE= -INGESTION_SCHEDULE_JOB_TYPE=incremental -INGESTION_PREVENT_OVERLAP=1 -INGESTION_OVERLAP_WINDOW_MINUTES=180 -API_THROTTLE_ANON=100/hour -API_THROTTLE_USER=1000/hour - -# Testing (used with pytest-django) -# Keep development settings for local tests unless explicitly validating production settings. -PYTEST_ADDOPTS=-q diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cece440..1b803bb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,137 +1,87 @@ -# Contributing to HoopScout +# Contributing to HoopScout v2 -This repository follows a pragmatic GitFlow model. -The goal is predictable releases with low process overhead. +HoopScout uses GitFlow and a pragmatic, production-minded workflow. ## Branch Roles - `main`: production-only, always releasable -- `develop`: integration branch for upcoming release -- `feature/*`: feature work, branched from `develop`, merged into `develop` -- `release/*`: stabilization branch, branched from `develop`, merged into `main` and back into `develop` -- `hotfix/*`: urgent production fixes, branched from `main`, merged into `main` and back into `develop` +- `develop`: integration branch +- `feature/*`: feature branches from `develop` +- `release/*`: release hardening branches from `develop` +- `hotfix/*`: urgent production fixes from `main` -## Branch Naming Convention - -Use lowercase kebab-case. 
+## Branch Naming +Use lowercase kebab-case: - `feature/<scope>-<short-description>` - `release/<major>.<minor>.<patch>` - `hotfix/<scope>-<short-description>` Examples: +- `feature/hoopscout-v2-static-architecture` +- `feature/v2-snapshot-import-command` +- `release/2.0.0` +- `hotfix/nginx-proxy-timeout` -- `feature/search-age-height-filters` -- `feature/providers-mvp-retry-logic` -- `release/0.2.0` -- `hotfix/redis-volume-permissions` +## v2 Development Runtime + +The v2 default runtime is intentionally simple: +- `web` +- `postgres` +- `nginx` + +No Redis/Celery runtime services in the default v2 foundation. + +### Start dev stack + +```bash +cp .env.example .env +docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build +``` + +### Start release-style stack + +```bash +docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build +``` ## Day-to-Day Feature Workflow -1. Sync `develop`. +1. Sync `develop` ```bash git checkout develop git pull origin develop ``` -2. Create branch. +2. Create feature branch ```bash git checkout -b feature/your-feature-name ``` -3. Implement, test, commit in small logical steps. +3. Implement with focused commits and tests. +4. Open PR: `feature/*` -> `develop`. -4. Rebase or merge latest `develop` before PR if needed. +## PR Checklist -```bash -git checkout develop -git pull origin develop -git checkout feature/your-feature-name -git rebase develop -``` +- [ ] Target branch is correct +- [ ] Scope is focused (no unrelated refactor) +- [ ] Runtime still starts with docker compose +- [ ] Tests updated/passing for changed scope +- [ ] Docs updated (`README.md`, `.env.example`, this file) when config/runtime changes +- [ ] No secrets committed -5. Open PR: `feature/*` -> `develop`. +## v2 Foundation Rules -## Recommended Release Workflow - -1. Create release branch from `develop`. - -```bash -git checkout develop -git pull origin develop -git checkout -b release/0.1.0 -``` - -2.
On `release/*` allow only: -- bug fixes -- docs/changelog updates -- release metadata/version updates - -3. Validate release candidate in Docker. - -```bash -docker compose up -d --build -docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q' -``` - -4. Merge `release/*` into `main`. -5. Tag release on `main` (`v0.1.0`). -6. Merge the same `release/*` back into `develop`. -7. Delete release branch after both merges. - -## Recommended Hotfix Workflow - -1. Create hotfix branch from `main`. - -```bash -git checkout main -git pull origin main -git checkout -b hotfix/your-hotfix-name -``` - -2. Implement minimal fix and tests. -3. Open PR: `hotfix/*` -> `main`. -4. After merge to `main`, back-merge to `develop`. -5. Tag patch release (`vX.Y.Z`). - -## Pull Request Checklist - -Before requesting review, confirm: - -- [ ] Branch target is correct (`develop`, `main`, or release back-merge) -- [ ] Scope is focused (no unrelated refactors) -- [ ] Docker stack still starts (`docker compose up -d`) -- [ ] Tests updated and passing -- [ ] Migrations included if models changed -- [ ] Docs updated (`README`, `CONTRIBUTING`, `.env.example`) when needed -- [ ] No secrets or credentials committed -- [ ] Changelog entry added under `Unreleased` - -## Issue and Feature Templates - -Use repository templates in `.github/ISSUE_TEMPLATE/`: - -- `bug_report.md` -- `feature_request.md` - -Use `.github/PULL_REQUEST_TEMPLATE.md` for PR descriptions. - -## Changelog / Release Note Convention - -- Single changelog file: `CHANGELOG.md` -- Keep `Unreleased` at top -- Categorize entries under: - - `Added` - - `Changed` - - `Fixed` -- Release format: - - `## [0.1.0] - 2026-03-10` +- Prefer management commands over distributed orchestration unless clearly justified. +- Keep PostgreSQL as source of truth. +- Keep snapshot storage file-based and volume-backed. +- Do not introduce MongoDB or Elasticsearch as source of truth. 
## Repository Bootstrap Commands -Maintainers should run these once to start GitFlow from current `main`: +If `develop` is missing in a clone: ```bash git checkout main @@ -139,39 +89,3 @@ git pull origin main git checkout -b develop git push -u origin develop ``` - -Then start regular feature work: - -```bash -git checkout develop -git pull origin develop -git checkout -b feature/first-team-task -``` - -## Local Development Setup - -```bash -cp .env.example .env -docker compose up --build -``` - -If needed: - -```bash -docker compose exec web python manage.py migrate -docker compose exec web python manage.py createsuperuser -``` - -## Testing Commands - -Run full suite: - -```bash -docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q' -``` - -Run targeted modules while developing: - -```bash -docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_players_views.py' -``` diff --git a/Dockerfile b/Dockerfile index 5f74b26..d28329c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,23 +32,19 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app RUN apt-get update \ - && apt-get install -y --no-install-recommends libpq5 postgresql-client curl nodejs npm \ + && apt-get install -y --no-install-recommends libpq5 postgresql-client curl \ && rm -rf /var/lib/apt/lists/* RUN groupadd --gid "${APP_GID}" "${APP_USER}" \ && useradd --uid "${APP_UID}" --gid "${APP_GID}" --create-home --shell /usr/sbin/nologin "${APP_USER}" -RUN printf '%s\n' 'export PATH="/opt/venv/bin:/home/app/.local/bin:$PATH"' > /etc/profile.d/hoopscout-path.sh COPY --from=builder /opt/venv /opt/venv COPY . 
/app -RUN if [ -f package.json ]; then npm install --no-audit --no-fund; fi -RUN if [ -f package.json ]; then npm run build; fi - -RUN chmod +x /app/entrypoint.sh -RUN mkdir -p /app/staticfiles /app/media /app/runtime /app/node_modules /app/static/vendor \ +RUN chmod +x /app/entrypoint.sh \ + && mkdir -p /app/staticfiles /app/media /app/snapshots/incoming /app/snapshots/archive /app/snapshots/failed \ && chown -R "${APP_UID}:${APP_GID}" /app /opt/venv USER ${APP_UID}:${APP_GID} ENTRYPOINT ["/app/entrypoint.sh"] -CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000"] +CMD ["gunicorn", "config.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "3", "--access-logfile", "-", "--error-logfile", "-"] diff --git a/README.md b/README.md index b2a3a50..188f668 100644 --- a/README.md +++ b/README.md @@ -1,422 +1,121 @@ -# HoopScout +# HoopScout v2 (Foundation Reset) -HoopScout is a production-minded basketball scouting and player search platform. -The main product experience is server-rendered Django Templates with HTMX enhancements. -A minimal read-only API is included as a secondary integration surface. +HoopScout v2 is a controlled greenfield rebuild inside the existing repository. 
-## Core Stack +Current v2 foundation scope in this branch: +- Django + HTMX server-rendered app +- PostgreSQL as the only primary database +- nginx reverse proxy +- management-command-driven runtime operations +- static snapshot directories persisted via Docker named volumes -- Python 3.12+ -- Django -- Django Templates + HTMX -- Tailwind CSS (CLI build pipeline) -- PostgreSQL -- Redis -- Celery + Celery Beat -- Django REST Framework (read-only API) -- pytest -- Docker / Docker Compose -- nginx +Out of scope in this step: +- domain model redesign +- snapshot importer implementation +- extractor implementation -## Architecture Summary +## Runtime Architecture (v2) -- Main UI: Django + HTMX (not SPA) -- Data layer: normalized domain models for players, seasons, competitions, teams, stats, scouting state -- Provider integration: adapter-based abstraction in `apps/providers` -- Ingestion orchestration: `apps/ingestion` with run/error logs and Celery task execution -- Optional API: read-only DRF endpoints under `/api/` +Runtime services are intentionally small: +- `web` (Django/Gunicorn) +- `postgres` (primary DB) +- `nginx` (reverse proxy + static/media serving) -## Repository Structure +No Redis/Celery services are part of the v2 default runtime topology. +Legacy Celery/provider code is still present in the codebase (and repository history) but is de-emphasized for v2. -```text -.
-├── apps/ -│ ├── api/ -│ ├── competitions/ -│ ├── core/ -│ ├── ingestion/ -│ ├── players/ -│ ├── providers/ -│ ├── scouting/ -│ ├── stats/ -│ ├── teams/ -│ └── users/ -├── config/ -│ └── settings/ -├── docs/ -├── nginx/ -├── requirements/ -├── package.json -├── tailwind.config.js -├── static/ -├── templates/ -├── tests/ -├── .github/ -├── CHANGELOG.md -├── docker-compose.yml -├── Dockerfile -└── entrypoint.sh -``` +## Image Strategy -## Quick Start +Compose builds and tags images as: +- `registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest}` +- `registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest}` -1. Create local env file: +Reserved for future optional scheduler use: +- `registry.younerd.org/hoopscout/scheduler:${APP_IMAGE_TAG:-latest}` + +## Entrypoint Strategy + +- `web`: `entrypoint.sh` + - waits for PostgreSQL + - optionally runs migrations/collectstatic + - ensures snapshot directories exist +- `nginx`: `nginx/entrypoint.sh` + - simple runtime entrypoint wrapper + +## Compose Files + +- `docker-compose.yml`: production-minded baseline runtime (immutable image filesystem) +- `docker-compose.dev.yml`: development override with source bind mount for `web` +- `docker-compose.release.yml`: production settings override (`DJANGO_SETTINGS_MODULE=config.settings.production`) + +### Start development runtime ```bash cp .env.example .env -``` - -2. Build and run services: - -```bash -docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build -``` - -This starts the development-oriented topology (source bind mounts enabled). -In development, bind-mounted app containers run as `LOCAL_UID`/`LOCAL_GID` from `.env` (set them to your host user/group IDs). - -3. If `AUTO_APPLY_MIGRATIONS=0`, run migrations manually: - -```bash -docker compose exec web python manage.py migrate -``` - -4. Create a superuser: - -```bash -docker compose exec web python manage.py createsuperuser -``` - -5. 
Open the app: - -- Web: http://localhost -- Admin: http://localhost/admin/ -- Health: http://localhost/health/ -- API root endpoints: `/api/players/`, `/api/competitions/`, `/api/teams/`, `/api/seasons/` - -## Development vs Release Compose - -Base compose (`docker-compose.yml`) is release-oriented and immutable for runtime services. -Development mutability is enabled via `docker-compose.dev.yml`. - -Development startup (mutable source bind mounts for `web`/`celery_*`): - -```bash docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build ``` -Development startup with Tailwind watch: - -```bash -docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up --build -``` - -Release-style startup (immutable runtime services): +### Start release-style runtime ```bash docker compose -f docker-compose.yml -f docker-compose.release.yml up -d --build ``` -Optional release-style stop: +## Named Volumes -```bash -docker compose -f docker-compose.yml -f docker-compose.release.yml down -``` +v2 runtime uses named volumes for persistence: +- `postgres_data` +- `static_data` +- `media_data` +- `snapshots_incoming` +- `snapshots_archive` +- `snapshots_failed` -Notes: +Development override uses separate dev-prefixed volumes to avoid ownership collisions. -- In release-style mode, `web`, `celery_worker`, and `celery_beat` run from built image filesystem with no repository source bind mount. -- In development mode (with `docker-compose.dev.yml`), `web`, `celery_worker`, and `celery_beat` are mutable and bind-mount `.:/app`. -- `tailwind` is a dev-profile service and is not required for release runtime. -- `nginx`, `postgres`, and `redis` service naming remains unchanged. -- Release-style `web`, `celery_worker`, and `celery_beat` explicitly run as container user `10001:10001`. +## Environment Variables -## Release Topology Verification +Use `.env.example` as the source of truth. 
-Inspect merged release config: +Core groups: +- Django runtime/security vars +- PostgreSQL connection vars +- image tag vars (`APP_IMAGE_TAG`, `NGINX_IMAGE_TAG`) +- snapshot directory vars (`SNAPSHOT_*`) +- optional future scheduler vars (`SCHEDULER_*`) -```bash -docker compose -f docker-compose.yml -f docker-compose.release.yml config -``` +## Snapshot Storage Convention -What to verify: +Snapshot files are expected under: +- incoming: `/app/snapshots/incoming` +- archive: `/app/snapshots/archive` +- failed: `/app/snapshots/failed` -- `services.web.volumes` does not include a bind mount from repository path to `/app` -- `services.celery_worker.volumes` does not include a bind mount from repository path to `/app` -- `services.celery_beat.volumes` does not include a bind mount from repository path to `/app` -- persistent named volumes still exist for `postgres_data`, `static_data`, `media_data`, `runtime_data`, and `redis_data` +In this foundation step, directories are created and persisted but no importer/extractor is implemented yet. -Automated local/CI-friendly check: - -```bash -./scripts/verify_release_topology.sh -``` - -## Setup and Run Notes - -- `web` service starts through `entrypoint.sh` and waits for PostgreSQL readiness. -- `web` service also builds Tailwind CSS before `collectstatic` when `AUTO_BUILD_TAILWIND=1`. -- `web`, `celery_worker`, `celery_beat`, and `tailwind` run as a non-root user inside the image. -- `celery_worker` executes background sync work. -- `celery_beat` triggers periodic provider sync (`apps.ingestion.tasks.scheduled_provider_sync`). -- `tailwind` service runs watch mode for development (`npm run dev`). -- nginx proxies web traffic and serves static/media volume mounts. - -## Search Consistency Notes - -- The server-rendered player search page (`/players/`) and read-only players API (`/api/players/`) use the same search form and ORM filter service. 
-- Sorting/filter semantics are aligned across UI, HTMX partial refreshes, and API responses. -- Search result metrics in the UI table use **best eligible semantics**: - - each metric (Games, MPG, PPG, RPG, APG) is the maximum value across eligible player-season rows - - eligibility is scoped by the active season/team/competition/stat filters - - different displayed metrics for one player can come from different eligible rows -- Metric-based API sorting (`ppg_*`, `mpg_*`) uses the same best-eligible semantics as UI search. - -## Docker Volumes and Persistence - -`docker-compose.yml` uses named volumes: - -- `postgres_data`: PostgreSQL persistent database -- `static_data`: collected static assets -- `media_data`: user/provider media artifacts -- `runtime_data`: app runtime files (e.g., celery beat schedule) -- `redis_data`: Redis persistence (`/data` for RDB/AOF files) -- `node_modules_data`: Node modules cache for Tailwind builds in development override - -This keeps persistent state outside container lifecycles. 
- -In release-style mode, these volumes remain the persistence layer: - -- `postgres_data` for database state -- `static_data` for collected static assets served by nginx -- `media_data` for uploaded/provider media -- `runtime_data` for Celery beat schedule/runtime files -- `redis_data` for Redis persistence - -## Migrations - -Create migration files: - -```bash -docker compose exec web python manage.py makemigrations -``` - -Apply migrations: +## Migration and Superuser Commands ```bash docker compose exec web python manage.py migrate -``` - -## Testing - -Run all tests: - -```bash -docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q' -``` - -Run a focused module: - -```bash -docker compose run --rm web sh -lc 'pip install -r requirements/dev.txt && pytest -q tests/test_api.py' -``` - -## Frontend Assets (Tailwind) - -Build Tailwind once: - -```bash -docker compose run --rm web sh -lc 'npm install --no-audit --no-fund && npm run build' -``` - -If you see `Permission denied` writing `static/vendor` or `static/css` in development, fix local file ownership once: - -```bash -sudo chown -R "$(id -u):$(id -g)" static -``` - -Run Tailwind in watch mode during development: - -```bash -docker compose -f docker-compose.yml -f docker-compose.dev.yml --profile dev up tailwind -``` - -Source CSS lives in `static/src/tailwind.css` and compiles to `static/css/main.css`. -HTMX is served from local static assets (`static/vendor/htmx.min.js`) instead of a CDN dependency. 
- -## Production Configuration - -Use production settings in deployed environments: - -```bash -DJANGO_SETTINGS_MODULE=config.settings.production -DJANGO_DEBUG=0 -DJANGO_ENV=production -``` - -When `DJANGO_DEBUG=0`, startup fails fast unless: - -- `DJANGO_SECRET_KEY` is a real non-default value -- `DJANGO_ALLOWED_HOSTS` is set -- `DJANGO_CSRF_TRUSTED_ORIGINS` is set (for production settings) - -Additional production safety checks: - -- `DJANGO_SECRET_KEY` must be strong and non-default in non-development environments -- `DJANGO_ALLOWED_HOSTS` must not contain localhost-style values -- `DJANGO_CSRF_TRUSTED_ORIGINS` must be explicit HTTPS origins only (no localhost/http) - -Production settings enable hardened defaults such as: - -- secure cookies -- HSTS -- security headers -- `ManifestStaticFilesStorage` for static asset integrity/versioning - -### Production Configuration Checklist - -- `DJANGO_SETTINGS_MODULE=config.settings.production` -- `DJANGO_ENV=production` -- `DJANGO_DEBUG=0` -- strong `DJANGO_SECRET_KEY` (unique, non-default, >= 32 chars) -- explicit `DJANGO_ALLOWED_HOSTS` (no localhost values) -- explicit `DJANGO_CSRF_TRUSTED_ORIGINS` with HTTPS origins only -- `DJANGO_SECURE_SSL_REDIRECT=1` and `DJANGO_SECURE_HSTS_SECONDS` set appropriately - -## Superuser and Auth - -Create superuser: - -```bash docker compose exec web python manage.py createsuperuser ``` -Default auth routes: +## Health Endpoints -- Signup: `/users/signup/` -- Login: `/users/login/` -- Logout: `/users/logout/` +- app health: `/health/` +- nginx healthcheck proxies `/health/` to `web` -## Ingestion and Manual Sync +## GitFlow -### Trigger via Django Admin +Required branch model: +- `main`: production +- `develop`: integration +- `feature/*`, `release/*`, `hotfix/*` -- Open `/admin/` -> `IngestionRun` -- Use admin actions: - - `Queue full sync (default provider)` - - `Queue incremental sync (default provider)` - - `Retry selected ingestion runs` +This v2 work branch is: +- 
`feature/hoopscout-v2-static-architecture` -### Trigger from shell (manual) +## Notes on Legacy Layers -```bash -docker compose exec web python manage.py shell -``` - -```python -from apps.ingestion.tasks import trigger_full_sync -trigger_full_sync.delay(provider_namespace="balldontlie") -``` - -### Logs and diagnostics - -- Run-level status/counters: `IngestionRun` -- Structured error records: `IngestionError` -- Provider entity mappings + diagnostic payload snippets: `ExternalMapping` -- `IngestionRun.error_summary` captures top-level failure/partial-failure context - -### Scheduled sync via Celery Beat - -Configure scheduled sync through environment variables: - -- `INGESTION_SCHEDULE_ENABLED` (`0`/`1`) -- `INGESTION_SCHEDULE_CRON` (5-field cron expression, default `*/30 * * * *`) -- `INGESTION_SCHEDULE_PROVIDER_NAMESPACE` (optional; falls back to default provider namespace) -- `INGESTION_SCHEDULE_JOB_TYPE` (`incremental` or `full_sync`) -- `INGESTION_PREVENT_OVERLAP` (`0`/`1`) to skip obvious overlapping runs -- `INGESTION_OVERLAP_WINDOW_MINUTES` overlap guard window - -When enabled, Celery Beat enqueues the scheduled sync task on the configured cron. -The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync. 
- -Valid cron examples: - -- `*/30 * * * *` every 30 minutes -- `0 * * * *` hourly -- `15 2 * * *` daily at 02:15 - -Failure behavior for invalid cron values: - -- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web) -- periodic ingestion task is disabled until cron is fixed -- an error is logged at startup indicating the invalid schedule value - -## Provider Backend Selection - -Provider backend is selected via environment variables: - -- `PROVIDER_BACKEND=demo` uses the local JSON fixture adapter (`mvp_demo`) -- `PROVIDER_BACKEND=balldontlie` uses the HTTP adapter (`balldontlie`) -- `PROVIDER_DEFAULT_NAMESPACE` can override backend mapping explicitly - -The balldontlie adapter is NBA-centric and intended as MVP ingestion only. The provider abstraction remains ready for future multi-league providers (for example Sportradar or FIBA GDAP). -The adapter follows the published balldontlie OpenAPI contract: server `https://api.balldontlie.io`, NBA endpoints under `/nba/v1/*`, cursor pagination via `meta.next_cursor`, and `stats` ingestion filtered by `seasons[]`. -Some balldontlie plans do not include stats endpoints; set `PROVIDER_BALLDONTLIE_STATS_STRICT=0` (default) to ingest players/teams/seasons even when stats are unauthorized. - -Provider normalization details and explicit adapter assumptions are documented in [docs/provider-normalization.md](docs/provider-normalization.md). - -## GitFlow Workflow - -GitFlow is required in this repository: - -- `main`: production branch -- `develop`: integration branch -- `feature/*`: new feature branches from `develop` -- `release/*`: release hardening branches from `develop` -- `hotfix/*`: urgent production fixes from `main` - -Read full details in [CONTRIBUTING.md](CONTRIBUTING.md) and [docs/workflow.md](docs/workflow.md). 
- -### Repository Bootstrap Commands - -Run these from the current `main` branch to initialize local GitFlow usage: - -```bash -git checkout main -git pull origin main -git checkout -b develop -git push -u origin develop -``` - -Start a feature branch: - -```bash -git checkout develop -git pull origin develop -git checkout -b feature/player-search-tuning -``` - -Start a release branch: - -```bash -git checkout develop -git pull origin develop -git checkout -b release/0.1.0 -``` - -Start a hotfix branch: - -```bash -git checkout main -git pull origin main -git checkout -b hotfix/fix-redis-persistence -``` - -## Release Notes / Changelog Convention - -- Use [CHANGELOG.md](CHANGELOG.md) with an `Unreleased` section. -- For each merged PR, add short entries under: - - `Added` - - `Changed` - - `Fixed` -- On release, move `Unreleased` items to a dated version section (`[x.y.z] - YYYY-MM-DD`). +Legacy provider/Celery ingestion layers are not the default runtime path for v2 foundation. +They are intentionally isolated until replaced by v2 snapshot ingestion commands in later tasks. diff --git a/config/__init__.py b/config/__init__.py index 53f4ccb..ebef460 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -1,3 +1,8 @@ -from .celery import app as celery_app +""" +HoopScout v2 runtime package. -__all__ = ("celery_app",) +Celery is intentionally not auto-loaded at import time in v2 foundation runtime. +Legacy task modules remain in-repo and can be loaded explicitly if needed. 
+""" + +__all__ = () diff --git a/config/celery.py b/config/celery.py index 7942b2b..e3c9c3f 100644 --- a/config/celery.py +++ b/config/celery.py @@ -28,12 +28,12 @@ def _parse_cron_expression(expression: str) -> dict[str, str]: def build_periodic_schedule() -> dict: - if not settings.INGESTION_SCHEDULE_ENABLED: + if not getattr(settings, "INGESTION_SCHEDULE_ENABLED", False): logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.") return {} try: - schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON) + schedule_kwargs = _parse_cron_expression(getattr(settings, "INGESTION_SCHEDULE_CRON", "*/30 * * * *")) return { "ingestion.scheduled_provider_sync": { "task": "apps.ingestion.tasks.scheduled_provider_sync", @@ -44,7 +44,7 @@ def build_periodic_schedule() -> dict: logger.error( "Invalid periodic ingestion schedule config. Task disabled. " "INGESTION_SCHEDULE_CRON=%r error=%s", - settings.INGESTION_SCHEDULE_CRON, + getattr(settings, "INGESTION_SCHEDULE_CRON", ""), exc, ) return {} diff --git a/config/settings/base.py b/config/settings/base.py index e6c9c7a..72c19d7 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -142,23 +142,16 @@ LOGIN_URL = "users:login" LOGIN_REDIRECT_URL = "core:dashboard" LOGOUT_REDIRECT_URL = "core:home" -CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0") -CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/0") -CELERY_ACCEPT_CONTENT = ["json"] -CELERY_TASK_SERIALIZER = "json" -CELERY_RESULT_SERIALIZER = "json" -CELERY_TIMEZONE = TIME_ZONE -CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800")) -CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500")) -INGESTION_SCHEDULE_ENABLED = env_bool("INGESTION_SCHEDULE_ENABLED", False) -INGESTION_SCHEDULE_CRON = os.getenv("INGESTION_SCHEDULE_CRON", "*/30 * * * *").strip() -INGESTION_SCHEDULE_PROVIDER_NAMESPACE = 
os.getenv("INGESTION_SCHEDULE_PROVIDER_NAMESPACE", "").strip() -INGESTION_SCHEDULE_JOB_TYPE = os.getenv("INGESTION_SCHEDULE_JOB_TYPE", "incremental").strip().lower() -INGESTION_PREVENT_OVERLAP = env_bool("INGESTION_PREVENT_OVERLAP", True) -INGESTION_OVERLAP_WINDOW_MINUTES = int(os.getenv("INGESTION_OVERLAP_WINDOW_MINUTES", "180")) +# HoopScout v2 snapshot storage (volume-backed directories). +SNAPSHOT_INCOMING_DIR = os.getenv("SNAPSHOT_INCOMING_DIR", str(BASE_DIR / "snapshots" / "incoming")) +SNAPSHOT_ARCHIVE_DIR = os.getenv("SNAPSHOT_ARCHIVE_DIR", str(BASE_DIR / "snapshots" / "archive")) +SNAPSHOT_FAILED_DIR = os.getenv("SNAPSHOT_FAILED_DIR", str(BASE_DIR / "snapshots" / "failed")) -if INGESTION_SCHEDULE_JOB_TYPE not in {"incremental", "full_sync"}: - raise ImproperlyConfigured("INGESTION_SCHEDULE_JOB_TYPE must be either 'incremental' or 'full_sync'.") +# Optional scheduler command settings for future v2 snapshot jobs. +SCHEDULER_ENABLED = env_bool("SCHEDULER_ENABLED", False) +SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900")) +if SCHEDULER_INTERVAL_SECONDS < 30: + raise ImproperlyConfigured("SCHEDULER_INTERVAL_SECONDS must be >= 30.") PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower() PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo") diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 8285e76..9323720 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -3,25 +3,20 @@ services: user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" volumes: - .:/app - - node_modules_data:/app/node_modules - - static_data:/app/staticfiles - - media_data:/app/media - - runtime_data:/app/runtime + - static_data_dev:/app/staticfiles + - media_data_dev:/app/media + - snapshots_incoming_dev:/app/snapshots/incoming + - snapshots_archive_dev:/app/snapshots/archive + - snapshots_failed_dev:/app/snapshots/failed - celery_worker: - user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" + nginx: volumes: 
- - .:/app - - runtime_data:/app/runtime + - static_data_dev:/var/www/static:ro + - media_data_dev:/var/www/media:ro - celery_beat: - user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" - volumes: - - .:/app - - runtime_data:/app/runtime - - tailwind: - user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}" - volumes: - - .:/app - - node_modules_data:/app/node_modules +volumes: + static_data_dev: + media_data_dev: + snapshots_incoming_dev: + snapshots_archive_dev: + snapshots_failed_dev: diff --git a/docker-compose.release.yml b/docker-compose.release.yml index fba5fdc..2a857dc 100644 --- a/docker-compose.release.yml +++ b/docker-compose.release.yml @@ -2,14 +2,5 @@ services: web: environment: DJANGO_SETTINGS_MODULE: config.settings.production - DJANGO_DEBUG: "0" - - celery_worker: - environment: - DJANGO_SETTINGS_MODULE: config.settings.production - DJANGO_DEBUG: "0" - - celery_beat: - environment: - DJANGO_SETTINGS_MODULE: config.settings.production + DJANGO_ENV: production DJANGO_DEBUG: "0" diff --git a/docker-compose.yml b/docker-compose.yml index 3167add..d45d540 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,43 @@ services: + web: + image: registry.younerd.org/hoopscout/web:${APP_IMAGE_TAG:-latest} + build: + context: . 
+ dockerfile: Dockerfile + env_file: + - .env + command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile - + depends_on: + postgres: + condition: service_healthy + user: "10001:10001" + volumes: + - static_data:/app/staticfiles + - media_data:/app/media + - snapshots_incoming:/app/snapshots/incoming + - snapshots_archive:/app/snapshots/archive + - snapshots_failed:/app/snapshots/failed + expose: + - "8000" + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/health/ || exit 1"] + interval: 15s + timeout: 5s + retries: 8 + start_period: 25s + restart: unless-stopped + nginx: - image: nginx:1.27-alpine + image: registry.younerd.org/hoopscout/nginx:${NGINX_IMAGE_TAG:-latest} + build: + context: . + dockerfile: nginx/Dockerfile depends_on: web: condition: service_healthy ports: - "80:80" volumes: - - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro - static_data:/var/www/static:ro - media_data:/var/www/media:ro read_only: true @@ -22,91 +52,6 @@ services: start_period: 10s restart: unless-stopped - web: - build: - context: . - dockerfile: Dockerfile - env_file: - - .env - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - command: gunicorn config.wsgi:application --bind 0.0.0.0:8000 --workers ${GUNICORN_WORKERS:-3} --access-logfile - --error-logfile - - user: "10001:10001" - volumes: - - static_data:/app/staticfiles - - media_data:/app/media - - runtime_data:/app/runtime - expose: - - "8000" - healthcheck: - test: ["CMD-SHELL", "curl -f http://127.0.0.1:8000/health/ || exit 1"] - interval: 15s - timeout: 5s - retries: 8 - start_period: 20s - restart: unless-stopped - - tailwind: - build: - context: . - dockerfile: Dockerfile - env_file: - - .env - command: npm run dev - user: "10001:10001" - profiles: - - dev - restart: unless-stopped - - celery_worker: - build: - context: . 
- dockerfile: Dockerfile - env_file: - - .env - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - command: celery -A config worker -l info - user: "10001:10001" - volumes: - - runtime_data:/app/runtime - healthcheck: - test: ["CMD-SHELL", "celery -A config inspect ping -d celery@$$HOSTNAME | grep -q pong || exit 1"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: unless-stopped - - celery_beat: - build: - context: . - dockerfile: Dockerfile - env_file: - - .env - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - command: celery -A config beat -l info --schedule=/app/runtime/celerybeat-schedule - user: "10001:10001" - volumes: - - runtime_data:/app/runtime - healthcheck: - test: ["CMD-SHELL", "test -f /app/runtime/celerybeat-schedule || exit 1"] - interval: 30s - timeout: 5s - retries: 10 - start_period: 20s - restart: unless-stopped - postgres: image: postgres:16-alpine environment: @@ -122,22 +67,10 @@ services: retries: 5 restart: unless-stopped - redis: - image: redis:7-alpine - command: redis-server --save 60 1 --loglevel warning - volumes: - - redis_data:/data - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 10s - timeout: 5s - retries: 5 - restart: unless-stopped - volumes: postgres_data: static_data: media_data: - runtime_data: - redis_data: - node_modules_data: + snapshots_incoming: + snapshots_archive: + snapshots_failed: diff --git a/entrypoint.sh b/entrypoint.sh index f9a0d10..f4f793e 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -8,6 +8,10 @@ done echo "PostgreSQL is available." +mkdir -p "${SNAPSHOT_INCOMING_DIR:-/app/snapshots/incoming}" \ + "${SNAPSHOT_ARCHIVE_DIR:-/app/snapshots/archive}" \ + "${SNAPSHOT_FAILED_DIR:-/app/snapshots/failed}" + if [ "${DJANGO_SETTINGS_MODULE:-}" = "config.settings.production" ] && [ "$1" = "gunicorn" ]; then echo "Running Django deployment checks..." 
python manage.py check --deploy --fail-level WARNING @@ -19,15 +23,6 @@ if [ "${AUTO_APPLY_MIGRATIONS:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then fi if [ "${AUTO_COLLECTSTATIC:-0}" = "1" ] && [ "$1" = "gunicorn" ]; then - if [ "${AUTO_BUILD_TAILWIND:-1}" = "1" ] && [ -f /app/package.json ]; then - if [ -x /app/node_modules/.bin/tailwindcss ]; then - echo "Building Tailwind assets..." - npm run build - else - echo "Tailwind dependencies missing; skipping AUTO_BUILD_TAILWIND." - fi - fi - echo "Collecting static files..." python manage.py collectstatic --noinput fi diff --git a/nginx/Dockerfile b/nginx/Dockerfile new file mode 100644 index 0000000..3c42427 --- /dev/null +++ b/nginx/Dockerfile @@ -0,0 +1,8 @@ +FROM nginx:1.27-alpine + +COPY nginx/nginx.conf /etc/nginx/nginx.conf +COPY nginx/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["nginx", "-g", "daemon off;"] diff --git a/nginx/entrypoint.sh b/nginx/entrypoint.sh new file mode 100644 index 0000000..8959ebc --- /dev/null +++ b/nginx/entrypoint.sh @@ -0,0 +1,4 @@ +#!/bin/sh +set -e + +exec "$@"