diff --git a/.env.example b/.env.example index 5548422..07a593a 100644 --- a/.env.example +++ b/.env.example @@ -66,6 +66,8 @@ PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100 CELERY_TASK_TIME_LIMIT=1800 CELERY_TASK_SOFT_TIME_LIMIT=1500 INGESTION_SCHEDULE_ENABLED=0 +# 5-field cron: minute hour day_of_month month day_of_week +# Example hourly: 0 * * * * INGESTION_SCHEDULE_CRON=*/30 * * * * INGESTION_SCHEDULE_PROVIDER_NAMESPACE= INGESTION_SCHEDULE_JOB_TYPE=incremental diff --git a/README.md b/README.md index d0f8673..cbfe26f 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,18 @@ Configure scheduled sync through environment variables: When enabled, Celery Beat enqueues the scheduled sync task on the configured cron. The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync. +Valid cron examples: + +- `*/30 * * * *` every 30 minutes +- `0 * * * *` hourly +- `15 2 * * *` daily at 02:15 + +Failure behavior for invalid cron values: + +- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web) +- periodic ingestion task is disabled until cron is fixed +- an error is logged at startup indicating the invalid schedule value + ## Provider Backend Selection Provider backend is selected via environment variables: diff --git a/config/celery.py b/config/celery.py index dff93d5..7942b2b 100644 --- a/config/celery.py +++ b/config/celery.py @@ -1,9 +1,11 @@ +import logging import os from celery import Celery from celery.schedules import crontab from django.conf import settings os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development") +logger = logging.getLogger(__name__) app = Celery("hoopscout") app.config_from_object("django.conf:settings", namespace="CELERY") @@ -27,15 +29,25 @@ def _parse_cron_expression(expression: str) -> dict[str, str]: def build_periodic_schedule() -> dict: if not settings.INGESTION_SCHEDULE_ENABLED: + logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.") return {} - schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON) - return { - "ingestion.scheduled_provider_sync": { - "task": "apps.ingestion.tasks.scheduled_provider_sync", - "schedule": crontab(**schedule_kwargs), + try: + schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON) + return { + "ingestion.scheduled_provider_sync": { + "task": "apps.ingestion.tasks.scheduled_provider_sync", + "schedule": crontab(**schedule_kwargs), + } } - } + except Exception as exc: # noqa: BLE001 + logger.error( + "Invalid periodic ingestion schedule config. Task disabled. " + "INGESTION_SCHEDULE_CRON=%r error=%s", + settings.INGESTION_SCHEDULE_CRON, + exc, + ) + return {} app.conf.beat_schedule = build_periodic_schedule() diff --git a/tests/test_celery_schedule_safety.py b/tests/test_celery_schedule_safety.py new file mode 100644 index 0000000..df7015f --- /dev/null +++ b/tests/test_celery_schedule_safety.py @@ -0,0 +1,38 @@ +import os +import subprocess +import sys + +import pytest + + +def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess: + env = os.environ.copy() + env.update(env_overrides) + return subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + env=env, + check=False, + ) + + +@pytest.mark.django_db +def test_invalid_cron_does_not_crash_config_import_path(): + result = _run_python_import( + ( + "import config; " + "from config.celery import app; " + "print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')" + ), + { + "DJANGO_SETTINGS_MODULE": "config.settings.development", + "DJANGO_ENV": "development", + "DJANGO_DEBUG": "1", + "INGESTION_SCHEDULE_ENABLED": "1", + "INGESTION_SCHEDULE_CRON": "bad cron value", + }, + ) + + assert result.returncode == 0 + assert "beat_schedule_size=0" in result.stdout diff --git a/tests/test_ingestion_tasks.py b/tests/test_ingestion_tasks.py index bf6d9e3..ec399f4 100644 --- a/tests/test_ingestion_tasks.py +++ b/tests/test_ingestion_tasks.py @@ -31,6 +31,18 @@ def test_build_periodic_schedule_disabled(settings): assert build_periodic_schedule() == {} +@pytest.mark.django_db +def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog): + settings.INGESTION_SCHEDULE_ENABLED = True + settings.INGESTION_SCHEDULE_CRON = "invalid-cron" + + with caplog.at_level("ERROR"): + schedule = build_periodic_schedule() + + assert schedule == {} + assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages) + + @pytest.mark.django_db def test_trigger_incremental_sync_skips_overlapping_runs(settings): settings.INGESTION_PREVENT_OVERLAP = True