Harden Celery schedule parsing and startup safety

This commit is contained in:
Alfredo Di Stasio
2026-03-10 16:18:57 +01:00
parent 1ba1a8eebd
commit abd3419aac
5 changed files with 82 additions and 6 deletions

View File

@ -66,6 +66,8 @@ PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
CELERY_TASK_TIME_LIMIT=1800 CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500 CELERY_TASK_SOFT_TIME_LIMIT=1500
INGESTION_SCHEDULE_ENABLED=0 INGESTION_SCHEDULE_ENABLED=0
# 5-field cron: minute hour day_of_month month day_of_week
# Example hourly: 0 * * * *
INGESTION_SCHEDULE_CRON=*/30 * * * * INGESTION_SCHEDULE_CRON=*/30 * * * *
INGESTION_SCHEDULE_PROVIDER_NAMESPACE= INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
INGESTION_SCHEDULE_JOB_TYPE=incremental INGESTION_SCHEDULE_JOB_TYPE=incremental

View File

@ -296,6 +296,18 @@ Configure scheduled sync through environment variables:
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron. When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync. The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
Valid cron examples:
- `*/30 * * * *` every 30 minutes
- `0 * * * *` hourly
- `15 2 * * *` daily at 02:15
Failure behavior for invalid cron values:
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
- periodic ingestion task is disabled until cron is fixed
- an error is logged at startup indicating the invalid schedule value
## Provider Backend Selection ## Provider Backend Selection
Provider backend is selected via environment variables: Provider backend is selected via environment variables:

View File

@ -1,9 +1,11 @@
import logging
import os import os
from celery import Celery from celery import Celery
from celery.schedules import crontab from celery.schedules import crontab
from django.conf import settings from django.conf import settings
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
logger = logging.getLogger(__name__)
app = Celery("hoopscout") app = Celery("hoopscout")
app.config_from_object("django.conf:settings", namespace="CELERY") app.config_from_object("django.conf:settings", namespace="CELERY")
@ -27,15 +29,25 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
def build_periodic_schedule() -> dict: def build_periodic_schedule() -> dict:
if not settings.INGESTION_SCHEDULE_ENABLED: if not settings.INGESTION_SCHEDULE_ENABLED:
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
return {} return {}
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON) try:
return { schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
"ingestion.scheduled_provider_sync": { return {
"task": "apps.ingestion.tasks.scheduled_provider_sync", "ingestion.scheduled_provider_sync": {
"schedule": crontab(**schedule_kwargs), "task": "apps.ingestion.tasks.scheduled_provider_sync",
"schedule": crontab(**schedule_kwargs),
}
} }
} except Exception as exc: # noqa: BLE001
logger.error(
"Invalid periodic ingestion schedule config. Task disabled. "
"INGESTION_SCHEDULE_CRON=%r error=%s",
settings.INGESTION_SCHEDULE_CRON,
exc,
)
return {}
app.conf.beat_schedule = build_periodic_schedule() app.conf.beat_schedule = build_periodic_schedule()

View File

@ -0,0 +1,38 @@
import os
import subprocess
import sys
import pytest
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
env = os.environ.copy()
env.update(env_overrides)
return subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
text=True,
env=env,
check=False,
)
@pytest.mark.django_db
def test_invalid_cron_does_not_crash_config_import_path():
result = _run_python_import(
(
"import config; "
"from config.celery import app; "
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
),
{
"DJANGO_SETTINGS_MODULE": "config.settings.development",
"DJANGO_ENV": "development",
"DJANGO_DEBUG": "1",
"INGESTION_SCHEDULE_ENABLED": "1",
"INGESTION_SCHEDULE_CRON": "bad cron value",
},
)
assert result.returncode == 0
assert "beat_schedule_size=0" in result.stdout

View File

@ -31,6 +31,18 @@ def test_build_periodic_schedule_disabled(settings):
assert build_periodic_schedule() == {} assert build_periodic_schedule() == {}
@pytest.mark.django_db
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
settings.INGESTION_SCHEDULE_ENABLED = True
settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
with caplog.at_level("ERROR"):
schedule = build_periodic_schedule()
assert schedule == {}
assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
@pytest.mark.django_db @pytest.mark.django_db
def test_trigger_incremental_sync_skips_overlapping_runs(settings): def test_trigger_incremental_sync_skips_overlapping_runs(settings):
settings.INGESTION_PREVENT_OVERLAP = True settings.INGESTION_PREVENT_OVERLAP = True