Harden Celery schedule parsing and startup safety

This commit is contained in:
Alfredo Di Stasio
2026-03-10 16:18:57 +01:00
parent 1ba1a8eebd
commit abd3419aac
5 changed files with 82 additions and 6 deletions

View File

@ -66,6 +66,8 @@ PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
CELERY_TASK_TIME_LIMIT=1800
CELERY_TASK_SOFT_TIME_LIMIT=1500
INGESTION_SCHEDULE_ENABLED=0
# 5-field cron: minute hour day_of_month month day_of_week
# Example hourly: 0 * * * *
INGESTION_SCHEDULE_CRON=*/30 * * * *
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
INGESTION_SCHEDULE_JOB_TYPE=incremental

View File

@ -296,6 +296,18 @@ Configure scheduled sync through environment variables:
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
Valid cron examples:
- `*/30 * * * *` every 30 minutes
- `0 * * * *` hourly
- `15 2 * * *` daily at 02:15
Failure behavior for invalid cron values:
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
- periodic ingestion task is disabled until cron is fixed
- an error is logged at startup indicating the invalid schedule value
## Provider Backend Selection
Provider backend is selected via environment variables:

View File

@ -1,9 +1,11 @@
import logging
import os
from celery import Celery
from celery.schedules import crontab
from django.conf import settings
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
logger = logging.getLogger(__name__)
app = Celery("hoopscout")
app.config_from_object("django.conf:settings", namespace="CELERY")
@ -27,15 +29,25 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
def build_periodic_schedule() -> dict:
if not settings.INGESTION_SCHEDULE_ENABLED:
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
return {}
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
return {
"ingestion.scheduled_provider_sync": {
"task": "apps.ingestion.tasks.scheduled_provider_sync",
"schedule": crontab(**schedule_kwargs),
try:
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
return {
"ingestion.scheduled_provider_sync": {
"task": "apps.ingestion.tasks.scheduled_provider_sync",
"schedule": crontab(**schedule_kwargs),
}
}
}
except Exception as exc: # noqa: BLE001
logger.error(
"Invalid periodic ingestion schedule config. Task disabled. "
"INGESTION_SCHEDULE_CRON=%r error=%s",
settings.INGESTION_SCHEDULE_CRON,
exc,
)
return {}
app.conf.beat_schedule = build_periodic_schedule()

View File

@ -0,0 +1,38 @@
import os
import subprocess
import sys
import pytest
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
env = os.environ.copy()
env.update(env_overrides)
return subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
text=True,
env=env,
check=False,
)
@pytest.mark.django_db
def test_invalid_cron_does_not_crash_config_import_path():
result = _run_python_import(
(
"import config; "
"from config.celery import app; "
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
),
{
"DJANGO_SETTINGS_MODULE": "config.settings.development",
"DJANGO_ENV": "development",
"DJANGO_DEBUG": "1",
"INGESTION_SCHEDULE_ENABLED": "1",
"INGESTION_SCHEDULE_CRON": "bad cron value",
},
)
assert result.returncode == 0
assert "beat_schedule_size=0" in result.stdout

View File

@ -31,6 +31,18 @@ def test_build_periodic_schedule_disabled(settings):
assert build_periodic_schedule() == {}
@pytest.mark.django_db
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
settings.INGESTION_SCHEDULE_ENABLED = True
settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
with caplog.at_level("ERROR"):
schedule = build_periodic_schedule()
assert schedule == {}
assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
@pytest.mark.django_db
def test_trigger_incremental_sync_skips_overlapping_runs(settings):
settings.INGESTION_PREVENT_OVERLAP = True