Harden Celery schedule parsing and startup safety
This commit is contained in:
@ -66,6 +66,8 @@ PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
|
||||
CELERY_TASK_TIME_LIMIT=1800
|
||||
CELERY_TASK_SOFT_TIME_LIMIT=1500
|
||||
INGESTION_SCHEDULE_ENABLED=0
|
||||
# 5-field cron: minute hour day_of_month month day_of_week
|
||||
# Example hourly: 0 * * * *
|
||||
INGESTION_SCHEDULE_CRON=*/30 * * * *
|
||||
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
|
||||
INGESTION_SCHEDULE_JOB_TYPE=incremental
|
||||
|
||||
12
README.md
12
README.md
@ -296,6 +296,18 @@ Configure scheduled sync through environment variables:
|
||||
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
|
||||
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
|
||||
|
||||
Valid cron examples:
|
||||
|
||||
- `*/30 * * * *` every 30 minutes
|
||||
- `0 * * * *` hourly
|
||||
- `15 2 * * *` daily at 02:15
|
||||
|
||||
Failure behavior for invalid cron values:
|
||||
|
||||
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
|
||||
- periodic ingestion task is disabled until cron is fixed
|
||||
- an error is logged at startup indicating the invalid schedule value
|
||||
|
||||
## Provider Backend Selection
|
||||
|
||||
Provider backend is selected via environment variables:
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
import logging
|
||||
import os
|
||||
from celery import Celery
|
||||
from celery.schedules import crontab
|
||||
from django.conf import settings
|
||||
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = Celery("hoopscout")
|
||||
app.config_from_object("django.conf:settings", namespace="CELERY")
|
||||
@ -27,8 +29,10 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
|
||||
|
||||
def build_periodic_schedule() -> dict:
|
||||
if not settings.INGESTION_SCHEDULE_ENABLED:
|
||||
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
|
||||
return {}
|
||||
|
||||
try:
|
||||
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
|
||||
return {
|
||||
"ingestion.scheduled_provider_sync": {
|
||||
@ -36,6 +40,14 @@ def build_periodic_schedule() -> dict:
|
||||
"schedule": crontab(**schedule_kwargs),
|
||||
}
|
||||
}
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.error(
|
||||
"Invalid periodic ingestion schedule config. Task disabled. "
|
||||
"INGESTION_SCHEDULE_CRON=%r error=%s",
|
||||
settings.INGESTION_SCHEDULE_CRON,
|
||||
exc,
|
||||
)
|
||||
return {}
|
||||
|
||||
|
||||
app.conf.beat_schedule = build_periodic_schedule()
|
||||
|
||||
38
tests/test_celery_schedule_safety.py
Normal file
38
tests/test_celery_schedule_safety.py
Normal file
@ -0,0 +1,38 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
|
||||
env = os.environ.copy()
|
||||
env.update(env_overrides)
|
||||
return subprocess.run(
|
||||
[sys.executable, "-c", code],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_invalid_cron_does_not_crash_config_import_path():
|
||||
result = _run_python_import(
|
||||
(
|
||||
"import config; "
|
||||
"from config.celery import app; "
|
||||
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
|
||||
),
|
||||
{
|
||||
"DJANGO_SETTINGS_MODULE": "config.settings.development",
|
||||
"DJANGO_ENV": "development",
|
||||
"DJANGO_DEBUG": "1",
|
||||
"INGESTION_SCHEDULE_ENABLED": "1",
|
||||
"INGESTION_SCHEDULE_CRON": "bad cron value",
|
||||
},
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert "beat_schedule_size=0" in result.stdout
|
||||
@ -31,6 +31,18 @@ def test_build_periodic_schedule_disabled(settings):
|
||||
assert build_periodic_schedule() == {}
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
|
||||
settings.INGESTION_SCHEDULE_ENABLED = True
|
||||
settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
|
||||
|
||||
with caplog.at_level("ERROR"):
|
||||
schedule = build_periodic_schedule()
|
||||
|
||||
assert schedule == {}
|
||||
assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_trigger_incremental_sync_skips_overlapping_runs(settings):
|
||||
settings.INGESTION_PREVENT_OVERLAP = True
|
||||
|
||||
Reference in New Issue
Block a user