Harden Celery schedule parsing and startup safety
This commit is contained in:
@ -66,6 +66,8 @@ PROVIDER_BALLDONTLIE_STATS_PER_PAGE=100
|
|||||||
CELERY_TASK_TIME_LIMIT=1800
|
CELERY_TASK_TIME_LIMIT=1800
|
||||||
CELERY_TASK_SOFT_TIME_LIMIT=1500
|
CELERY_TASK_SOFT_TIME_LIMIT=1500
|
||||||
INGESTION_SCHEDULE_ENABLED=0
|
INGESTION_SCHEDULE_ENABLED=0
|
||||||
|
# 5-field cron: minute hour day_of_month month day_of_week
|
||||||
|
# Example hourly: 0 * * * *
|
||||||
INGESTION_SCHEDULE_CRON=*/30 * * * *
|
INGESTION_SCHEDULE_CRON=*/30 * * * *
|
||||||
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
|
INGESTION_SCHEDULE_PROVIDER_NAMESPACE=
|
||||||
INGESTION_SCHEDULE_JOB_TYPE=incremental
|
INGESTION_SCHEDULE_JOB_TYPE=incremental
|
||||||
|
|||||||
12
README.md
12
README.md
@ -296,6 +296,18 @@ Configure scheduled sync through environment variables:
|
|||||||
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
|
When enabled, Celery Beat enqueues the scheduled sync task on the configured cron.
|
||||||
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
|
The task uses the existing ingestion service path and writes run/error records in the same tables as manual sync.
|
||||||
|
|
||||||
|
Valid cron examples:
|
||||||
|
|
||||||
|
- `*/30 * * * *` every 30 minutes
|
||||||
|
- `0 * * * *` hourly
|
||||||
|
- `15 2 * * *` daily at 02:15
|
||||||
|
|
||||||
|
Failure behavior for invalid cron values:
|
||||||
|
|
||||||
|
- invalid `INGESTION_SCHEDULE_CRON` does not crash unrelated startup paths (for example, web)
|
||||||
|
- periodic ingestion task is disabled until cron is fixed
|
||||||
|
- an error is logged at startup indicating the invalid schedule value
|
||||||
|
|
||||||
## Provider Backend Selection
|
## Provider Backend Selection
|
||||||
|
|
||||||
Provider backend is selected via environment variables:
|
Provider backend is selected via environment variables:
|
||||||
|
|||||||
@ -1,9 +1,11 @@
|
|||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
from celery.schedules import crontab
|
from celery.schedules import crontab
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
app = Celery("hoopscout")
|
app = Celery("hoopscout")
|
||||||
app.config_from_object("django.conf:settings", namespace="CELERY")
|
app.config_from_object("django.conf:settings", namespace="CELERY")
|
||||||
@ -27,15 +29,25 @@ def _parse_cron_expression(expression: str) -> dict[str, str]:
|
|||||||
|
|
||||||
def build_periodic_schedule() -> dict:
|
def build_periodic_schedule() -> dict:
|
||||||
if not settings.INGESTION_SCHEDULE_ENABLED:
|
if not settings.INGESTION_SCHEDULE_ENABLED:
|
||||||
|
logger.info("Periodic ingestion schedule disabled by INGESTION_SCHEDULE_ENABLED=0.")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
|
try:
|
||||||
return {
|
schedule_kwargs = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
|
||||||
"ingestion.scheduled_provider_sync": {
|
return {
|
||||||
"task": "apps.ingestion.tasks.scheduled_provider_sync",
|
"ingestion.scheduled_provider_sync": {
|
||||||
"schedule": crontab(**schedule_kwargs),
|
"task": "apps.ingestion.tasks.scheduled_provider_sync",
|
||||||
|
"schedule": crontab(**schedule_kwargs),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
except Exception as exc: # noqa: BLE001
|
||||||
|
logger.error(
|
||||||
|
"Invalid periodic ingestion schedule config. Task disabled. "
|
||||||
|
"INGESTION_SCHEDULE_CRON=%r error=%s",
|
||||||
|
settings.INGESTION_SCHEDULE_CRON,
|
||||||
|
exc,
|
||||||
|
)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
app.conf.beat_schedule = build_periodic_schedule()
|
app.conf.beat_schedule = build_periodic_schedule()
|
||||||
|
|||||||
38
tests/test_celery_schedule_safety.py
Normal file
38
tests/test_celery_schedule_safety.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _run_python_import(code: str, env_overrides: dict[str, str]) -> subprocess.CompletedProcess:
|
||||||
|
env = os.environ.copy()
|
||||||
|
env.update(env_overrides)
|
||||||
|
return subprocess.run(
|
||||||
|
[sys.executable, "-c", code],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
env=env,
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_invalid_cron_does_not_crash_config_import_path():
|
||||||
|
result = _run_python_import(
|
||||||
|
(
|
||||||
|
"import config; "
|
||||||
|
"from config.celery import app; "
|
||||||
|
"print(f'beat_schedule_size={len(app.conf.beat_schedule or {})}')"
|
||||||
|
),
|
||||||
|
{
|
||||||
|
"DJANGO_SETTINGS_MODULE": "config.settings.development",
|
||||||
|
"DJANGO_ENV": "development",
|
||||||
|
"DJANGO_DEBUG": "1",
|
||||||
|
"INGESTION_SCHEDULE_ENABLED": "1",
|
||||||
|
"INGESTION_SCHEDULE_CRON": "bad cron value",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result.returncode == 0
|
||||||
|
assert "beat_schedule_size=0" in result.stdout
|
||||||
@ -31,6 +31,18 @@ def test_build_periodic_schedule_disabled(settings):
|
|||||||
assert build_periodic_schedule() == {}
|
assert build_periodic_schedule() == {}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_build_periodic_schedule_invalid_cron_disables_task_and_logs(settings, caplog):
|
||||||
|
settings.INGESTION_SCHEDULE_ENABLED = True
|
||||||
|
settings.INGESTION_SCHEDULE_CRON = "invalid-cron"
|
||||||
|
|
||||||
|
with caplog.at_level("ERROR"):
|
||||||
|
schedule = build_periodic_schedule()
|
||||||
|
|
||||||
|
assert schedule == {}
|
||||||
|
assert any("Invalid periodic ingestion schedule config. Task disabled." in message for message in caplog.messages)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
def test_trigger_incremental_sync_skips_overlapping_runs(settings):
|
def test_trigger_incremental_sync_skips_overlapping_runs(settings):
|
||||||
settings.INGESTION_PREVENT_OVERLAP = True
|
settings.INGESTION_PREVENT_OVERLAP = True
|
||||||
|
|||||||
Reference in New Issue
Block a user