Wire Celery Beat periodic sync with ingestion run tracking

This commit is contained in:
Alfredo Di Stasio
2026-03-10 13:44:36 +01:00
parent b39c6ced3a
commit ceff4bc42c
12 changed files with 311 additions and 6 deletions

View File

@ -1,8 +1,41 @@
import os
from celery import Celery
from celery.schedules import crontab
from django.conf import settings
# Fall back to the development settings module unless DJANGO_SETTINGS_MODULE
# is already present in the environment (setdefault never overwrites it).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.development")
# The Celery application instance for this project, named "hoopscout".
app = Celery("hoopscout")
# Load Celery configuration from the Django settings object, using the
# "CELERY" namespace (i.e. the CELERY_-prefixed settings).
app.config_from_object("django.conf:settings", namespace="CELERY")
# Let Celery discover task modules itself instead of registering them here.
app.autodiscover_tasks()
def _parse_cron_expression(expression: str) -> dict[str, str]:
parts = expression.split()
if len(parts) != 5:
raise ValueError(
"INGESTION_SCHEDULE_CRON must have 5 fields: minute hour day_of_month month_of_year day_of_week."
)
return {
"minute": parts[0],
"hour": parts[1],
"day_of_month": parts[2],
"month_of_year": parts[3],
"day_of_week": parts[4],
}
def build_periodic_schedule() -> dict:
    """Build the Celery Beat schedule for the periodic provider sync.

    Returns:
        An empty dict when ``settings.INGESTION_SCHEDULE_ENABLED`` is falsy.
        Otherwise a single-entry mapping, keyed
        ``ingestion.scheduled_provider_sync``, that runs the task
        ``apps.ingestion.tasks.scheduled_provider_sync`` on a ``crontab``
        built from ``settings.INGESTION_SCHEDULE_CRON``.

    Raises:
        ValueError: Propagated from ``_parse_cron_expression`` when the cron
            setting does not have exactly five fields.
    """
    beat_entries: dict = {}
    if settings.INGESTION_SCHEDULE_ENABLED:
        cron_fields = _parse_cron_expression(settings.INGESTION_SCHEDULE_CRON)
        beat_entries["ingestion.scheduled_provider_sync"] = {
            "task": "apps.ingestion.tasks.scheduled_provider_sync",
            "schedule": crontab(**cron_fields),
        }
    return beat_entries
# Install the periodic schedule on the app. This runs once, when the module is
# imported, so the schedule reflects the settings as they are at import time.
app.conf.beat_schedule = build_periodic_schedule()

View File

@ -124,6 +124,15 @@ CELERY_RESULT_SERIALIZER = "json"
# Celery uses the same time zone as the project-wide TIME_ZONE setting.
CELERY_TIMEZONE = TIME_ZONE
# Task time limits, overridable via environment; a non-integer value makes
# int() raise ValueError while settings load.
# NOTE(review): presumably expressed in seconds — confirm against Celery docs.
CELERY_TASK_TIME_LIMIT = int(os.getenv("CELERY_TASK_TIME_LIMIT", "1800"))
CELERY_TASK_SOFT_TIME_LIMIT = int(os.getenv("CELERY_TASK_SOFT_TIME_LIMIT", "1500"))
# Switch for the periodic ingestion schedule.
# NOTE(review): env_bool is defined elsewhere; the second argument is
# presumably the default used when the variable is unset — confirm.
INGESTION_SCHEDULE_ENABLED = env_bool("INGESTION_SCHEDULE_ENABLED", False)
# Cron expression for the periodic ingestion; the default fires every 30 minutes.
INGESTION_SCHEDULE_CRON = os.getenv("INGESTION_SCHEDULE_CRON", "*/30 * * * *").strip()
# Provider namespace for scheduled runs; defaults to an empty string.
INGESTION_SCHEDULE_PROVIDER_NAMESPACE = os.getenv("INGESTION_SCHEDULE_PROVIDER_NAMESPACE", "").strip()
# Job type for scheduled runs, normalised to lower case and validated below.
INGESTION_SCHEDULE_JOB_TYPE = os.getenv("INGESTION_SCHEDULE_JOB_TYPE", "incremental").strip().lower()
# Overlap-prevention settings for ingestion runs.
# NOTE(review): how these two values are consumed is not visible here — confirm in the task code.
INGESTION_PREVENT_OVERLAP = env_bool("INGESTION_PREVENT_OVERLAP", True)
INGESTION_OVERLAP_WINDOW_MINUTES = int(os.getenv("INGESTION_OVERLAP_WINDOW_MINUTES", "180"))
# Fail fast at settings load if the job type is not one of the two supported values.
if INGESTION_SCHEDULE_JOB_TYPE not in {"incremental", "full_sync"}:
    raise ImproperlyConfigured("INGESTION_SCHEDULE_JOB_TYPE must be either 'incremental' or 'full_sync'.")
# Provider backend selector, normalised to lower case; defaults to "demo".
PROVIDER_BACKEND = os.getenv("PROVIDER_BACKEND", "demo").strip().lower()
# Namespace string for the demo provider; defaults to "mvp_demo".
PROVIDER_NAMESPACE_DEMO = os.getenv("PROVIDER_NAMESPACE_DEMO", "mvp_demo")