Wire Celery Beat periodic sync with ingestion run tracking

This commit is contained in:
Alfredo Di Stasio
2026-03-10 13:44:36 +01:00
parent b39c6ced3a
commit ceff4bc42c
12 changed files with 311 additions and 6 deletions

View File

@ -1,3 +1,6 @@
from datetime import timedelta
from django.db.models import Q
from django.utils import timezone
from apps.ingestion.models import IngestionError, IngestionRun
@ -14,12 +17,22 @@ def start_ingestion_run(*, provider_namespace: str, job_type: str, triggered_by=
)
def finish_ingestion_run(*, run: IngestionRun, status: str, processed: int = 0, created: int = 0, updated: int = 0, failed: int = 0) -> IngestionRun:
def finish_ingestion_run(
*,
run: IngestionRun,
status: str,
processed: int = 0,
created: int = 0,
updated: int = 0,
failed: int = 0,
error_summary: str = "",
) -> IngestionRun:
run.status = status
run.records_processed = processed
run.records_created = created
run.records_updated = updated
run.records_failed = failed
run.error_summary = error_summary
run.finished_at = timezone.now()
run.save(
update_fields=[
@ -28,12 +41,37 @@ def finish_ingestion_run(*, run: IngestionRun, status: str, processed: int = 0,
"records_created",
"records_updated",
"records_failed",
"error_summary",
"finished_at",
]
)
return run
def mark_ingestion_run_skipped(
    *,
    provider_namespace: str,
    job_type: str,
    reason: str,
    context: dict | None = None,
) -> IngestionRun:
    """Persist an already-closed run record for a sync that was skipped.

    The row is created with status ``CANCELED`` and identical
    ``started_at`` / ``finished_at`` timestamps; ``reason`` is stored in
    ``error_summary``.

    Args:
        provider_namespace: Provider the skipped job was meant for.
        job_type: Kind of job that was skipped.
        reason: Explanation of the skip, saved as ``error_summary``.
        context: Optional extra data for the run; a falsy value is stored
            as an empty dict.

    Returns:
        The newly created ``IngestionRun``.
    """
    # One timestamp for both fields so the run has zero recorded duration.
    skipped_at = timezone.now()
    run_fields = {
        "provider_namespace": provider_namespace,
        "job_type": job_type,
        "status": IngestionRun.RunStatus.CANCELED,
        "started_at": skipped_at,
        "finished_at": skipped_at,
        "error_summary": reason,
        "context": context if context else {},
    }
    return IngestionRun.objects.create(**run_fields)
def has_running_ingestion_run(
    *,
    provider_namespace: str,
    job_type: str,
    within_minutes: int,
) -> bool:
    """Report whether a matching run in ``RUNNING`` status started recently.

    Args:
        provider_namespace: Provider namespace the run must match.
        job_type: Job type the run must match.
        within_minutes: Size of the look-back window in minutes; values
            below 1 are raised to 1.

    Returns:
        ``True`` if at least one ``IngestionRun`` with status ``RUNNING``
        started at or after the cutoff and either has no ``finished_at``
        or has a ``finished_at`` at or after the cutoff.
    """
    # Clamp the window so a zero or negative argument still looks back 1 minute.
    window = timedelta(minutes=max(within_minutes, 1))
    cutoff = timezone.now() - window

    recent_running = IngestionRun.objects.filter(
        provider_namespace=provider_namespace,
        job_type=job_type,
        status=IngestionRun.RunStatus.RUNNING,
        started_at__gte=cutoff,
    )
    unfinished_or_recent = Q(finished_at__isnull=True) | Q(finished_at__gte=cutoff)
    return recent_running.filter(unfinished_or_recent).exists()
def log_ingestion_error(*, run: IngestionRun, message: str, provider_namespace: str, severity: str = IngestionError.Severity.ERROR, entity_type: str = "", external_id: str = "", raw_payload: dict | None = None) -> IngestionError:
return IngestionError.objects.create(
ingestion_run=run,

View File

@ -427,6 +427,12 @@ def run_sync_job(
context=context or {},
)
summary = SyncSummary()
logger.info(
"Starting ingestion run id=%s provider=%s job_type=%s",
run.id,
provider_namespace,
job_type,
)
try:
provider = get_provider(provider_namespace)
@ -444,6 +450,9 @@ def run_sync_job(
_sync_player_stats(provider_namespace, payload.get("player_stats", []), run, summary)
_sync_player_careers(provider_namespace, payload.get("player_careers", []), run, summary)
success_error_summary = ""
if summary.failed > 0:
success_error_summary = f"Completed with {summary.failed} failed record(s)."
finish_ingestion_run(
run=run,
status=IngestionRun.RunStatus.SUCCESS,
@ -451,6 +460,16 @@ def run_sync_job(
created=summary.created,
updated=summary.updated,
failed=summary.failed,
error_summary=success_error_summary,
)
logger.info(
"Completed ingestion run id=%s status=%s processed=%s created=%s updated=%s failed=%s",
run.id,
IngestionRun.RunStatus.SUCCESS,
summary.processed,
summary.created,
summary.updated,
summary.failed,
)
return run
@ -471,6 +490,7 @@ def run_sync_job(
created=summary.created,
updated=summary.updated,
failed=summary.failed + 1,
error_summary=f"Rate limit from provider: {exc}",
)
raise
@ -490,6 +510,7 @@ def run_sync_job(
created=summary.created,
updated=summary.updated,
failed=summary.failed + 1,
error_summary=f"Transient provider error: {exc}",
)
raise
@ -509,5 +530,6 @@ def run_sync_job(
created=summary.created,
updated=summary.updated,
failed=summary.failed + 1,
error_summary=f"Unhandled ingestion error: {exc}",
)
raise