Improve ingestion concurrency safety and batch transaction robustness

This commit is contained in:
Alfredo Di Stasio
2026-03-10 16:37:29 +01:00
parent abd3419aac
commit 2252821daf
5 changed files with 202 additions and 44 deletions

View File

@@ -4,7 +4,7 @@ from celery import shared_task
from django.conf import settings
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.runs import has_running_ingestion_run, mark_ingestion_run_skipped
from apps.ingestion.services.runs import ingestion_advisory_lock, mark_ingestion_run_skipped
from apps.ingestion.services.sync import run_sync_job
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
from apps.providers.registry import get_default_provider_namespace
@@ -21,23 +21,29 @@ def _run_sync_with_overlap_guard(
cursor: str | None = None,
):
effective_context = context or {}
if settings.INGESTION_PREVENT_OVERLAP and has_running_ingestion_run(
provider_namespace=provider_namespace,
job_type=job_type,
within_minutes=settings.INGESTION_OVERLAP_WINDOW_MINUTES,
):
reason = (
f"Skipped due to overlapping running job for provider={provider_namespace}, "
f"job_type={job_type}."
)
logger.warning(reason)
run = mark_ingestion_run_skipped(
provider_namespace=provider_namespace,
job_type=job_type,
reason=reason,
context=effective_context,
)
return run.id
if settings.INGESTION_PREVENT_OVERLAP:
with ingestion_advisory_lock(provider_namespace=provider_namespace, job_type=job_type) as acquired:
if not acquired:
reason = (
f"Skipped due to advisory lock for provider={provider_namespace}, "
f"job_type={job_type}."
)
logger.warning(reason)
run = mark_ingestion_run_skipped(
provider_namespace=provider_namespace,
job_type=job_type,
reason=reason,
context=effective_context,
)
return run.id
return run_sync_job(
provider_namespace=provider_namespace,
job_type=job_type,
triggered_by_id=triggered_by_id,
context=effective_context,
cursor=cursor,
).id
return run_sync_job(
provider_namespace=provider_namespace,