Improve ingestion concurrency safety and batch transaction robustness
This commit is contained in:
@@ -4,7 +4,7 @@ from celery import shared_task
|
||||
from django.conf import settings
|
||||
|
||||
from apps.ingestion.models import IngestionRun
|
||||
from apps.ingestion.services.runs import has_running_ingestion_run, mark_ingestion_run_skipped
|
||||
from apps.ingestion.services.runs import ingestion_advisory_lock, mark_ingestion_run_skipped
|
||||
from apps.ingestion.services.sync import run_sync_job
|
||||
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
|
||||
from apps.providers.registry import get_default_provider_namespace
|
||||
@@ -21,23 +21,29 @@ def _run_sync_with_overlap_guard(
|
||||
cursor: str | None = None,
|
||||
):
|
||||
effective_context = context or {}
|
||||
if settings.INGESTION_PREVENT_OVERLAP and has_running_ingestion_run(
|
||||
provider_namespace=provider_namespace,
|
||||
job_type=job_type,
|
||||
within_minutes=settings.INGESTION_OVERLAP_WINDOW_MINUTES,
|
||||
):
|
||||
reason = (
|
||||
f"Skipped due to overlapping running job for provider={provider_namespace}, "
|
||||
f"job_type={job_type}."
|
||||
)
|
||||
logger.warning(reason)
|
||||
run = mark_ingestion_run_skipped(
|
||||
provider_namespace=provider_namespace,
|
||||
job_type=job_type,
|
||||
reason=reason,
|
||||
context=effective_context,
|
||||
)
|
||||
return run.id
|
||||
if settings.INGESTION_PREVENT_OVERLAP:
|
||||
with ingestion_advisory_lock(provider_namespace=provider_namespace, job_type=job_type) as acquired:
|
||||
if not acquired:
|
||||
reason = (
|
||||
f"Skipped due to advisory lock for provider={provider_namespace}, "
|
||||
f"job_type={job_type}."
|
||||
)
|
||||
logger.warning(reason)
|
||||
run = mark_ingestion_run_skipped(
|
||||
provider_namespace=provider_namespace,
|
||||
job_type=job_type,
|
||||
reason=reason,
|
||||
context=effective_context,
|
||||
)
|
||||
return run.id
|
||||
|
||||
return run_sync_job(
|
||||
provider_namespace=provider_namespace,
|
||||
job_type=job_type,
|
||||
triggered_by_id=triggered_by_id,
|
||||
context=effective_context,
|
||||
cursor=cursor,
|
||||
).id
|
||||
|
||||
return run_sync_job(
|
||||
provider_namespace=provider_namespace,
|
||||
|
||||
Reference in New Issue
Block a user