Improve ingestion concurrency safety and batch transaction robustness

This commit is contained in:
Alfredo Di Stasio
2026-03-10 16:37:29 +01:00
parent abd3419aac
commit 2252821daf
5 changed files with 202 additions and 44 deletions

View File

@ -9,7 +9,12 @@ from django.utils.dateparse import parse_date
from apps.competitions.models import Competition, Season
from apps.ingestion.models import IngestionRun
from apps.ingestion.services.runs import finish_ingestion_run, log_ingestion_error, start_ingestion_run
from apps.ingestion.services.runs import (
finish_ingestion_run,
log_ingestion_error,
start_ingestion_run,
update_ingestion_run_progress,
)
from apps.players.models import Nationality, Player, PlayerAlias, PlayerCareerEntry, Position, Role
from apps.players.services.origin import refresh_player_origin
from apps.providers.exceptions import ProviderRateLimitError, ProviderTransientError
@ -441,14 +446,54 @@ def run_sync_job(
if job_type == IngestionRun.JobType.INCREMENTAL
else provider.sync_all()
)
source_counts = {
"competitions": len(payload.get("competitions", [])),
"teams": len(payload.get("teams", [])),
"seasons": len(payload.get("seasons", [])),
"players": len(payload.get("players", [])),
"player_stats": len(payload.get("player_stats", [])),
"player_careers": len(payload.get("player_careers", [])),
}
with transaction.atomic():
_sync_competitions(provider_namespace, payload.get("competitions", []), run, summary)
_sync_teams(provider_namespace, payload.get("teams", []), run, summary)
_sync_seasons(provider_namespace, payload.get("seasons", []), run, summary)
_sync_players(provider_namespace, payload.get("players", []), run, summary)
_sync_player_stats(provider_namespace, payload.get("player_stats", []), run, summary)
_sync_player_careers(provider_namespace, payload.get("player_careers", []), run, summary)
steps: list[tuple[str, callable, list[dict]]] = [
("competitions", _sync_competitions, payload.get("competitions", [])),
("teams", _sync_teams, payload.get("teams", [])),
("seasons", _sync_seasons, payload.get("seasons", [])),
("players", _sync_players, payload.get("players", [])),
("player_stats", _sync_player_stats, payload.get("player_stats", [])),
("player_careers", _sync_player_careers, payload.get("player_careers", [])),
]
for step_name, step_fn, step_payload in steps:
step_summary = SyncSummary()
with transaction.atomic():
step_fn(provider_namespace, step_payload, run, step_summary)
summary.processed += step_summary.processed
summary.created += step_summary.created
summary.updated += step_summary.updated
summary.failed += step_summary.failed
update_ingestion_run_progress(
run=run,
completed_step=step_name,
step_summary={
"processed": step_summary.processed,
"created": step_summary.created,
"updated": step_summary.updated,
"failed": step_summary.failed,
},
source_counts=source_counts,
)
logger.info(
"Completed ingestion step run_id=%s step=%s processed=%s created=%s updated=%s failed=%s",
run.id,
step_name,
step_summary.processed,
step_summary.created,
step_summary.updated,
step_summary.failed,
)
success_error_summary = ""
if summary.failed > 0: