Files
hoopscout/apps/ingestion/models.py
2026-03-13 14:00:39 +01:00

168 lines
6.4 KiB
Python

from django.conf import settings
from django.db import models
class ImportRun(models.Model):
class RunStatus(models.TextChoices):
PENDING = "pending", "Pending"
RUNNING = "running", "Running"
SUCCESS = "success", "Success"
FAILED = "failed", "Failed"
CANCELED = "canceled", "Canceled"
source = models.CharField(max_length=80, default="snapshot")
status = models.CharField(max_length=24, choices=RunStatus.choices, default=RunStatus.PENDING)
triggered_by = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.SET_NULL,
blank=True,
null=True,
related_name="import_runs",
)
started_at = models.DateTimeField(blank=True, null=True)
finished_at = models.DateTimeField(blank=True, null=True)
files_total = models.PositiveIntegerField(default=0)
files_processed = models.PositiveIntegerField(default=0)
rows_total = models.PositiveIntegerField(default=0)
rows_upserted = models.PositiveIntegerField(default=0)
rows_failed = models.PositiveIntegerField(default=0)
error_summary = models.TextField(blank=True, default="")
context = models.JSONField(default=dict, blank=True)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
indexes = [
models.Index(fields=["source", "status"]),
models.Index(fields=["created_at"]),
models.Index(fields=["started_at"]),
models.Index(fields=["finished_at"]),
]
def __str__(self) -> str:
return f"{self.source} | {self.status} | {self.created_at:%Y-%m-%d %H:%M}"
class ImportFile(models.Model):
class FileStatus(models.TextChoices):
PENDING = "pending", "Pending"
PROCESSING = "processing", "Processing"
SUCCESS = "success", "Success"
FAILED = "failed", "Failed"
SKIPPED = "skipped", "Skipped"
import_run = models.ForeignKey(
"ingestion.ImportRun",
on_delete=models.CASCADE,
related_name="files",
)
relative_path = models.CharField(max_length=260)
source_name = models.CharField(max_length=120, blank=True)
snapshot_date = models.DateField(blank=True, null=True)
status = models.CharField(max_length=24, choices=FileStatus.choices, default=FileStatus.PENDING)
checksum = models.CharField(max_length=128, blank=True)
file_size_bytes = models.PositiveBigIntegerField(blank=True, null=True)
rows_total = models.PositiveIntegerField(default=0)
rows_upserted = models.PositiveIntegerField(default=0)
rows_failed = models.PositiveIntegerField(default=0)
error_message = models.TextField(blank=True)
payload_preview = models.JSONField(default=dict, blank=True)
processed_at = models.DateTimeField(blank=True, null=True)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
constraints = [
models.UniqueConstraint(
fields=["import_run", "relative_path"],
name="uq_import_file_per_run_path",
),
]
indexes = [
models.Index(fields=["import_run", "status"]),
models.Index(fields=["relative_path"]),
models.Index(fields=["source_name", "snapshot_date"]),
models.Index(fields=["processed_at"]),
]
def __str__(self) -> str:
return f"{self.relative_path} [{self.status}]"
class IngestionRun(models.Model):
class RunStatus(models.TextChoices):
PENDING = "pending", "Pending"
RUNNING = "running", "Running"
SUCCESS = "success", "Success"
FAILED = "failed", "Failed"
CANCELED = "canceled", "Canceled"
class JobType(models.TextChoices):
FULL_SYNC = "full_sync", "Full Sync"
INCREMENTAL = "incremental", "Incremental"
MANUAL = "manual", "Manual"
provider_namespace = models.CharField(max_length=80)
job_type = models.CharField(max_length=32, choices=JobType.choices)
status = models.CharField(max_length=24, choices=RunStatus.choices, default=RunStatus.PENDING)
triggered_by = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.SET_NULL,
blank=True,
null=True,
related_name="ingestion_runs",
)
started_at = models.DateTimeField(blank=True, null=True)
finished_at = models.DateTimeField(blank=True, null=True)
records_processed = models.PositiveIntegerField(default=0)
records_created = models.PositiveIntegerField(default=0)
records_updated = models.PositiveIntegerField(default=0)
records_failed = models.PositiveIntegerField(default=0)
error_summary = models.TextField(blank=True, default="")
context = models.JSONField(default=dict, blank=True)
raw_payload = models.JSONField(default=dict, blank=True)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-created_at"]
indexes = [
models.Index(fields=["provider_namespace", "status"]),
models.Index(fields=["provider_namespace", "job_type"]),
models.Index(fields=["started_at"]),
models.Index(fields=["finished_at"]),
]
def __str__(self) -> str:
return f"{self.provider_namespace} | {self.job_type} | {self.status}"
class IngestionError(models.Model):
class Severity(models.TextChoices):
WARNING = "warning", "Warning"
ERROR = "error", "Error"
CRITICAL = "critical", "Critical"
ingestion_run = models.ForeignKey(
"ingestion.IngestionRun",
on_delete=models.CASCADE,
related_name="errors",
)
provider_namespace = models.CharField(max_length=80)
entity_type = models.CharField(max_length=80, blank=True)
external_id = models.CharField(max_length=160, blank=True)
severity = models.CharField(max_length=16, choices=Severity.choices, default=Severity.ERROR)
message = models.TextField()
raw_payload = models.JSONField(default=dict, blank=True)
occurred_at = models.DateTimeField(auto_now_add=True)
class Meta:
ordering = ["-occurred_at"]
indexes = [
models.Index(fields=["ingestion_run", "occurred_at"]),
models.Index(fields=["provider_namespace", "entity_type", "external_id"]),
models.Index(fields=["severity"]),
]
def __str__(self) -> str:
return f"{self.provider_namespace}:{self.entity_type} [{self.severity}]"