fix(v2-ingestion): harden LBA/BCL snapshot contract for public data

This commit is contained in:
Alfredo Di Stasio
2026-03-20 14:46:25 +01:00
parent 1aad6945c7
commit 6066d2a0bb
10 changed files with 339 additions and 31 deletions

View File

@@ -14,13 +14,6 @@ REQUIRED_RECORD_FIELDS = {
"team_name",
"player_external_id",
"full_name",
"first_name",
"last_name",
"birth_date",
"nationality",
"height_cm",
"weight_kg",
"position",
"games_played",
"minutes_per_game",
"points_per_game",
@@ -34,6 +27,16 @@ REQUIRED_RECORD_FIELDS = {
"ft_pct",
}
# Record fields that are accepted in a snapshot record but are not required.
OPTIONAL_RECORD_FIELDS = {
    "birth_date",
    "first_name",
    "height_cm",
    "last_name",
    "nationality",
    "position",
    "weight_kg",
}
ALLOWED_TOP_LEVEL_FIELDS = {
"source_name",
"snapshot_date",
@@ -42,7 +45,7 @@ ALLOWED_TOP_LEVEL_FIELDS = {
"raw_payload",
}
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | {
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | OPTIONAL_RECORD_FIELDS | {
"role",
"source_metadata",
"raw_payload",
@@ -69,6 +72,15 @@ class SnapshotSchemaValidator:
raise SnapshotValidationError(f"{field} must be a non-empty string")
return value.strip()
@staticmethod
def _optional_string(value: Any, field: str) -> str | None:
    """Normalize an optional text value.

    Returns ``None`` when ``value`` is ``None``, an empty string, or a
    string containing only whitespace; otherwise returns the string with
    surrounding whitespace removed.

    Raises:
        SnapshotValidationError: if ``value`` is provided but is not a string.
    """
    if value in (None, ""):
        return None
    if isinstance(value, str):
        cleaned = value.strip()
        # A whitespace-only value collapses to "" and is reported as absent.
        return cleaned if cleaned else None
    raise SnapshotValidationError(f"{field} must be a string when provided")
@staticmethod
def _require_non_negative_int(value: Any, field: str) -> int:
if isinstance(value, bool):
@@ -81,6 +93,12 @@ class SnapshotSchemaValidator:
raise SnapshotValidationError(f"{field} must be a non-negative integer")
return parsed
@classmethod
def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
    """Validate an optional non-negative integer.

    Returns ``None`` when ``value`` is ``None`` or a blank string (empty or
    whitespace-only), which mirrors how ``_optional_string`` treats blank
    input. Any other value is delegated to ``_require_non_negative_int``,
    whose result is returned and whose validation error propagates.
    """
    if value is None:
        return None
    # Blank text means "not provided", including whitespace-only values,
    # so optional numeric fields agree with optional string fields.
    if isinstance(value, str) and not value.strip():
        return None
    return cls._require_non_negative_int(value, field)
@staticmethod
def _require_float(value: Any, field: str) -> float:
try:
@@ -112,23 +130,26 @@ class SnapshotSchemaValidator:
"team_name",
"player_external_id",
"full_name",
"first_name",
"last_name",
"nationality",
"position",
):
normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")
for field in ("first_name", "last_name", "nationality", "position"):
normalized[field] = cls._optional_string(record.get(field), f"record[{index}].{field}")
if record.get("role") is not None:
normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")
birth_date = parse_date(str(record.get("birth_date")))
if not birth_date:
raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
normalized["birth_date"] = birth_date.isoformat()
birth_date_raw = record.get("birth_date")
if birth_date_raw in (None, ""):
normalized["birth_date"] = None
else:
birth_date = parse_date(str(birth_date_raw))
if not birth_date:
raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
normalized["birth_date"] = birth_date.isoformat()
normalized["height_cm"] = cls._require_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
normalized["weight_kg"] = cls._require_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
normalized["height_cm"] = cls._optional_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
normalized["weight_kg"] = cls._optional_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
normalized["games_played"] = cls._require_non_negative_int(record.get("games_played"), f"record[{index}].games_played")
for field in (