fix(v2-ingestion): harden LBA/BCL snapshot contract for public data
This commit is contained in:
@@ -16,6 +16,28 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||
return None
|
||||
|
||||
|
||||
# Keys that must hold a non-empty value in every normalized BCL row.
# The extractor's normalization step lists any of these that come back as
# None or "" in its "missing required fields" error; keys outside this set
# are not part of that check.
ESSENTIAL_FIELDS = {
    # Competition and season context.
    "competition_external_id",
    "competition_name",
    "season",
    # Team identity.
    "team_external_id",
    "team_name",
    # Player identity.
    "player_external_id",
    "full_name",
    # Appearances and per-game averages.
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    # Shooting percentages.
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
|
||||
|
||||
|
||||
class BCLSnapshotExtractor(BaseSnapshotExtractor):
|
||||
"""
|
||||
Basketball Champions League MVP extractor.
|
||||
@@ -122,7 +144,7 @@ class BCLSnapshotExtractor(BaseSnapshotExtractor):
|
||||
"ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"),
|
||||
}
|
||||
|
||||
missing = [key for key, value in normalized.items() if key != "role" and value in (None, "")]
|
||||
missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")]
|
||||
if missing:
|
||||
raise ExtractorNormalizationError(f"bcl row missing required fields: {', '.join(sorted(missing))}")
|
||||
|
||||
|
||||
@@ -16,6 +16,28 @@ def _first_non_empty(record: dict[str, Any], *keys: str) -> Any:
|
||||
return None
|
||||
|
||||
|
||||
# Keys that must hold a non-empty value in every normalized LBA row.
# The extractor's normalization step lists any of these that come back as
# None or "" in its "missing required fields" error; keys outside this set
# are not part of that check.
ESSENTIAL_FIELDS = {
    # Competition and season context.
    "competition_external_id",
    "competition_name",
    "season",
    # Team identity.
    "team_external_id",
    "team_name",
    # Player identity.
    "player_external_id",
    "full_name",
    # Appearances and per-game averages.
    "games_played",
    "minutes_per_game",
    "points_per_game",
    "rebounds_per_game",
    "assists_per_game",
    "steals_per_game",
    "blocks_per_game",
    "turnovers_per_game",
    # Shooting percentages.
    "fg_pct",
    "three_pt_pct",
    "ft_pct",
}
|
||||
|
||||
|
||||
class LBASnapshotExtractor(BaseSnapshotExtractor):
|
||||
"""
|
||||
LBA (Lega Basket Serie A) MVP extractor.
|
||||
@@ -122,7 +144,7 @@ class LBASnapshotExtractor(BaseSnapshotExtractor):
|
||||
"ft_pct": _first_non_empty(source_record, "ft_pct", "ft_percentage"),
|
||||
}
|
||||
|
||||
missing = [key for key, value in normalized.items() if key != "role" and value in (None, "")]
|
||||
missing = [key for key in ESSENTIAL_FIELDS if normalized.get(key) in (None, "")]
|
||||
if missing:
|
||||
raise ExtractorNormalizationError(f"lba row missing required fields: {', '.join(sorted(missing))}")
|
||||
|
||||
|
||||
@@ -62,6 +62,21 @@ def _parse_season_dates(label: str) -> tuple[date, date]:
|
||||
return date(year, 9, 1), date(year + 1, 7, 31)
|
||||
|
||||
|
||||
def _parse_optional_birth_date(value: str | None) -> date | None:
    """Parse a birth date that is allowed to be absent.

    Args:
        value: Date text, or ``None`` / ``""`` when the source has no
            birth date for the player.

    Returns:
        ``None`` for an absent value; otherwise whatever ``parse_date``
        returns for ``value``.
        NOTE(review): ``parse_date`` is imported outside this view,
        presumably ``django.utils.dateparse.parse_date`` -- confirm how it
        treats malformed or impossible dates.
    """
    # Absent values short-circuit so parse_date is never handed None.
    return None if value in (None, "") else parse_date(value)
|
||||
|
||||
|
||||
def _split_name_parts(full_name: str) -> tuple[str, str]:
    """Split a display name into ``(first, rest)`` at its first whitespace run.

    Surrounding whitespace is ignored. Everything after the first token is
    returned unchanged as the second element, so multi-word surnames stay
    together.

    Args:
        full_name: The player's full display name.

    Returns:
        ``("", "")`` for a blank name, ``(name, "")`` for a single token,
        otherwise ``(first_token, remainder)``.
    """
    tokens = full_name.strip().split(maxsplit=1)
    # Pad to exactly two entries so blank and single-token names need no
    # separate branches.
    tokens += [""] * (2 - len(tokens))
    return tokens[0], tokens[1]
|
||||
|
||||
|
||||
def _resolve_nationality(value: str | None) -> Nationality | None:
|
||||
if not value:
|
||||
return None
|
||||
@@ -152,10 +167,13 @@ def _upsert_record(record: dict[str, Any], *, source_name: str, snapshot_date: d
|
||||
},
|
||||
)
|
||||
|
||||
position, _ = Position.objects.get_or_create(
|
||||
code=_position_code(record["position"]),
|
||||
defaults={"name": record["position"]},
|
||||
)
|
||||
position_value = record.get("position")
|
||||
position = None
|
||||
if position_value:
|
||||
position, _ = Position.objects.get_or_create(
|
||||
code=_position_code(position_value),
|
||||
defaults={"name": position_value},
|
||||
)
|
||||
role = None
|
||||
if record.get("role"):
|
||||
role, _ = Role.objects.get_or_create(
|
||||
@@ -163,19 +181,24 @@ def _upsert_record(record: dict[str, Any], *, source_name: str, snapshot_date: d
|
||||
defaults={"name": record["role"]},
|
||||
)
|
||||
|
||||
first_name = record.get("first_name") or ""
|
||||
last_name = record.get("last_name") or ""
|
||||
if not first_name and not last_name:
|
||||
first_name, last_name = _split_name_parts(record["full_name"])
|
||||
|
||||
player, _ = Player.objects.update_or_create(
|
||||
source_name=source_key,
|
||||
source_uid=record["player_external_id"],
|
||||
defaults={
|
||||
"first_name": record["first_name"],
|
||||
"last_name": record["last_name"],
|
||||
"first_name": first_name,
|
||||
"last_name": last_name,
|
||||
"full_name": record["full_name"],
|
||||
"birth_date": parse_date(record["birth_date"]),
|
||||
"birth_date": _parse_optional_birth_date(record.get("birth_date")),
|
||||
"nationality": _resolve_nationality(record.get("nationality")),
|
||||
"nominal_position": position,
|
||||
"inferred_role": role,
|
||||
"height_cm": record["height_cm"],
|
||||
"weight_kg": record["weight_kg"],
|
||||
"height_cm": record.get("height_cm"),
|
||||
"weight_kg": record.get("weight_kg"),
|
||||
"is_active": True,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -14,13 +14,6 @@ REQUIRED_RECORD_FIELDS = {
|
||||
"team_name",
|
||||
"player_external_id",
|
||||
"full_name",
|
||||
"first_name",
|
||||
"last_name",
|
||||
"birth_date",
|
||||
"nationality",
|
||||
"height_cm",
|
||||
"weight_kg",
|
||||
"position",
|
||||
"games_played",
|
||||
"minutes_per_game",
|
||||
"points_per_game",
|
||||
@@ -34,6 +27,16 @@ REQUIRED_RECORD_FIELDS = {
|
||||
"ft_pct",
|
||||
}
|
||||
|
||||
# Record keys a snapshot row may leave out or leave empty. They are still
# accepted keys: the set is unioned into ALLOWED_RECORD_FIELDS.
OPTIONAL_RECORD_FIELDS = {
    # Name components.
    "first_name",
    "last_name",
    # Biographical details.
    "birth_date",
    "nationality",
    # Physical measurements.
    "height_cm",
    "weight_kg",
    # Nominal playing position.
    "position",
}
|
||||
|
||||
ALLOWED_TOP_LEVEL_FIELDS = {
|
||||
"source_name",
|
||||
"snapshot_date",
|
||||
@@ -42,7 +45,7 @@ ALLOWED_TOP_LEVEL_FIELDS = {
|
||||
"raw_payload",
|
||||
}
|
||||
|
||||
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | {
|
||||
ALLOWED_RECORD_FIELDS = REQUIRED_RECORD_FIELDS | OPTIONAL_RECORD_FIELDS | {
|
||||
"role",
|
||||
"source_metadata",
|
||||
"raw_payload",
|
||||
@@ -69,6 +72,15 @@ class SnapshotSchemaValidator:
|
||||
raise SnapshotValidationError(f"{field} must be a non-empty string")
|
||||
return value.strip()
|
||||
|
||||
@staticmethod
def _optional_string(value: Any, field: str) -> str | None:
    """Normalize an optional text field to a stripped string or ``None``.

    Args:
        value: Raw value taken from the record; may be absent.
        field: Field path used in the error message.

    Returns:
        ``None`` when ``value`` is ``None``, empty, or whitespace-only;
        otherwise ``value`` with surrounding whitespace removed.

    Raises:
        SnapshotValidationError: If ``value`` is present but not a string.
    """
    if value in (None, ""):
        return None
    if isinstance(value, str):
        # Whitespace-only text collapses to "" and is reported as absent.
        return value.strip() or None
    raise SnapshotValidationError(f"{field} must be a string when provided")
|
||||
|
||||
@staticmethod
|
||||
def _require_non_negative_int(value: Any, field: str) -> int:
|
||||
if isinstance(value, bool):
|
||||
@@ -81,6 +93,12 @@ class SnapshotSchemaValidator:
|
||||
raise SnapshotValidationError(f"{field} must be a non-negative integer")
|
||||
return parsed
|
||||
|
||||
@classmethod
def _optional_non_negative_int(cls, value: Any, field: str) -> int | None:
    """Validate a non-negative integer field that is allowed to be absent.

    Args:
        value: Raw value taken from the record; ``None`` or ``""`` means
            the field was not supplied.
        field: Field path forwarded to the strict validator for its error
            message.

    Returns:
        ``None`` when the value is absent; otherwise the result of
        ``_require_non_negative_int`` for the same arguments, which also
        raises for anything it rejects.
    """
    is_absent = value in (None, "")
    return None if is_absent else cls._require_non_negative_int(value, field)
|
||||
|
||||
@staticmethod
|
||||
def _require_float(value: Any, field: str) -> float:
|
||||
try:
|
||||
@@ -112,23 +130,26 @@ class SnapshotSchemaValidator:
|
||||
"team_name",
|
||||
"player_external_id",
|
||||
"full_name",
|
||||
"first_name",
|
||||
"last_name",
|
||||
"nationality",
|
||||
"position",
|
||||
):
|
||||
normalized[field] = cls._require_string(record.get(field), f"record[{index}].{field}")
|
||||
|
||||
for field in ("first_name", "last_name", "nationality", "position"):
|
||||
normalized[field] = cls._optional_string(record.get(field), f"record[{index}].{field}")
|
||||
|
||||
if record.get("role") is not None:
|
||||
normalized["role"] = cls._require_string(record.get("role"), f"record[{index}].role")
|
||||
|
||||
birth_date = parse_date(str(record.get("birth_date")))
|
||||
if not birth_date:
|
||||
raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
|
||||
normalized["birth_date"] = birth_date.isoformat()
|
||||
birth_date_raw = record.get("birth_date")
|
||||
if birth_date_raw in (None, ""):
|
||||
normalized["birth_date"] = None
|
||||
else:
|
||||
birth_date = parse_date(str(birth_date_raw))
|
||||
if not birth_date:
|
||||
raise SnapshotValidationError(f"record[{index}].birth_date must be YYYY-MM-DD")
|
||||
normalized["birth_date"] = birth_date.isoformat()
|
||||
|
||||
normalized["height_cm"] = cls._require_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
|
||||
normalized["weight_kg"] = cls._require_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
|
||||
normalized["height_cm"] = cls._optional_non_negative_int(record.get("height_cm"), f"record[{index}].height_cm")
|
||||
normalized["weight_kg"] = cls._optional_non_negative_int(record.get("weight_kg"), f"record[{index}].weight_kg")
|
||||
normalized["games_played"] = cls._require_non_negative_int(record.get("games_played"), f"record[{index}].games_played")
|
||||
|
||||
for field in (
|
||||
|
||||
Reference in New Issue
Block a user