feat(v2): add snapshot extractor framework and run command

This commit is contained in:
Alfredo Di Stasio
2026-03-13 14:24:54 +01:00
parent 6fc583c79f
commit 850e4de71b
10 changed files with 796 additions and 0 deletions

View File

@ -156,6 +156,25 @@ STATIC_DATASET_FAILED_DIR = os.getenv(
os.getenv("SNAPSHOT_FAILED_DIR", str(BASE_DIR / "snapshots" / "failed")),
)
# v2 extractor framework runtime settings.
EXTRACTOR_USER_AGENT = os.getenv("EXTRACTOR_USER_AGENT", "HoopScoutBot/2.0 (+https://younerd.org)")
EXTRACTOR_HTTP_TIMEOUT_SECONDS = float(os.getenv("EXTRACTOR_HTTP_TIMEOUT_SECONDS", "15"))
EXTRACTOR_HTTP_RETRIES = int(os.getenv("EXTRACTOR_HTTP_RETRIES", "2"))
EXTRACTOR_RETRY_SLEEP_SECONDS = float(os.getenv("EXTRACTOR_RETRY_SLEEP_SECONDS", "1.0"))
EXTRACTOR_REQUEST_DELAY_SECONDS = float(os.getenv("EXTRACTOR_REQUEST_DELAY_SECONDS", "0.5"))
EXTRACTOR_PUBLIC_JSON_URL = os.getenv("EXTRACTOR_PUBLIC_JSON_URL", "").strip()
EXTRACTOR_PUBLIC_SOURCE_NAME = os.getenv("EXTRACTOR_PUBLIC_SOURCE_NAME", "public_json_source").strip()
EXTRACTOR_INCLUDE_RAW_PAYLOAD = env_bool("EXTRACTOR_INCLUDE_RAW_PAYLOAD", False)
if EXTRACTOR_HTTP_TIMEOUT_SECONDS <= 0:
raise ImproperlyConfigured("EXTRACTOR_HTTP_TIMEOUT_SECONDS must be > 0.")
if EXTRACTOR_HTTP_RETRIES < 0:
raise ImproperlyConfigured("EXTRACTOR_HTTP_RETRIES must be >= 0.")
if EXTRACTOR_RETRY_SLEEP_SECONDS < 0:
raise ImproperlyConfigured("EXTRACTOR_RETRY_SLEEP_SECONDS must be >= 0.")
if EXTRACTOR_REQUEST_DELAY_SECONDS < 0:
raise ImproperlyConfigured("EXTRACTOR_REQUEST_DELAY_SECONDS must be >= 0.")
# Optional scheduler command settings for future v2 snapshot jobs.
SCHEDULER_ENABLED = env_bool("SCHEDULER_ENABLED", False)
SCHEDULER_INTERVAL_SECONDS = int(os.getenv("SCHEDULER_INTERVAL_SECONDS", "900"))