"""
KS2 attainment data re-importer.

Triggers a full re-import of the KS2 CSV data by calling the backend's
admin endpoint. The backend owns the migration logic and CSV column
mappings; this module is a thin trigger so the re-import can be
orchestrated via Kestra like all other data sources.

The CSV files must already be present in the data volume under
/data/{year}/england_ks2final.csv (populated at deploy time from the
repo's data/ directory).
"""
import time

import requests

from config import BACKEND_URL, ADMIN_API_KEY

# Sent with every admin request (both the start call and the status polls).
HEADERS = {"X-API-Key": ADMIN_API_KEY}
POLL_INTERVAL = 30  # seconds between status checks
MAX_WAIT = 7200  # 2 hours


def download() -> dict:
    """No download step — CSVs are shipped with the repo.

    Returns:
        ``{"skipped": True}`` so callers can tell that nothing was fetched.
    """
    print("KS2 CSVs are bundled in the data volume; no download needed.")
    return {"skipped": True}


def load() -> dict:
    """Trigger full KS2 re-import and poll until complete.

    POSTs to the backend's ``reimport-ks2`` admin endpoint, then GETs the
    status endpoint every ``POLL_INTERVAL`` seconds until the reported state
    has a truthy ``"done"`` value.

    Returns:
        The decoded JSON status payload from the poll that reported ``done``.

    Raises:
        requests.HTTPError: If the start request or a status poll returns an
            error status (via ``raise_for_status``).
        RuntimeError: If a poll reports neither ``done`` nor ``running``.
        TimeoutError: If the re-import is not done within ``MAX_WAIT``
            seconds of wall-clock time.
    """
    start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
    status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status"

    print(f"POST {start_url}")
    resp = requests.post(start_url, headers=HEADERS, timeout=30)
    resp.raise_for_status()
    print(f"Started: {resp.json()}")

    print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...")

    # Measure real elapsed time with a monotonic clock. Counting only the
    # sleep intervals would ignore the time spent inside each status request
    # (up to its 15 s timeout), letting the loop overrun MAX_WAIT and making
    # the progress label under-report.
    started = time.monotonic()
    deadline = started + MAX_WAIT

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        # Never sleep past the deadline; the last poll happens right at it.
        time.sleep(min(POLL_INTERVAL, remaining))

        sr = requests.get(status_url, headers=HEADERS, timeout=15)
        sr.raise_for_status()
        state = sr.json()

        elapsed = int(time.monotonic() - started)
        print(f"  [{elapsed // 60}m] {state}")

        if state.get("done"):
            print("Re-import complete.")
            return state
        if not state.get("running"):
            # Neither finished nor in progress: the backend job has stopped.
            raise RuntimeError(f"Re-import stopped unexpectedly: {state}")

    raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
    args = parser.parse_args()

    # "all" runs both steps in order: download first, then load.
    if args.action in ("download", "all"):
        download()
    if args.action in ("load", "all"):
        load()