Files
school_compare/integrator/scripts/sources/ks2.py

67 lines
2.2 KiB
Python
Raw Normal View History

"""
KS2 attainment data re-importer.
Triggers a full re-import of the KS2 CSV data by calling the backend's
admin endpoint. The backend owns the migration logic and CSV column mappings;
this module is a thin trigger so the re-import can be orchestrated via Kestra
like all other data sources.
The CSV files must already be present in the data volume under
/data/{year}/england_ks2final.csv
(populated at deploy time from the repo's data/ directory).
"""
import time
import requests
from config import BACKEND_URL, ADMIN_API_KEY
# Header carrying the admin API key; passed on both the start and status requests.
HEADERS = {
    "X-API-Key": ADMIN_API_KEY,
}

# Seconds to sleep between two consecutive status checks.
POLL_INTERVAL = 30

# Upper bound, in seconds, on how long to wait for the re-import (2 hours).
MAX_WAIT = 2 * 60 * 60
def download():
    """Report that there is nothing to fetch for this source.

    Prints a short notice and returns a marker dict so callers can tell the
    step was skipped.

    Returns:
        ``{"skipped": True}``
    """
    notice = "KS2 CSVs are bundled in the data volume; no download needed."
    print(notice)

    outcome = {"skipped": True}
    return outcome
def load():
    """Trigger a full KS2 re-import on the backend and poll until it finishes.

    POSTs to the admin ``reimport-ks2`` endpoint (with ``geocode=true``), then
    GETs the matching ``/status`` endpoint every ``POLL_INTERVAL`` seconds and
    prints each status payload.

    Returns:
        The decoded status JSON from the first poll whose ``done`` field is
        truthy.

    Raises:
        requests.HTTPError: If the start request or any status request comes
            back with an error status (raised by ``raise_for_status``).
        RuntimeError: If a status payload reports neither ``done`` nor
            ``running``.
        TimeoutError: If the re-import has not reported ``done`` within
            ``MAX_WAIT`` seconds of wall-clock time.
    """
    start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
    status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status"

    print(f"POST {start_url}")
    resp = requests.post(start_url, headers=HEADERS, timeout=30)
    resp.raise_for_status()
    print(f"Started: {resp.json()}")

    print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...")

    # Measure against a monotonic clock rather than summing POLL_INTERVAL:
    # each status request may itself block for up to its 15 s timeout, and
    # that time has to count toward MAX_WAIT for the limit to be honoured.
    started = time.monotonic()
    deadline = started + MAX_WAIT

    while time.monotonic() < deadline:
        time.sleep(POLL_INTERVAL)

        sr = requests.get(status_url, headers=HEADERS, timeout=15)
        sr.raise_for_status()
        state = sr.json()

        elapsed = int(time.monotonic() - started)
        print(f" [{elapsed // 60}m] {state}")

        if state.get("done"):
            print("Re-import complete.")
            return state

        # Not done and not running: the job ended without completing.
        if not state.get("running"):
            raise RuntimeError(f"Re-import stopped unexpectedly: {state}")

    raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: choose which step(s) to run.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--action",
        choices=["download", "load", "all"],
        default="all",
    )
    chosen = cli.parse_args().action

    # "all" runs every step; the tuple order keeps download ahead of load.
    for step_name, step in (("download", download), ("load", load)):
        if chosen == "all" or chosen == step_name:
            step()