Files
school_compare/integrator/scripts/sources/ks2.py
Tudor d00dc699cc
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m8s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
fix(ks2): fire-and-forget instead of polling to avoid socket timeout
Kestra's HTTP client socket read timeout is shorter than any reasonable
wait for a full geocoded migration. POST /api/admin/reimport-ks2 returns
immediately with {status:started}; the backend runs the job in a thread.
Check GET /api/admin/reimport-ks2/status or watch the UI for schools.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 21:21:31 +00:00

50 lines
1.6 KiB
Python

"""
KS2 attainment data re-importer.
Triggers a full re-import of the KS2 CSV data by calling the backend's
admin endpoint. The backend owns the migration logic and CSV column mappings;
this module is a thin trigger so the re-import can be orchestrated via Kestra
like all other data sources.
The CSV files must already be present in the data volume under
/data/{year}/england_ks2final.csv
(populated at deploy time from the repo's data/ directory).
"""
import requests
from config import BACKEND_URL, ADMIN_API_KEY
HEADERS = {"X-API-Key": ADMIN_API_KEY}
def download():
"""No download step — CSVs are shipped with the repo."""
print("KS2 CSVs are bundled in the data volume; no download needed.")
return {"skipped": True}
def load():
"""Trigger KS2 re-import on the backend and return immediately.
The migration (including geocoding) runs as a background thread on the
backend and can take up to an hour. Poll GET /api/admin/reimport-ks2/status
to check progress, or simply wait for schools to appear in the UI.
"""
url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
print(f"POST {url}")
resp = requests.post(url, headers=HEADERS, timeout=30)
resp.raise_for_status()
result = resp.json()
print(f"Result: {result}")
return result
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--action", choices=["download", "load", "all"], default="all")
args = parser.parse_args()
if args.action in ("download", "all"):
download()
if args.action in ("load", "all"):
load()