All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
The geocoding pass over ~15k schools takes longer than any reasonable
HTTP timeout. New approach:
- POST /api/admin/reimport-ks2 starts migration in background thread,
returns {"status":"started"} immediately
- GET /api/admin/reimport-ks2/status returns {running, done}
- ks2.py polls status every 30s (max 2h) before returning
- Kestra flow timeout bumped to PT2H
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
"""
|
|
KS2 attainment data re-importer.

Triggers a full re-import of the KS2 CSV data by calling the backend's
admin endpoint. The backend owns the migration logic and CSV column mappings;
this module is a thin trigger so the re-import can be orchestrated via Kestra
like all other data sources.

The CSV files must already be present in the data volume under
/data/{year}/england_ks2final.csv
(populated at deploy time from the repo's data/ directory).
|
|
"""
|
|
import time
|
|
import requests
|
|
from config import BACKEND_URL, ADMIN_API_KEY
|
|
|
|
# Admin API key header attached to every request this module sends to the backend.
HEADERS = {"X-API-Key": ADMIN_API_KEY}

# Pause between two consecutive status checks, in seconds.
POLL_INTERVAL = 30

# Upper bound on the total time spent polling, in seconds (2 hours).
MAX_WAIT = 2 * 60 * 60
|
|
|
|
|
|
def download():
    """Report that there is nothing to fetch for this data source.

    The KS2 CSVs are shipped with the repo, so this step only logs a notice.

    Returns:
        A dict with the single key ``"skipped"`` set to ``True``.
    """
    outcome = {"skipped": True}
    print("KS2 CSVs are bundled in the data volume; no download needed.")
    return outcome
|
|
|
|
|
|
def load():
    """Trigger a full KS2 re-import on the backend and poll until it finishes.

    POSTs to the admin re-import endpoint (with ``geocode=true``), then checks
    the status endpoint every ``POLL_INTERVAL`` seconds until the backend
    reports ``done`` or ``MAX_WAIT`` seconds of wall-clock time have passed.

    Returns:
        The final status payload (parsed JSON) in which ``done`` is truthy.

    Raises:
        requests.RequestException: if the initial POST fails, if a status
            check is rejected with a 4xx response, or if too many status
            checks fail in a row.
        RuntimeError: if the backend reports it is neither running nor done.
        TimeoutError: if the re-import has not finished within ``MAX_WAIT``.
    """
    start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
    status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status"

    print(f"POST {start_url}")
    resp = requests.post(start_url, headers=HEADERS, timeout=30)
    resp.raise_for_status()
    print(f"Started: {resp.json()}")

    print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...")

    # One failed status check must not abort a wait that can last hours while
    # the backend keeps working; give up only after this many failures in a row.
    max_poll_failures = 5
    failures = 0

    # Measure real elapsed time: counting sleep intervals alone ignores the
    # time spent inside the status requests and lets the wait overrun MAX_WAIT.
    started = time.monotonic()
    deadline = started + MAX_WAIT

    while time.monotonic() < deadline:
        time.sleep(POLL_INTERVAL)
        elapsed_min = int(time.monotonic() - started) // 60

        try:
            sr = requests.get(status_url, headers=HEADERS, timeout=15)
            sr.raise_for_status()
            state = sr.json()
        except requests.RequestException as exc:
            # 4xx means the request itself is wrong (bad key, bad URL);
            # retrying cannot help, so surface it immediately.
            status_code = getattr(exc.response, "status_code", None)
            client_error = status_code is not None and 400 <= status_code < 500
            failures += 1
            if client_error or failures >= max_poll_failures:
                raise
            print(f"  [{elapsed_min}m] status check failed ({failures}/{max_poll_failures}): {exc}")
            continue

        failures = 0
        print(f"  [{elapsed_min}m] {state}")

        if state.get("done"):
            print("Re-import complete.")
            return state
        if not state.get("running"):
            # Neither done nor running: the background job ended without finishing.
            raise RuntimeError(f"Re-import stopped unexpectedly: {state}")

    raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: run the download step, the load step, or both.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    action = cli.parse_args().action

    # argparse restricts --action to the three choices above, so "anything
    # except load" is exactly {"download", "all"}, and likewise for load.
    if action != "load":
        download()
    if action != "download":
        load()
|