# school_compare/integrator/scripts/sources/ks2.py

"""
KS2 attainment data re-importer.

Triggers a full re-import of the KS2 CSV data by calling the backend's
admin endpoint. The backend owns the migration logic and CSV column mappings;
this module is a thin trigger so the re-import can be orchestrated via Kestra
like all other data sources.

The CSV files must already be present in the data volume under
  /data/{year}/england_ks2final.csv
(populated at deploy time from the repo's data/ directory).
"""
import requests
from config import BACKEND_URL, ADMIN_API_KEY

HEADERS = {"X-API-Key": ADMIN_API_KEY}


def download():
    """Report that the KS2 source has nothing to download.

    The KS2 CSVs are expected to already be in the data volume, so this step
    only prints a notice.

    Returns:
        dict: Always ``{"skipped": True}``.
    """
    outcome = {"skipped": True}
    print("KS2 CSVs are bundled in the data volume; no download needed.")
    return outcome


def load():
    """Ask the backend to start a KS2 re-import (with geocoding enabled).

    Sends an authenticated POST to the admin re-import endpoint and returns
    as soon as the backend responds. Per the backend's contract, the
    migration itself (including geocoding) continues in a background thread
    there and can take up to an hour; poll
    GET /api/admin/reimport-ks2/status for progress, or wait for schools to
    appear in the UI.

    Returns:
        The decoded JSON body of the backend's response.

    Raises:
        requests.HTTPError: If the backend answers with a 4xx/5xx status.
    """
    endpoint = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
    print(f"POST {endpoint}")

    # The 30 s timeout only bounds this trigger request, not the import itself.
    response = requests.post(endpoint, headers=HEADERS, timeout=30)
    response.raise_for_status()

    payload = response.json()
    print(f"Result: {payload}")
    return payload


if __name__ == "__main__":
    # Command-line entry point: run the download step, the load step, or both.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    action = cli.parse_args().action

    # argparse restricts `action` to the three choices above, so "anything but
    # load" means download/all, and "anything but download" means load/all.
    # Download always runs before load.
    if action != "load":
        download()
    if action != "download":
        load()