diff --git a/integrator/flows/ks2.yml b/integrator/flows/ks2.yml index 646c785..d2b2178 100644 --- a/integrator/flows/ks2.yml +++ b/integrator/flows/ks2.yml @@ -10,7 +10,7 @@ tasks: uri: http://integrator:8001/run/ks2?action=load method: POST allowFailed: false - timeout: PT2H # polls backend every 30s; geocoding 15k schools takes up to 1h + timeout: PT30S # fire-and-forget; backend runs migration in background errors: - id: notify-failure diff --git a/integrator/scripts/sources/ks2.py b/integrator/scripts/sources/ks2.py index 21a9f7a..7bc2bdc 100644 --- a/integrator/scripts/sources/ks2.py +++ b/integrator/scripts/sources/ks2.py @@ -10,13 +10,10 @@ The CSV files must already be present in the data volume under /data/{year}/england_ks2final.csv (populated at deploy time from the repo's data/ directory). """ -import time import requests from config import BACKEND_URL, ADMIN_API_KEY HEADERS = {"X-API-Key": ADMIN_API_KEY} -POLL_INTERVAL = 30 # seconds between status checks -MAX_WAIT = 7200 # 2 hours def download(): @@ -26,33 +23,19 @@ def download(): def load(): - """Trigger full KS2 re-import and poll until complete.""" - start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true" - status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status" + """Trigger KS2 re-import on the backend and return immediately. - print(f"POST {start_url}") - resp = requests.post(start_url, headers=HEADERS, timeout=30) + The migration (including geocoding) runs as a background thread on the + backend and can take up to an hour. Poll GET /api/admin/reimport-ks2/status + to check progress, or simply wait for schools to appear in the UI. + """ + url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true" + print(f"POST {url}") + resp = requests.post(url, headers=HEADERS, timeout=30) resp.raise_for_status() - print(f"Started: {resp.json()}") - - print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...") - elapsed = 0 - while elapsed < MAX_WAIT: - time.sleep(POLL_INTERVAL) - elapsed += POLL_INTERVAL - - sr = requests.get(status_url, headers=HEADERS, timeout=15) - sr.raise_for_status() - state = sr.json() - print(f" [{elapsed // 60}m] {state}") - - if state.get("done"): - print("Re-import complete.") - return state - if not state.get("running"): - raise RuntimeError(f"Re-import stopped unexpectedly: {state}") - - raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes") + result = resp.json() + print(f"Result: {result}") + return result if __name__ == "__main__":