fix(ks2): make reimport async with polling to avoid HTTP timeout
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
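The geocoding pass over ~15k schools takes longer than any reasonable
HTTP timeout, so the re-import no longer runs inside a single request. New approach:
- POST /api/admin/reimport-ks2 starts the migration in a background thread
and returns {"status":"started"} immediately
- GET /api/admin/reimport-ks2/status returns {running, done}
- ks2.py polls the status endpoint every 30s (max 2h) and returns once done is true; it raises if the job stops without finishing or the 2h limit is reached
- Kestra flow timeout bumped to PT2H (2 hours) to match the polling limit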
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,10 +10,14 @@ The CSV files must already be present in the data volume under
|
||||
/data/{year}/england_ks2final.csv
|
||||
(populated at deploy time from the repo's data/ directory).
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
from config import BACKEND_URL, ADMIN_API_KEY
|
||||
|
||||
HEADERS = {"X-API-Key": ADMIN_API_KEY}
|
||||
POLL_INTERVAL = 30 # seconds between status checks
|
||||
MAX_WAIT = 7200 # 2 hours
|
||||
|
||||
|
||||
def download():
|
||||
"""No download step — CSVs are shipped with the repo."""
|
||||
@@ -22,18 +26,33 @@ def download():
|
||||
|
||||
|
||||
def load():
|
||||
"""Trigger full KS2 re-import via the backend admin endpoint (with geocoding)."""
|
||||
url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
|
||||
print(f"POST {url}")
|
||||
resp = requests.post(
|
||||
url,
|
||||
headers={"X-API-Key": ADMIN_API_KEY},
|
||||
timeout=900, # migration can take ~10 minutes
|
||||
)
|
||||
"""Trigger full KS2 re-import and poll until complete."""
|
||||
start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
|
||||
status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status"
|
||||
|
||||
print(f"POST {start_url}")
|
||||
resp = requests.post(start_url, headers=HEADERS, timeout=30)
|
||||
resp.raise_for_status()
|
||||
result = resp.json()
|
||||
print(f"Result: {result}")
|
||||
return result
|
||||
print(f"Started: {resp.json()}")
|
||||
|
||||
print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...")
|
||||
elapsed = 0
|
||||
while elapsed < MAX_WAIT:
|
||||
time.sleep(POLL_INTERVAL)
|
||||
elapsed += POLL_INTERVAL
|
||||
|
||||
sr = requests.get(status_url, headers=HEADERS, timeout=15)
|
||||
sr.raise_for_status()
|
||||
state = sr.json()
|
||||
print(f" [{elapsed // 60}m] {state}")
|
||||
|
||||
if state.get("done"):
|
||||
print("Re-import complete.")
|
||||
return state
|
||||
if not state.get("running"):
|
||||
raise RuntimeError(f"Re-import stopped unexpectedly: {state}")
|
||||
|
||||
raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user