fix(ks2): make reimport async with polling to avoid HTTP timeout
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
The geocoding pass over ~15k schools takes longer than any reasonable
HTTP timeout. New approach:
- POST /api/admin/reimport-ks2 starts migration in background thread,
returns {"status":"started"} immediately
- GET /api/admin/reimport-ks2/status returns {running, done}
- ks2.py polls status every 30s (max 2h) before returning
- Kestra flow timeout bumped to PT2H
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -635,6 +635,9 @@ async def reload_data(
|
|||||||
return {"status": "reloaded"}
|
return {"status": "reloaded"}
|
||||||
|
|
||||||
|
|
||||||
|
_reimport_status: dict = {"running": False, "done": False, "error": None}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/admin/reimport-ks2")
|
@app.post("/api/admin/reimport-ks2")
|
||||||
@limiter.limit("2/minute")
|
@limiter.limit("2/minute")
|
||||||
async def reimport_ks2(
|
async def reimport_ks2(
|
||||||
@@ -643,19 +646,45 @@ async def reimport_ks2(
|
|||||||
_: bool = Depends(verify_admin_api_key)
|
_: bool = Depends(verify_admin_api_key)
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Re-run the full KS2 CSV migration (drop + reimport schools and results).
|
Start a full KS2 CSV migration in the background and return immediately.
|
||||||
Use when the database has been wiped and needs repopulating from the CSV files.
|
Poll GET /api/admin/reimport-ks2/status to check progress.
|
||||||
Pass ?geocode=false to skip postcode → lat/lng resolution.
|
Pass ?geocode=false to skip postcode → lat/lng resolution.
|
||||||
Runs synchronously — expect this to take several minutes.
|
|
||||||
Requires X-API-Key header with valid admin API key.
|
Requires X-API-Key header with valid admin API key.
|
||||||
"""
|
"""
|
||||||
loop = asyncio.get_event_loop()
|
global _reimport_status
|
||||||
success = await loop.run_in_executor(None, lambda: run_full_migration(geocode=geocode))
|
if _reimport_status["running"]:
|
||||||
|
return {"status": "already_running"}
|
||||||
|
|
||||||
|
_reimport_status = {"running": True, "done": False, "error": None}
|
||||||
|
|
||||||
|
def _run():
|
||||||
|
global _reimport_status
|
||||||
|
try:
|
||||||
|
success = run_full_migration(geocode=geocode)
|
||||||
if not success:
|
if not success:
|
||||||
raise HTTPException(status_code=500, detail="Migration failed: no CSV data found in data directory")
|
_reimport_status = {"running": False, "done": False, "error": "No CSV data found"}
|
||||||
|
return
|
||||||
clear_cache()
|
clear_cache()
|
||||||
load_school_data()
|
load_school_data()
|
||||||
return {"status": "reimported"}
|
_reimport_status = {"running": False, "done": True, "error": None}
|
||||||
|
except Exception as exc:
|
||||||
|
_reimport_status = {"running": False, "done": False, "error": str(exc)}
|
||||||
|
|
||||||
|
import threading
|
||||||
|
threading.Thread(target=_run, daemon=True).start()
|
||||||
|
return {"status": "started"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/admin/reimport-ks2/status")
|
||||||
|
async def reimport_ks2_status(
|
||||||
|
request: Request,
|
||||||
|
_: bool = Depends(verify_admin_api_key)
|
||||||
|
):
|
||||||
|
"""Poll this endpoint to check reimport progress."""
|
||||||
|
s = _reimport_status
|
||||||
|
if s["error"]:
|
||||||
|
raise HTTPException(status_code=500, detail=s["error"])
|
||||||
|
return {"running": s["running"], "done": s["done"]}
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ tasks:
|
|||||||
uri: http://integrator:8001/run/ks2?action=load
|
uri: http://integrator:8001/run/ks2?action=load
|
||||||
method: POST
|
method: POST
|
||||||
allowFailed: false
|
allowFailed: false
|
||||||
timeout: PT20M # full migration takes ~10 min; give headroom
|
timeout: PT2H # polls backend every 30s; geocoding 15k schools takes up to 1h
|
||||||
|
|
||||||
errors:
|
errors:
|
||||||
- id: notify-failure
|
- id: notify-failure
|
||||||
|
|||||||
@@ -10,10 +10,14 @@ The CSV files must already be present in the data volume under
|
|||||||
/data/{year}/england_ks2final.csv
|
/data/{year}/england_ks2final.csv
|
||||||
(populated at deploy time from the repo's data/ directory).
|
(populated at deploy time from the repo's data/ directory).
|
||||||
"""
|
"""
|
||||||
import sys
|
import time
|
||||||
import requests
|
import requests
|
||||||
from config import BACKEND_URL, ADMIN_API_KEY
|
from config import BACKEND_URL, ADMIN_API_KEY
|
||||||
|
|
||||||
|
HEADERS = {"X-API-Key": ADMIN_API_KEY}
|
||||||
|
POLL_INTERVAL = 30 # seconds between status checks
|
||||||
|
MAX_WAIT = 7200 # 2 hours
|
||||||
|
|
||||||
|
|
||||||
def download():
|
def download():
|
||||||
"""No download step — CSVs are shipped with the repo."""
|
"""No download step — CSVs are shipped with the repo."""
|
||||||
@@ -22,18 +26,33 @@ def download():
|
|||||||
|
|
||||||
|
|
||||||
def load():
|
def load():
|
||||||
"""Trigger full KS2 re-import via the backend admin endpoint (with geocoding)."""
|
"""Trigger full KS2 re-import and poll until complete."""
|
||||||
url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
|
start_url = f"{BACKEND_URL}/api/admin/reimport-ks2?geocode=true"
|
||||||
print(f"POST {url}")
|
status_url = f"{BACKEND_URL}/api/admin/reimport-ks2/status"
|
||||||
resp = requests.post(
|
|
||||||
url,
|
print(f"POST {start_url}")
|
||||||
headers={"X-API-Key": ADMIN_API_KEY},
|
resp = requests.post(start_url, headers=HEADERS, timeout=30)
|
||||||
timeout=900, # migration can take ~10 minutes
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
result = resp.json()
|
print(f"Started: {resp.json()}")
|
||||||
print(f"Result: {result}")
|
|
||||||
return result
|
print(f"Polling {status_url} every {POLL_INTERVAL}s (max {MAX_WAIT // 60} min)...")
|
||||||
|
elapsed = 0
|
||||||
|
while elapsed < MAX_WAIT:
|
||||||
|
time.sleep(POLL_INTERVAL)
|
||||||
|
elapsed += POLL_INTERVAL
|
||||||
|
|
||||||
|
sr = requests.get(status_url, headers=HEADERS, timeout=15)
|
||||||
|
sr.raise_for_status()
|
||||||
|
state = sr.json()
|
||||||
|
print(f" [{elapsed // 60}m] {state}")
|
||||||
|
|
||||||
|
if state.get("done"):
|
||||||
|
print("Re-import complete.")
|
||||||
|
return state
|
||||||
|
if not state.get("running"):
|
||||||
|
raise RuntimeError(f"Re-import stopped unexpectedly: {state}")
|
||||||
|
|
||||||
|
raise TimeoutError(f"KS2 re-import did not complete within {MAX_WAIT // 60} minutes")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user