feat(data): integrate 9 UK government data sources via Kestra
Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.
Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
ofsted_parent_view, school_census, admissions, sen_detail, phonics,
school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date
Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry
Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
70
integrator/server.py
Normal file
70
integrator/server.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Data integrator HTTP server.
|
||||
Kestra calls this server via HTTP tasks to trigger download/load operations.
|
||||
"""
|
||||
import importlib
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
# Put /app/scripts at the front of the import path so that the
# ``sources.<name>`` modules imported by the endpoints can be resolved
# (presumably the ``sources`` package lives there -- confirm against the image).
sys.path.insert(0, "/app/scripts")

app = FastAPI(title="SchoolCompare Data Integrator", version="1.0.0")

# Whitelist of source names accepted by the endpoints. Each name is imported
# dynamically as ``sources.<name>``, so only names listed here are ever
# passed to importlib.
SOURCES = {
    "ofsted",
    "gias",
    "parent_view",
    "census",
    "admissions",
    "sen_detail",
    "phonics",
    "idaci",
    "finance",
}
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    """Return a constant payload indicating the server is responding."""
    payload = {"status": "ok"}
    return payload
|
||||
|
||||
|
||||
@app.post("/run/{source}")
def run_source(source: str, action: str = "all"):
    """
    Trigger a data source download and/or load.

    Args:
        source: Name of the source to run; must be a member of SOURCES.
            The module ``sources.<source>`` is imported on demand.
        action: "download" | "load" | "all".

    Returns:
        A dict with the requested ``source`` and ``action`` plus ``result``:
        whatever the module's ``load()`` returned, or ``{}`` when only a
        download was requested.

    Raises:
        HTTPException: 404 for an unknown source, 400 for an invalid action,
            and 500 when importing or running the source module fails. The
            500 ``detail`` carries the error message and the traceback.
    """
    if source not in SOURCES:
        raise HTTPException(status_code=404, detail=f"Unknown source '{source}'. Available: {sorted(SOURCES)}")
    if action not in ("download", "load", "all"):
        raise HTTPException(status_code=400, detail="action must be 'download', 'load', or 'all'")

    try:
        mod = importlib.import_module(f"sources.{source}")
        result = {}

        if action in ("download", "all"):
            mod.download()

        if action in ("load", "all"):
            result = mod.load()

        return {"source": source, "action": action, "result": result}

    except Exception as e:
        # Top-level boundary: any failure in the source module is reported to
        # the caller as a 500 with the traceback in the response body.
        tb = traceback.format_exc()
        # Chain explicitly so the original exception is kept as __cause__.
        raise HTTPException(status_code=500, detail={"error": str(e), "traceback": tb}) from e
|
||||
|
||||
|
||||
@app.post("/run-all")
def run_all(action: str = "all"):
    """
    Trigger all sources in sequence.

    Sources are processed in sorted name order. A failure in one source is
    recorded and does not stop the remaining sources from running.

    Args:
        action: "download" | "load" | "all".

    Returns:
        A dict keyed by source name. Each value is the return value of that
        module's ``load()``, or ``{"error": <message>}`` if the source raised.
        Sources that only downloaded successfully have no entry.

    Raises:
        HTTPException: 400 when ``action`` is not one of the accepted values.
    """
    # Validate up front, matching /run/{source}; otherwise an unrecognised
    # action would import every module, do nothing, and report success.
    if action not in ("download", "load", "all"):
        raise HTTPException(status_code=400, detail="action must be 'download', 'load', or 'all'")

    results = {}
    for source in sorted(SOURCES):
        try:
            mod = importlib.import_module(f"sources.{source}")
            if action in ("download", "all"):
                mod.download()
            if action in ("load", "all"):
                results[source] = mod.load()
        except Exception as e:
            # Deliberately best-effort: record the failure and carry on.
            results[source] = {"error": str(e)}
    return results
|
||||
Reference in New Issue
Block a user