feat(data): integrate 9 UK government data sources via Kestra
Some checks failed
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Trigger Portainer Update (push) Has been cancelled
Build and Push Docker Images / Build Frontend (Next.js) (push) Has been cancelled

Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.

Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
  ofsted_parent_view, school_census, admissions, sen_detail, phonics,
  school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date

Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
  sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry

Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
  survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
  Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-24 11:44:04 +00:00
parent c49593d4d6
commit dd49ef28b2
36 changed files with 2849 additions and 8 deletions

View File

@@ -16,7 +16,11 @@ from sqlalchemy.orm import joinedload, Session
from .config import settings
from .database import SessionLocal, get_db_session
from .models import School, SchoolResult
from .models import (
School, SchoolResult,
OfstedInspection, OfstedParentView, SchoolCensus,
SchoolAdmissions, SenDetail, Phonics, SchoolDeprivation, SchoolFinance,
)
from .schemas import SCHOOL_TYPE_MAP
# Cache for user search postcode geocoding (not for school data)
@@ -381,6 +385,12 @@ def school_to_dict(school: School, include_results: bool = False) -> dict:
"postcode": school.postcode,
"latitude": school.latitude,
"longitude": school.longitude,
# GIAS fields
"website": school.website,
"headteacher_name": school.headteacher_name,
"capacity": school.capacity,
"trust_name": school.trust_name,
"gender": school.gender,
}
if include_results and school.results:
@@ -455,8 +465,25 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
# Query all schools with their results
schools = db.query(School).options(joinedload(School.results)).all()
# Load Ofsted data into a lookup dict (urn → grade, date)
ofsted_lookup: Dict[int, dict] = {}
try:
ofsted_rows = db.query(
OfstedInspection.urn,
OfstedInspection.overall_effectiveness,
OfstedInspection.inspection_date,
).all()
for o in ofsted_rows:
ofsted_lookup[o.urn] = {
"ofsted_grade": o.overall_effectiveness,
"ofsted_date": o.inspection_date.isoformat() if o.inspection_date else None,
}
except Exception:
pass # Table may not exist yet on first run
rows = []
for school in schools:
ofsted = ofsted_lookup.get(school.urn, {})
for result in school.results:
row = {
"urn": school.urn,
@@ -468,6 +495,15 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
"postcode": school.postcode,
"latitude": school.latitude,
"longitude": school.longitude,
# GIAS fields
"website": school.website,
"headteacher_name": school.headteacher_name,
"capacity": school.capacity,
"trust_name": school.trust_name,
"gender": school.gender,
# Ofsted (for list view)
"ofsted_grade": ofsted.get("ofsted_grade"),
"ofsted_date": ofsted.get("ofsted_date"),
**result_to_dict(result)
}
rows.append(row)
@@ -511,3 +547,126 @@ def clear_cache():
"""Clear all caches."""
global _df_cache
_df_cache = None
def get_supplementary_data(db: Session, urn: int) -> dict:
    """
    Fetch all supplementary data for a single school URN.

    Every source is looked up independently and on a best-effort basis. If a
    lookup raises, the failure is logged, the session is rolled back and that
    source is reported as None; the remaining sources are still attempted.

    Args:
        db: SQLAlchemy session used for the lookups.
        urn: URN of the school to fetch data for.

    Returns:
        A dict with keys: ofsted, parent_view, census, admissions, sen_detail,
        phonics, deprivation, finance. Each value is a dict of that source's
        fields, or None when no row was found or the lookup failed.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)

    def safe_query(model, pk_field, latest_year_field=None):
        """Return the (latest) row of `model` for this URN, or None on failure."""
        try:
            query = db.query(model).filter(getattr(model, pk_field) == urn)
            if latest_year_field:
                # Several rows per school may exist; keep the most recent one.
                query = query.order_by(getattr(model, latest_year_field).desc())
            return query.first()
        except Exception:
            # Deliberately broad: one unavailable source must not break the
            # whole response. Log it instead of hiding it.
            logger.warning(
                "Supplementary data lookup on %s failed for URN %s",
                getattr(model, "__name__", model),
                urn,
                exc_info=True,
            )
            # Some databases (e.g. PostgreSQL) reject every further statement
            # in a transaction after an error, so reset the session before the
            # next lookup.
            try:
                db.rollback()
            except Exception:
                logger.warning(
                    "Rollback after failed supplementary lookup also failed",
                    exc_info=True,
                )
            return None

    def serialise(row, fields, date_fields=()):
        """Copy `fields` from `row` into a dict; ISO-format the date fields."""
        if not row:
            return None
        data = {name: getattr(row, name) for name in fields}
        for name in date_fields:
            value = data[name]
            # Re-assigning an existing key keeps its position in the dict.
            data[name] = value.isoformat() if value else None
        return data

    # (result key, model, "latest year" ordering column, fields, date fields)
    sources = (
        (
            "ofsted",
            OfstedInspection,
            None,
            (
                "overall_effectiveness",
                "quality_of_education",
                "behaviour_attitudes",
                "personal_development",
                "leadership_management",
                "early_years_provision",
                "previous_overall",
                "inspection_date",
                "inspection_type",
            ),
            ("inspection_date",),
        ),
        (
            "parent_view",
            OfstedParentView,
            None,
            (
                "survey_date",
                "total_responses",
                "q_happy_pct",
                "q_safe_pct",
                "q_behaviour_pct",
                "q_bullying_pct",
                "q_communication_pct",
                "q_progress_pct",
                "q_teaching_pct",
                "q_information_pct",
                "q_curriculum_pct",
                "q_future_pct",
                "q_leadership_pct",
                "q_wellbeing_pct",
                "q_recommend_pct",
                "q_sen_pct",
            ),
            ("survey_date",),
        ),
        (
            "census",
            SchoolCensus,
            "year",
            (
                "year",
                "class_size_avg",
                "ethnicity_white_pct",
                "ethnicity_asian_pct",
                "ethnicity_black_pct",
                "ethnicity_mixed_pct",
                "ethnicity_other_pct",
            ),
            (),
        ),
        (
            "admissions",
            SchoolAdmissions,
            "year",
            (
                "year",
                "published_admission_number",
                "total_applications",
                "first_preference_offers_pct",
                "oversubscribed",
            ),
            (),
        ),
        (
            "sen_detail",
            SenDetail,
            "year",
            (
                "year",
                "primary_need_speech_pct",
                "primary_need_autism_pct",
                "primary_need_mld_pct",
                "primary_need_spld_pct",
                "primary_need_semh_pct",
                "primary_need_physical_pct",
                "primary_need_other_pct",
            ),
            (),
        ),
        (
            "phonics",
            Phonics,
            "year",
            (
                "year",
                "year1_phonics_pct",
                "year2_phonics_pct",
            ),
            (),
        ),
        (
            "deprivation",
            SchoolDeprivation,
            None,
            (
                "lsoa_code",
                "idaci_score",
                "idaci_decile",
            ),
            (),
        ),
        (
            "finance",
            SchoolFinance,
            "year",
            (
                "year",
                "per_pupil_spend",
                "staff_cost_pct",
                "teacher_cost_pct",
                "support_staff_cost_pct",
                "premises_cost_pct",
            ),
            (),
        ),
    )

    result = {}
    for key, model, latest_year_field, fields, date_fields in sources:
        # Serialise straight after each lookup: a rollback triggered by a
        # later failure would expire rows already loaded in this session.
        row = safe_query(model, "urn", latest_year_field)
        result[key] = serialise(row, fields, date_fields)
    return result