feat(data): integrate 9 UK government data sources via Kestra
Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.
Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
ofsted_parent_view, school_census, school_admissions, sen_detail, phonics,
school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date
Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry
Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,7 @@ from .data_loader import (
|
||||
clear_cache,
|
||||
load_school_data,
|
||||
geocode_single_postcode,
|
||||
get_supplementary_data,
|
||||
)
|
||||
from .data_loader import get_data_info as get_db_info
|
||||
from .database import check_and_migrate_if_needed
|
||||
@@ -384,6 +385,16 @@ async def get_school_details(request: Request, urn: int):
|
||||
# Get latest info for the school
|
||||
latest = school_data.iloc[-1]
|
||||
|
||||
# Fetch supplementary data (Ofsted, Parent View, admissions, etc.)
|
||||
from .database import SessionLocal
|
||||
supplementary = {}
|
||||
try:
|
||||
db = SessionLocal()
|
||||
supplementary = get_supplementary_data(db, urn)
|
||||
db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {
|
||||
"school_info": {
|
||||
"urn": urn,
|
||||
@@ -396,8 +407,23 @@ async def get_school_details(request: Request, urn: int):
|
||||
"latitude": latest.get("latitude"),
|
||||
"longitude": latest.get("longitude"),
|
||||
"phase": "Primary",
|
||||
# GIAS fields
|
||||
"website": latest.get("website"),
|
||||
"headteacher_name": latest.get("headteacher_name"),
|
||||
"capacity": latest.get("capacity"),
|
||||
"trust_name": latest.get("trust_name"),
|
||||
"gender": latest.get("gender"),
|
||||
},
|
||||
"yearly_data": clean_for_json(school_data),
|
||||
# Supplementary data (null if not yet populated by Kestra)
|
||||
"ofsted": supplementary.get("ofsted"),
|
||||
"parent_view": supplementary.get("parent_view"),
|
||||
"census": supplementary.get("census"),
|
||||
"admissions": supplementary.get("admissions"),
|
||||
"sen_detail": supplementary.get("sen_detail"),
|
||||
"phonics": supplementary.get("phonics"),
|
||||
"deprivation": supplementary.get("deprivation"),
|
||||
"finance": supplementary.get("finance"),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,11 @@ from sqlalchemy.orm import joinedload, Session
|
||||
|
||||
from .config import settings
|
||||
from .database import SessionLocal, get_db_session
|
||||
from .models import School, SchoolResult
|
||||
from .models import (
|
||||
School, SchoolResult,
|
||||
OfstedInspection, OfstedParentView, SchoolCensus,
|
||||
SchoolAdmissions, SenDetail, Phonics, SchoolDeprivation, SchoolFinance,
|
||||
)
|
||||
from .schemas import SCHOOL_TYPE_MAP
|
||||
|
||||
# Cache for user search postcode geocoding (not for school data)
|
||||
@@ -381,6 +385,12 @@ def school_to_dict(school: School, include_results: bool = False) -> dict:
|
||||
"postcode": school.postcode,
|
||||
"latitude": school.latitude,
|
||||
"longitude": school.longitude,
|
||||
# GIAS fields
|
||||
"website": school.website,
|
||||
"headteacher_name": school.headteacher_name,
|
||||
"capacity": school.capacity,
|
||||
"trust_name": school.trust_name,
|
||||
"gender": school.gender,
|
||||
}
|
||||
|
||||
if include_results and school.results:
|
||||
@@ -455,8 +465,25 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
||||
# Query all schools with their results
|
||||
schools = db.query(School).options(joinedload(School.results)).all()
|
||||
|
||||
# Load Ofsted data into a lookup dict (urn → grade, date)
|
||||
ofsted_lookup: Dict[int, dict] = {}
|
||||
try:
|
||||
ofsted_rows = db.query(
|
||||
OfstedInspection.urn,
|
||||
OfstedInspection.overall_effectiveness,
|
||||
OfstedInspection.inspection_date,
|
||||
).all()
|
||||
for o in ofsted_rows:
|
||||
ofsted_lookup[o.urn] = {
|
||||
"ofsted_grade": o.overall_effectiveness,
|
||||
"ofsted_date": o.inspection_date.isoformat() if o.inspection_date else None,
|
||||
}
|
||||
except Exception:
|
||||
pass # Table may not exist yet on first run
|
||||
|
||||
rows = []
|
||||
for school in schools:
|
||||
ofsted = ofsted_lookup.get(school.urn, {})
|
||||
for result in school.results:
|
||||
row = {
|
||||
"urn": school.urn,
|
||||
@@ -468,6 +495,15 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
||||
"postcode": school.postcode,
|
||||
"latitude": school.latitude,
|
||||
"longitude": school.longitude,
|
||||
# GIAS fields
|
||||
"website": school.website,
|
||||
"headteacher_name": school.headteacher_name,
|
||||
"capacity": school.capacity,
|
||||
"trust_name": school.trust_name,
|
||||
"gender": school.gender,
|
||||
# Ofsted (for list view)
|
||||
"ofsted_grade": ofsted.get("ofsted_grade"),
|
||||
"ofsted_date": ofsted.get("ofsted_date"),
|
||||
**result_to_dict(result)
|
||||
}
|
||||
rows.append(row)
|
||||
@@ -511,3 +547,126 @@ def clear_cache():
|
||||
"""Clear all caches."""
|
||||
global _df_cache
|
||||
_df_cache = None
|
||||
|
||||
|
||||
def _fetch_latest_row(db, model, urn, order_field=None):
    """
    Return a single row of ``model`` for ``urn``, or None.

    When ``order_field`` is given, rows are sorted by that column descending so
    the first row is the most recent one (e.g. the highest ``year``).

    This is a best-effort lookup: any failure (for example a table that has
    not been created yet) is logged and reported as None instead of raising.
    The session is rolled back after a failure so that one broken statement
    does not leave the transaction unusable for the lookups that follow.
    NOTE(review): the rollback assumes callers pass a read-only session with
    no pending writes — confirm for any caller other than the details endpoint.
    """
    import logging

    logger = logging.getLogger(__name__)
    try:
        query = db.query(model).filter(model.urn == urn)
        if order_field:
            query = query.order_by(getattr(model, order_field).desc())
        return query.first()
    except Exception:
        logger.exception(
            "Supplementary lookup failed for %s (urn=%s)",
            getattr(model, "__name__", model),
            urn,
        )
        try:
            db.rollback()
        except Exception:
            logger.exception("Rollback after failed supplementary lookup also failed")
        return None


def _serialise_row(row, fields, date_fields=()):
    """
    Convert an ORM row to a plain dict containing ``fields`` in the given order.

    Fields named in ``date_fields`` are emitted as ISO-8601 strings (or None
    when unset). Returns None when ``row`` is None.
    """
    if row is None:
        return None
    payload = {}
    for name in fields:
        value = getattr(row, name)
        if name in date_fields:
            value = value.isoformat() if value else None
        payload[name] = value
    return payload


def get_supplementary_data(db: Session, urn: int) -> dict:
    """
    Fetch all supplementary data for a single school URN.

    Returns a dict with keys: ofsted, parent_view, census, admissions,
    sen_detail, phonics, deprivation, finance. Each value is a dict of the
    fields listed below, or None when no row exists (or the lookup failed).
    Datasets with an order-by column return the row with the highest value of
    that column, i.e. the latest year.
    """
    # (response key, model, order-by column, fields in output order, date fields)
    datasets = (
        (
            "ofsted", OfstedInspection, None,
            (
                "overall_effectiveness",
                "quality_of_education",
                "behaviour_attitudes",
                "personal_development",
                "leadership_management",
                "early_years_provision",
                "previous_overall",
                "inspection_date",
                "inspection_type",
            ),
            ("inspection_date",),
        ),
        (
            "parent_view", OfstedParentView, None,
            (
                "survey_date",
                "total_responses",
                "q_happy_pct",
                "q_safe_pct",
                "q_behaviour_pct",
                "q_bullying_pct",
                "q_communication_pct",
                "q_progress_pct",
                "q_teaching_pct",
                "q_information_pct",
                "q_curriculum_pct",
                "q_future_pct",
                "q_leadership_pct",
                "q_wellbeing_pct",
                "q_recommend_pct",
                "q_sen_pct",
            ),
            ("survey_date",),
        ),
        (
            "census", SchoolCensus, "year",
            (
                "year",
                "class_size_avg",
                "ethnicity_white_pct",
                "ethnicity_asian_pct",
                "ethnicity_black_pct",
                "ethnicity_mixed_pct",
                "ethnicity_other_pct",
            ),
            (),
        ),
        (
            "admissions", SchoolAdmissions, "year",
            (
                "year",
                "published_admission_number",
                "total_applications",
                "first_preference_offers_pct",
                "oversubscribed",
            ),
            (),
        ),
        (
            "sen_detail", SenDetail, "year",
            (
                "year",
                "primary_need_speech_pct",
                "primary_need_autism_pct",
                "primary_need_mld_pct",
                "primary_need_spld_pct",
                "primary_need_semh_pct",
                "primary_need_physical_pct",
                "primary_need_other_pct",
            ),
            (),
        ),
        (
            "phonics", Phonics, "year",
            (
                "year",
                "year1_phonics_pct",
                "year2_phonics_pct",
            ),
            (),
        ),
        (
            "deprivation", SchoolDeprivation, None,
            (
                "lsoa_code",
                "idaci_score",
                "idaci_decile",
            ),
            (),
        ),
        (
            "finance", SchoolFinance, "year",
            (
                "year",
                "per_pupil_spend",
                "staff_cost_pct",
                "teacher_cost_pct",
                "support_staff_cost_pct",
                "premises_cost_pct",
            ),
            (),
        ),
    )

    return {
        key: _serialise_row(
            _fetch_latest_row(db, model, urn, order_field),
            fields,
            date_fields,
        )
        for key, model, order_field, fields, date_fields in datasets
    }
|
||||
|
||||
@@ -7,7 +7,7 @@ from datetime import datetime
|
||||
|
||||
from sqlalchemy import (
|
||||
Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
|
||||
Text, Boolean, DateTime
|
||||
Text, Boolean, DateTime, Date
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from .database import Base
|
||||
@@ -38,7 +38,16 @@ class School(Base):
|
||||
# Geocoding (cached)
|
||||
latitude = Column(Float)
|
||||
longitude = Column(Float)
|
||||
|
||||
|
||||
# GIAS enrichment fields
|
||||
website = Column(String(255))
|
||||
headteacher_name = Column(String(200))
|
||||
capacity = Column(Integer)
|
||||
trust_name = Column(String(255))
|
||||
trust_uid = Column(String(20))
|
||||
gender = Column(String(20)) # Mixed / Girls / Boys
|
||||
nursery_provision = Column(Boolean)
|
||||
|
||||
# Relationships
|
||||
results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
|
||||
|
||||
@@ -150,6 +159,169 @@ class SchemaVersion(Base):
|
||||
return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Supplementary data tables (populated by the Kestra data integrator)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class OfstedInspection(Base):
    """Latest Ofsted inspection judgement for a school (one row per URN)."""

    __tablename__ = "ofsted_inspections"

    urn = Column(Integer, primary_key=True)
    inspection_date = Column(Date)
    publication_date = Column(Date)
    # Inspection type label, e.g. Section 5 / Section 8
    inspection_type = Column(String(100))

    # Grades below use: 1=Outstanding, 2=Good, 3=Requires improvement, 4=Inadequate
    overall_effectiveness = Column(Integer)
    quality_of_education = Column(Integer)
    behaviour_attitudes = Column(Integer)
    personal_development = Column(Integer)
    leadership_management = Column(Integer)
    # Nullable: not every school has an early-years judgement
    early_years_provision = Column(Integer)
    # Previous overall grade, kept for trend display
    previous_overall = Column(Integer)

    def __repr__(self):
        return "<OfstedInspection(urn={}, overall={})>".format(
            self.urn, self.overall_effectiveness
        )
|
||||
|
||||
|
||||
class OfstedParentView(Base):
    """
    Ofsted Parent View survey results, latest per school (one row per URN).

    Each of the 14 ``q_*_pct`` columns holds the percentage of respondents
    saying Yes to the corresponding survey question.
    """

    __tablename__ = "ofsted_parent_view"

    urn = Column(Integer, primary_key=True)
    survey_date = Column(Date)
    total_responses = Column(Integer)

    # My child is happy at this school
    q_happy_pct = Column(Float)
    # My child feels safe at this school
    q_safe_pct = Column(Float)
    # School deals with bullying well
    q_bullying_pct = Column(Float)
    # School keeps me informed
    q_communication_pct = Column(Float)
    # My child does well / makes good progress
    q_progress_pct = Column(Float)
    # Teaching is good
    q_teaching_pct = Column(Float)
    # I receive valuable information about progress
    q_information_pct = Column(Float)
    # Broad range of subjects taught
    q_curriculum_pct = Column(Float)
    # Prepares child well for the future
    q_future_pct = Column(Float)
    # Led and managed effectively
    q_leadership_pct = Column(Float)
    # Supports wider personal development
    q_wellbeing_pct = Column(Float)
    # Pupils are well behaved
    q_behaviour_pct = Column(Float)
    # I would recommend this school
    q_recommend_pct = Column(Float)
    # Good information about child's SEN (where applicable)
    q_sen_pct = Column(Float)

    def __repr__(self):
        return "<OfstedParentView(urn={}, responses={})>".format(
            self.urn, self.total_responses
        )
|
||||
|
||||
|
||||
class SchoolCensus(Base):
    """Yearly school census snapshot: average class size and ethnicity breakdown."""

    __tablename__ = "school_census"

    # Composite primary key: one row per school per year
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    class_size_avg = Column(Float)
    ethnicity_white_pct = Column(Float)
    ethnicity_asian_pct = Column(Float)
    ethnicity_black_pct = Column(Float)
    ethnicity_mixed_pct = Column(Float)
    ethnicity_other_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key normally already provides an
    # index on these columns; this explicit index looks redundant — confirm
    # before removing, since dropping it changes the schema.
    __table_args__ = (Index('ix_school_census_urn_year', 'urn', 'year'),)

    def __repr__(self):
        return "<SchoolCensus(urn={}, year={})>".format(self.urn, self.year)
|
||||
|
||||
|
||||
class SchoolAdmissions(Base):
    """Yearly admissions statistics for a school."""

    __tablename__ = "school_admissions"

    # Composite primary key: one row per school per year
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    # PAN
    published_admission_number = Column(Integer)
    total_applications = Column(Integer)
    # % receiving 1st choice
    first_preference_offers_pct = Column(Float)
    oversubscribed = Column(Boolean)

    # NOTE(review): the (urn, year) primary key normally already provides an
    # index on these columns; this explicit index looks redundant — confirm
    # before removing, since dropping it changes the schema.
    __table_args__ = (Index('ix_school_admissions_urn_year', 'urn', 'year'),)

    def __repr__(self):
        return "<SchoolAdmissions(urn={}, year={})>".format(self.urn, self.year)
|
||||
|
||||
|
||||
class SenDetail(Base):
    """
    Breakdown of SEN pupils by primary need type, per school per year.

    More granular than the SEN context fields held on school_results.
    """

    __tablename__ = "sen_detail"

    # Composite primary key: one row per school per year
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    # SLCN
    primary_need_speech_pct = Column(Float)
    # ASD
    primary_need_autism_pct = Column(Float)
    # Moderate learning difficulty
    primary_need_mld_pct = Column(Float)
    # Specific learning difficulty (dyslexia etc.)
    primary_need_spld_pct = Column(Float)
    # Social, emotional, mental health
    primary_need_semh_pct = Column(Float)
    # Physical/sensory
    primary_need_physical_pct = Column(Float)
    primary_need_other_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key normally already provides an
    # index on these columns; this explicit index looks redundant — confirm
    # before removing, since dropping it changes the schema.
    __table_args__ = (Index('ix_sen_detail_urn_year', 'urn', 'year'),)

    def __repr__(self):
        return "<SenDetail(urn={}, year={})>".format(self.urn, self.year)
|
||||
|
||||
|
||||
class Phonics(Base):
    """Phonics Screening Check pass rates, per school per year."""

    __tablename__ = "phonics"

    # Composite primary key: one row per school per year
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    # % reaching expected standard in Year 1
    year1_phonics_pct = Column(Float)
    # % reaching standard in Year 2 (re-takers)
    year2_phonics_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key normally already provides an
    # index on these columns; this explicit index looks redundant — confirm
    # before removing, since dropping it changes the schema.
    __table_args__ = (Index('ix_phonics_urn_year', 'urn', 'year'),)

    def __repr__(self):
        return "<Phonics(urn={}, year={})>".format(self.urn, self.year)
|
||||
|
||||
|
||||
class SchoolDeprivation(Base):
    """IDACI deprivation index for a school, derived via postcode → LSOA lookup."""

    __tablename__ = "school_deprivation"

    urn = Column(Integer, primary_key=True)
    lsoa_code = Column(String(20))
    # 0–1, higher = more deprived
    idaci_score = Column(Float)
    # 1 = most deprived, 10 = least deprived
    idaci_decile = Column(Integer)

    def __repr__(self):
        return "<SchoolDeprivation(urn={}, decile={})>".format(
            self.urn, self.idaci_decile
        )
|
||||
|
||||
|
||||
class SchoolFinance(Base):
    """FBIT financial benchmarking figures, per school per year."""

    __tablename__ = "school_finance"

    # Composite primary key: one row per school per year
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    # £ total expenditure per pupil
    per_pupil_spend = Column(Float)
    # % of budget on all staff
    staff_cost_pct = Column(Float)
    # % on teachers specifically
    teacher_cost_pct = Column(Float)
    support_staff_cost_pct = Column(Float)
    premises_cost_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key normally already provides an
    # index on these columns; this explicit index looks redundant — confirm
    # before removing, since dropping it changes the schema.
    __table_args__ = (Index('ix_school_finance_urn_year', 'urn', 'year'),)

    def __repr__(self):
        return "<SchoolFinance(urn={}, year={})>".format(self.urn, self.year)
|
||||
|
||||
|
||||
# Mapping from CSV columns to model fields
|
||||
SCHOOL_FIELD_MAPPING = {
|
||||
'urn': 'urn',
|
||||
|
||||
@@ -13,10 +13,11 @@ WHEN TO BUMP:
|
||||
"""
|
||||
|
||||
# Current schema version - increment when models change
|
||||
SCHEMA_VERSION = 2
|
||||
SCHEMA_VERSION = 3
|
||||
|
||||
# Changelog for documentation
|
||||
SCHEMA_CHANGELOG = {
|
||||
1: "Initial schema with School and SchoolResult tables",
|
||||
2: "Added pupil absence fields (reading, maths, gps, writing, science)",
|
||||
3: "Added supplementary data tables: ofsted, parent_view, census, admissions, sen_detail, phonics, deprivation, finance; GIAS columns on schools",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user