feat(data): integrate 9 UK government data sources via Kestra
Some checks failed
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Trigger Portainer Update (push) Has been cancelled
Build and Push Docker Images / Build Frontend (Next.js) (push) Has been cancelled

Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.

Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
  ofsted_parent_view, school_census, school_admissions, sen_detail, phonics,
  school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date

Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
  sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry

Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
  survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
  Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-24 11:44:04 +00:00
parent c49593d4d6
commit dd49ef28b2
36 changed files with 2849 additions and 8 deletions

View File

@@ -7,7 +7,7 @@ from datetime import datetime
from sqlalchemy import (
Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
Text, Boolean, DateTime
Text, Boolean, DateTime, Date
)
from sqlalchemy.orm import relationship
from .database import Base
@@ -38,7 +38,16 @@ class School(Base):
# Geocoding (cached)
latitude = Column(Float)
longitude = Column(Float)
# GIAS enrichment fields
website = Column(String(255))
headteacher_name = Column(String(200))
capacity = Column(Integer)
trust_name = Column(String(255))
trust_uid = Column(String(20))
gender = Column(String(20)) # Mixed / Girls / Boys
nursery_provision = Column(Boolean)
# Relationships
results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
@@ -150,6 +159,169 @@ class SchemaVersion(Base):
return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"
# ---------------------------------------------------------------------------
# Supplementary data tables (populated by the Kestra data integrator)
# ---------------------------------------------------------------------------
class OfstedInspection(Base):
    """Most recent Ofsted inspection judgement for a school, one row per URN."""

    __tablename__ = "ofsted_inspections"

    urn = Column(Integer, primary_key=True)

    # Inspection metadata
    inspection_date = Column(Date)
    publication_date = Column(Date)
    inspection_type = Column(String(100))  # e.g. Section 5 / Section 8

    # Grade scale: 1 = Outstanding, 2 = Good,
    # 3 = Requires improvement, 4 = Inadequate
    overall_effectiveness = Column(Integer)
    quality_of_education = Column(Integer)
    behaviour_attitudes = Column(Integer)
    personal_development = Column(Integer)
    leadership_management = Column(Integer)
    early_years_provision = Column(Integer)  # NULL where not applicable to the school
    previous_overall = Column(Integer)  # kept so a trend can be displayed

    def __repr__(self):
        return "<OfstedInspection(urn={}, overall={})>".format(
            self.urn, self.overall_effectiveness
        )
class OfstedParentView(Base):
    """Latest Ofsted Parent View survey results for a school.

    One row per URN. Each of the 14 ``q_*_pct`` columns stores the percentage
    of respondents answering "Yes" to that survey question.
    """

    __tablename__ = "ofsted_parent_view"

    urn = Column(Integer, primary_key=True)
    survey_date = Column(Date)
    total_responses = Column(Integer)

    # Survey questions (% saying Yes)
    q_happy_pct = Column(Float)          # child is happy at this school
    q_safe_pct = Column(Float)           # child feels safe at this school
    q_bullying_pct = Column(Float)       # school deals with bullying well
    q_communication_pct = Column(Float)  # school keeps parents informed
    q_progress_pct = Column(Float)       # child does well / makes good progress
    q_teaching_pct = Column(Float)       # teaching is good
    q_information_pct = Column(Float)    # valuable information received about progress
    q_curriculum_pct = Column(Float)     # broad range of subjects taught
    q_future_pct = Column(Float)         # prepares child well for the future
    q_leadership_pct = Column(Float)     # led and managed effectively
    q_wellbeing_pct = Column(Float)      # supports wider personal development
    q_behaviour_pct = Column(Float)      # pupils are well behaved
    q_recommend_pct = Column(Float)      # would recommend this school
    q_sen_pct = Column(Float)            # good information about child's SEN (where applicable)

    def __repr__(self):
        return "<OfstedParentView(urn={}, responses={})>".format(
            self.urn, self.total_responses
        )
class SchoolCensus(Base):
    """Annual school census snapshot: average class size and ethnicity breakdown.

    Rows are keyed by the composite primary key ``(urn, year)``.
    """

    __tablename__ = "school_census"

    # Composite primary key. The database backs it with a unique index on
    # (urn, year), so no additional Index over the same columns is declared;
    # a duplicate would only add write and storage overhead.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    class_size_avg = Column(Float)

    # Ethnicity breakdown, stored as percentages
    ethnicity_white_pct = Column(Float)
    ethnicity_asian_pct = Column(Float)
    ethnicity_black_pct = Column(Float)
    ethnicity_mixed_pct = Column(Float)
    ethnicity_other_pct = Column(Float)

    def __repr__(self):
        return f"<SchoolCensus(urn={self.urn}, year={self.year})>"
class SchoolAdmissions(Base):
    """Annual admissions statistics for a school.

    Rows are keyed by the composite primary key ``(urn, year)``.
    """

    __tablename__ = "school_admissions"

    # Composite primary key. The database backs it with a unique index on
    # (urn, year), so no additional Index over the same columns is declared;
    # a duplicate would only add write and storage overhead.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    published_admission_number = Column(Integer)  # PAN
    total_applications = Column(Integer)
    first_preference_offers_pct = Column(Float)  # % receiving 1st choice
    oversubscribed = Column(Boolean)

    def __repr__(self):
        return f"<SchoolAdmissions(urn={self.urn}, year={self.year})>"
class SenDetail(Base):
    """SEN primary-need breakdown per school and year.

    More granular than the SEN context fields held on school_results.
    Rows are keyed by the composite primary key ``(urn, year)``.
    """

    __tablename__ = "sen_detail"

    # Composite primary key. The database backs it with a unique index on
    # (urn, year), so no additional Index over the same columns is declared;
    # a duplicate would only add write and storage overhead.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    # Percentage per primary need type
    primary_need_speech_pct = Column(Float)    # SLCN
    primary_need_autism_pct = Column(Float)    # ASD
    primary_need_mld_pct = Column(Float)       # Moderate learning difficulty
    primary_need_spld_pct = Column(Float)      # Specific learning difficulty (dyslexia etc.)
    primary_need_semh_pct = Column(Float)      # Social, emotional, mental health
    primary_need_physical_pct = Column(Float)  # Physical/sensory
    primary_need_other_pct = Column(Float)

    def __repr__(self):
        return f"<SenDetail(urn={self.urn}, year={self.year})>"
class Phonics(Base):
    """Phonics Screening Check pass rates per school and year.

    Rows are keyed by the composite primary key ``(urn, year)``.
    """

    __tablename__ = "phonics"

    # Composite primary key. The database backs it with a unique index on
    # (urn, year), so no additional Index over the same columns is declared;
    # a duplicate would only add write and storage overhead.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    year1_phonics_pct = Column(Float)  # % reaching expected standard in Year 1
    year2_phonics_pct = Column(Float)  # % reaching standard in Year 2 (re-takers)

    def __repr__(self):
        return f"<Phonics(urn={self.urn}, year={self.year})>"
class SchoolDeprivation(Base):
    """IDACI deprivation measures for a school, one row per URN.

    Values are derived by looking up the school's postcode to find its LSOA.
    """

    __tablename__ = "school_deprivation"

    urn = Column(Integer, primary_key=True)
    lsoa_code = Column(String(20))
    idaci_score = Column(Float)     # range 0-1; higher = more deprived
    idaci_decile = Column(Integer)  # 1 = most deprived, 10 = least deprived

    def __repr__(self):
        return "<SchoolDeprivation(urn={}, decile={})>".format(
            self.urn, self.idaci_decile
        )
class SchoolFinance(Base):
    """FBIT financial benchmarking figures per school and year.

    Rows are keyed by the composite primary key ``(urn, year)``.
    """

    __tablename__ = "school_finance"

    # Composite primary key. The database backs it with a unique index on
    # (urn, year), so no additional Index over the same columns is declared;
    # a duplicate would only add write and storage overhead.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    per_pupil_spend = Column(Float)   # £ total expenditure per pupil
    staff_cost_pct = Column(Float)    # % of budget on all staff
    teacher_cost_pct = Column(Float)  # % on teachers specifically
    support_staff_cost_pct = Column(Float)
    premises_cost_pct = Column(Float)

    def __repr__(self):
        return f"<SchoolFinance(urn={self.urn}, year={self.year})>"
# Mapping from CSV columns to model fields
SCHOOL_FIELD_MAPPING = {
'urn': 'urn',