Files
school_compare/backend/models.py
Tudor dd49ef28b2
Some checks failed
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 47s
Build and Push Docker Images / Trigger Portainer Update (push) Has been cancelled
Build and Push Docker Images / Build Frontend (Next.js) (push) Has been cancelled
feat(data): integrate 9 UK government data sources via Kestra
Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.

Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
  ofsted_parent_view, school_census, school_admissions, sen_detail, phonics,
  school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date

Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
  sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry

Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
  survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
  Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 11:44:04 +00:00

393 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
SQLAlchemy database models for school data.
Normalized schema with separate tables for schools and yearly results,
plus supplementary per-school tables populated by the Kestra data integrator.
"""
from datetime import datetime, timezone

from sqlalchemy import (
    Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
    Text, Boolean, DateTime, Date
)
from sqlalchemy.orm import relationship

from .database import Base
class School(Base):
    """
    One row per school: identity, address, cached coordinates and GIAS
    enrichment fields.

    Yearly results live in ``SchoolResult`` and are reachable through the
    ``results`` relationship.
    """

    __tablename__ = "schools"

    # --- Identity -----------------------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    urn = Column(Integer, unique=True, nullable=False, index=True)
    school_name = Column(String(255), nullable=False)
    local_authority = Column(String(100))
    local_authority_code = Column(Integer)
    school_type = Column(String(100))
    school_type_code = Column(String(10))
    religious_denomination = Column(String(100))
    age_range = Column(String(20))

    # --- Address ------------------------------------------------------------
    address1 = Column(String(255))
    address2 = Column(String(255))
    town = Column(String(100))
    postcode = Column(String(20), index=True)

    # --- Geocoding (cached) -------------------------------------------------
    latitude = Column(Float)
    longitude = Column(Float)

    # --- GIAS enrichment fields ---------------------------------------------
    website = Column(String(255))
    headteacher_name = Column(String(200))
    capacity = Column(Integer)
    trust_name = Column(String(255))
    trust_uid = Column(String(20))
    gender = Column(String(20))  # Mixed / Girls / Boys
    nursery_provision = Column(Boolean)

    # --- Relationships ------------------------------------------------------
    # Result rows are owned by the school: removing a school (or detaching a
    # result from it) deletes the result as well.
    results = relationship(
        "SchoolResult",
        back_populates="school",
        cascade="all, delete-orphan",
    )

    def __repr__(self) -> str:
        urn, name = self.urn, self.school_name
        return f"<School(urn={urn}, name='{name}')>"

    @property
    def address(self) -> str:
        """Return the non-empty address parts joined by ``", "``."""
        components = (self.address1, self.address2, self.town, self.postcode)
        # filter(None, ...) drops None and empty strings.
        return ", ".join(filter(None, components))
class SchoolResult(Base):
    """
    KS2 results for one school in one year.

    A school may have many of these rows; the (school_id, year) pair is
    unique.
    """

    __tablename__ = "school_results"

    id = Column(Integer, primary_key=True, autoincrement=True)
    school_id = Column(
        Integer,
        ForeignKey("schools.id", ondelete="CASCADE"),
        nullable=False,
    )
    year = Column(Integer, nullable=False, index=True)

    # --- Pupil numbers ------------------------------------------------------
    total_pupils = Column(Integer)
    eligible_pupils = Column(Integer)

    # --- Core KS2 metrics: expected standard --------------------------------
    rwm_expected_pct = Column(Float)
    reading_expected_pct = Column(Float)
    writing_expected_pct = Column(Float)
    maths_expected_pct = Column(Float)
    gps_expected_pct = Column(Float)
    science_expected_pct = Column(Float)

    # --- Higher standard ----------------------------------------------------
    rwm_high_pct = Column(Float)
    reading_high_pct = Column(Float)
    writing_high_pct = Column(Float)
    maths_high_pct = Column(Float)
    gps_high_pct = Column(Float)

    # --- Progress scores ----------------------------------------------------
    reading_progress = Column(Float)
    writing_progress = Column(Float)
    maths_progress = Column(Float)

    # --- Average scores -----------------------------------------------------
    reading_avg_score = Column(Float)
    maths_avg_score = Column(Float)
    gps_avg_score = Column(Float)

    # --- School context -----------------------------------------------------
    disadvantaged_pct = Column(Float)
    eal_pct = Column(Float)
    sen_support_pct = Column(Float)
    sen_ehcp_pct = Column(Float)
    stability_pct = Column(Float)

    # --- Pupil absence from tests -------------------------------------------
    reading_absence_pct = Column(Float)
    gps_absence_pct = Column(Float)
    maths_absence_pct = Column(Float)
    writing_absence_pct = Column(Float)
    science_absence_pct = Column(Float)

    # --- Gender breakdown ---------------------------------------------------
    rwm_expected_boys_pct = Column(Float)
    rwm_expected_girls_pct = Column(Float)
    rwm_high_boys_pct = Column(Float)
    rwm_high_girls_pct = Column(Float)

    # --- Disadvantaged performance ------------------------------------------
    rwm_expected_disadvantaged_pct = Column(Float)
    rwm_expected_non_disadvantaged_pct = Column(Float)
    disadvantaged_gap = Column(Float)

    # --- 3-year averages ----------------------------------------------------
    rwm_expected_3yr_pct = Column(Float)
    reading_avg_3yr = Column(Float)
    maths_avg_3yr = Column(Float)

    # --- Relationship -------------------------------------------------------
    school = relationship("School", back_populates="results")

    # --- Constraints --------------------------------------------------------
    # NOTE(review): the unique constraint and the explicit index cover the
    # same (school_id, year) columns; the index may be redundant on backends
    # that index unique constraints - confirm before removing either.
    __table_args__ = (
        UniqueConstraint("school_id", "year", name="uq_school_year"),
        Index("ix_school_results_school_year", "school_id", "year"),
    )

    def __repr__(self) -> str:
        school_id, year = self.school_id, self.year
        return f"<SchoolResult(school_id={school_id}, year={year})>"
def _utcnow_naive():
    """
    Return the current UTC time as a naive ``datetime``.

    Drop-in replacement for ``datetime.utcnow()``, which is deprecated since
    Python 3.12. The tzinfo is stripped so the value stored in the
    timezone-less ``DateTime`` column is the same as before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class SchemaVersion(Base):
    """
    Tracks database schema version for automatic migrations.
    Single-row table that stores the current schema version.
    """

    __tablename__ = "schema_version"

    # Defaults to 1, matching the single-row design described above.
    id = Column(Integer, primary_key=True, default=1)
    version = Column(Integer, nullable=False)
    # Naive UTC timestamp, set on insert and refreshed on every update.
    migrated_at = Column(DateTime, default=_utcnow_naive, onupdate=_utcnow_naive)

    def __repr__(self) -> str:
        return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"
# ---------------------------------------------------------------------------
# Supplementary data tables (populated by the Kestra data integrator)
# ---------------------------------------------------------------------------
class OfstedInspection(Base):
    """Most recent Ofsted inspection judgement for a school (one row per urn)."""

    __tablename__ = "ofsted_inspections"

    urn = Column(Integer, primary_key=True)
    inspection_date = Column(Date)
    publication_date = Column(Date)
    inspection_type = Column(String(100))  # Section 5 / Section 8 etc.

    # Grade scale used by the judgement columns below:
    # 1=Outstanding 2=Good 3=Requires improvement 4=Inadequate
    overall_effectiveness = Column(Integer)
    quality_of_education = Column(Integer)
    behaviour_attitudes = Column(Integer)
    personal_development = Column(Integer)
    leadership_management = Column(Integer)
    early_years_provision = Column(Integer)  # nullable — not all schools
    previous_overall = Column(Integer)  # for trend display

    def __repr__(self) -> str:
        urn, overall = self.urn, self.overall_effectiveness
        return f"<OfstedInspection(urn={urn}, overall={overall})>"
class OfstedParentView(Base):
    """
    Latest Ofsted Parent View survey for a school (one row per urn).

    Each of the 14 ``q_*_pct`` columns holds the % of respondents saying Yes.
    """

    __tablename__ = "ofsted_parent_view"

    urn = Column(Integer, primary_key=True)
    survey_date = Column(Date)
    total_responses = Column(Integer)

    # Survey questions
    q_happy_pct = Column(Float)  # My child is happy at this school
    q_safe_pct = Column(Float)  # My child feels safe at this school
    q_bullying_pct = Column(Float)  # School deals with bullying well
    q_communication_pct = Column(Float)  # School keeps me informed
    q_progress_pct = Column(Float)  # My child does well / good progress
    q_teaching_pct = Column(Float)  # Teaching is good
    q_information_pct = Column(Float)  # I receive valuable info about progress
    q_curriculum_pct = Column(Float)  # Broad range of subjects taught
    q_future_pct = Column(Float)  # Prepares child well for the future
    q_leadership_pct = Column(Float)  # Led and managed effectively
    q_wellbeing_pct = Column(Float)  # Supports wider personal development
    q_behaviour_pct = Column(Float)  # Pupils are well behaved
    q_recommend_pct = Column(Float)  # I would recommend this school
    q_sen_pct = Column(Float)  # Good information about child's SEN (where applicable)

    def __repr__(self) -> str:
        urn, responses = self.urn, self.total_responses
        return f"<OfstedParentView(urn={urn}, responses={responses})>"
class SchoolCensus(Base):
    """Yearly school census snapshot: average class size and ethnicity breakdown."""

    __tablename__ = "school_census"

    # Composite primary key: one row per school per year.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    class_size_avg = Column(Float)

    ethnicity_white_pct = Column(Float)
    ethnicity_asian_pct = Column(Float)
    ethnicity_black_pct = Column(Float)
    ethnicity_mixed_pct = Column(Float)
    ethnicity_other_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key is normally indexed by the
    # database already, so this index may be redundant - confirm before removing.
    __table_args__ = (
        Index("ix_school_census_urn_year", "urn", "year"),
    )

    def __repr__(self) -> str:
        urn, year = self.urn, self.year
        return f"<SchoolCensus(urn={urn}, year={year})>"
class SchoolAdmissions(Base):
    """Admissions statistics for one school in one year."""

    __tablename__ = "school_admissions"

    # Composite primary key: one row per school per year.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    published_admission_number = Column(Integer)  # PAN
    total_applications = Column(Integer)
    first_preference_offers_pct = Column(Float)  # % receiving 1st choice
    oversubscribed = Column(Boolean)

    # NOTE(review): the (urn, year) primary key is normally indexed by the
    # database already, so this index may be redundant - confirm before removing.
    __table_args__ = (
        Index("ix_school_admissions_urn_year", "urn", "year"),
    )

    def __repr__(self) -> str:
        urn, year = self.urn, self.year
        return f"<SchoolAdmissions(urn={urn}, year={year})>"
class SenDetail(Base):
    """
    Breakdown of SEN pupils by primary need type, per school and year.

    More granular than the SEN context fields stored on ``school_results``.
    """

    __tablename__ = "sen_detail"

    # Composite primary key: one row per school per year.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    primary_need_speech_pct = Column(Float)  # SLCN
    primary_need_autism_pct = Column(Float)  # ASD
    primary_need_mld_pct = Column(Float)  # Moderate learning difficulty
    primary_need_spld_pct = Column(Float)  # Specific learning difficulty (dyslexia etc.)
    primary_need_semh_pct = Column(Float)  # Social, emotional, mental health
    primary_need_physical_pct = Column(Float)  # Physical/sensory
    primary_need_other_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key is normally indexed by the
    # database already, so this index may be redundant - confirm before removing.
    __table_args__ = (
        Index("ix_sen_detail_urn_year", "urn", "year"),
    )

    def __repr__(self) -> str:
        urn, year = self.urn, self.year
        return f"<SenDetail(urn={urn}, year={year})>"
class Phonics(Base):
    """Phonics Screening Check pass rates for one school in one year."""

    __tablename__ = "phonics"

    # Composite primary key: one row per school per year.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    year1_phonics_pct = Column(Float)  # % reaching expected standard in Year 1
    year2_phonics_pct = Column(Float)  # % reaching standard in Year 2 (re-takers)

    # NOTE(review): the (urn, year) primary key is normally indexed by the
    # database already, so this index may be redundant - confirm before removing.
    __table_args__ = (
        Index("ix_phonics_urn_year", "urn", "year"),
    )

    def __repr__(self) -> str:
        urn, year = self.urn, self.year
        return f"<Phonics(urn={urn}, year={year})>"
class SchoolDeprivation(Base):
    """IDACI deprivation index for a school, derived via postcode → LSOA lookup."""

    __tablename__ = "school_deprivation"

    urn = Column(Integer, primary_key=True)
    lsoa_code = Column(String(20))
    idaci_score = Column(Float)  # 0 to 1, higher = more deprived
    idaci_decile = Column(Integer)  # 1 = most deprived, 10 = least deprived

    def __repr__(self) -> str:
        urn, decile = self.urn, self.idaci_decile
        return f"<SchoolDeprivation(urn={urn}, decile={decile})>"
class SchoolFinance(Base):
    """FBIT financial benchmarking figures for one school in one year."""

    __tablename__ = "school_finance"

    # Composite primary key: one row per school per year.
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)

    per_pupil_spend = Column(Float)  # £ total expenditure per pupil
    staff_cost_pct = Column(Float)  # % of budget on all staff
    teacher_cost_pct = Column(Float)  # % on teachers specifically
    support_staff_cost_pct = Column(Float)
    premises_cost_pct = Column(Float)

    # NOTE(review): the (urn, year) primary key is normally indexed by the
    # database already, so this index may be redundant - confirm before removing.
    __table_args__ = (
        Index("ix_school_finance_urn_year", "urn", "year"),
    )

    def __repr__(self) -> str:
        urn, year = self.urn, self.year
        return f"<SchoolFinance(urn={urn}, year={year})>"
# Mapping from CSV columns to model fields.
# Every CSV column currently shares its name with the School attribute it
# populates, so the mapping is built as an identity dict. If a CSV header ever
# diverges from the model field, switch that entry to an explicit key/value.
SCHOOL_FIELD_MAPPING = {
    column: column
    for column in (
        "urn",
        "school_name",
        "local_authority",
        "local_authority_code",
        "school_type",
        "school_type_code",
        "religious_denomination",
        "age_range",
        "address1",
        "address2",
        "town",
        "postcode",
    )
}
# CSV column -> SchoolResult attribute. Every name is currently identical on
# both sides, so the mapping is built as an identity dict; insertion order
# follows the grouped listing below.
RESULT_FIELD_MAPPING = {
    column: column
    for column in (
        "year",
        "total_pupils",
        "eligible_pupils",
        # Expected Standard
        "rwm_expected_pct",
        "reading_expected_pct",
        "writing_expected_pct",
        "maths_expected_pct",
        "gps_expected_pct",
        "science_expected_pct",
        # Higher Standard
        "rwm_high_pct",
        "reading_high_pct",
        "writing_high_pct",
        "maths_high_pct",
        "gps_high_pct",
        # Progress
        "reading_progress",
        "writing_progress",
        "maths_progress",
        # Averages
        "reading_avg_score",
        "maths_avg_score",
        "gps_avg_score",
        # Context
        "disadvantaged_pct",
        "eal_pct",
        "sen_support_pct",
        "sen_ehcp_pct",
        "stability_pct",
        # Absence
        "reading_absence_pct",
        "gps_absence_pct",
        "maths_absence_pct",
        "writing_absence_pct",
        "science_absence_pct",
        # Gender
        "rwm_expected_boys_pct",
        "rwm_expected_girls_pct",
        "rwm_high_boys_pct",
        "rwm_high_girls_pct",
        # Disadvantaged
        "rwm_expected_disadvantaged_pct",
        "rwm_expected_non_disadvantaged_pct",
        "disadvantaged_gap",
        # 3-Year
        "rwm_expected_3yr_pct",
        "reading_avg_3yr",
        "maths_avg_3yr",
    )
}