""" SQLAlchemy database models for school data. Normalized schema with separate tables for schools and yearly results. """ from datetime import datetime from sqlalchemy import ( Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint, Text, Boolean, DateTime, Date ) from sqlalchemy.orm import relationship from .database import Base class School(Base): """ Core school information - relatively static data. """ __tablename__ = "schools" id = Column(Integer, primary_key=True, autoincrement=True) urn = Column(Integer, unique=True, nullable=False, index=True) school_name = Column(String(255), nullable=False) local_authority = Column(String(100)) local_authority_code = Column(Integer) school_type = Column(String(100)) school_type_code = Column(String(10)) religious_denomination = Column(String(100)) age_range = Column(String(20)) # Address address1 = Column(String(255)) address2 = Column(String(255)) town = Column(String(100)) postcode = Column(String(20), index=True) # Geocoding (cached) latitude = Column(Float) longitude = Column(Float) # GIAS enrichment fields website = Column(String(255)) headteacher_name = Column(String(200)) capacity = Column(Integer) trust_name = Column(String(255)) trust_uid = Column(String(20)) gender = Column(String(20)) # Mixed / Girls / Boys nursery_provision = Column(Boolean) # Relationships results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan") def __repr__(self): return f"" @property def address(self): """Combine address fields into single string.""" parts = [self.address1, self.address2, self.town, self.postcode] return ", ".join(p for p in parts if p) class SchoolResult(Base): """ Yearly KS2 results for a school. Each school can have multiple years of results. """ __tablename__ = "school_results" id = Column(Integer, primary_key=True, autoincrement=True) school_id = Column(Integer, ForeignKey("schools.id", ondelete="CASCADE"), nullable=False) year = Column(Integer, nullable=False, index=True) # Pupil numbers total_pupils = Column(Integer) eligible_pupils = Column(Integer) # Core KS2 metrics - Expected Standard rwm_expected_pct = Column(Float) reading_expected_pct = Column(Float) writing_expected_pct = Column(Float) maths_expected_pct = Column(Float) gps_expected_pct = Column(Float) science_expected_pct = Column(Float) # Higher Standard rwm_high_pct = Column(Float) reading_high_pct = Column(Float) writing_high_pct = Column(Float) maths_high_pct = Column(Float) gps_high_pct = Column(Float) # Progress Scores reading_progress = Column(Float) writing_progress = Column(Float) maths_progress = Column(Float) # Average Scores reading_avg_score = Column(Float) maths_avg_score = Column(Float) gps_avg_score = Column(Float) # School Context disadvantaged_pct = Column(Float) eal_pct = Column(Float) sen_support_pct = Column(Float) sen_ehcp_pct = Column(Float) stability_pct = Column(Float) # Pupil Absence from Tests reading_absence_pct = Column(Float) gps_absence_pct = Column(Float) maths_absence_pct = Column(Float) writing_absence_pct = Column(Float) science_absence_pct = Column(Float) # Gender Breakdown rwm_expected_boys_pct = Column(Float) rwm_expected_girls_pct = Column(Float) rwm_high_boys_pct = Column(Float) rwm_high_girls_pct = Column(Float) # Disadvantaged Performance rwm_expected_disadvantaged_pct = Column(Float) rwm_expected_non_disadvantaged_pct = Column(Float) disadvantaged_gap = Column(Float) # 3-Year Averages rwm_expected_3yr_pct = Column(Float) reading_avg_3yr = Column(Float) maths_avg_3yr = Column(Float) # Relationship school = relationship("School", back_populates="results") # Constraints __table_args__ = ( UniqueConstraint('school_id', 'year', name='uq_school_year'), Index('ix_school_results_school_year', 'school_id', 'year'), ) def __repr__(self): return f"" class SchemaVersion(Base): """ Tracks database schema version for automatic migrations. Single-row table that stores the current schema version. """ __tablename__ = "schema_version" id = Column(Integer, primary_key=True, default=1) version = Column(Integer, nullable=False) migrated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) def __repr__(self): return f"" # --------------------------------------------------------------------------- # Supplementary data tables (populated by the Kestra data integrator) # --------------------------------------------------------------------------- class OfstedInspection(Base): """Latest Ofsted inspection judgement per school.""" __tablename__ = "ofsted_inspections" urn = Column(Integer, primary_key=True) inspection_date = Column(Date) publication_date = Column(Date) inspection_type = Column(String(100)) # Section 5 / Section 8 etc. # 1=Outstanding 2=Good 3=Requires improvement 4=Inadequate overall_effectiveness = Column(Integer) quality_of_education = Column(Integer) behaviour_attitudes = Column(Integer) personal_development = Column(Integer) leadership_management = Column(Integer) early_years_provision = Column(Integer) # nullable — not all schools previous_overall = Column(Integer) # for trend display def __repr__(self): return f"" class OfstedParentView(Base): """Ofsted Parent View survey — latest per school. 14 questions, % saying Yes.""" __tablename__ = "ofsted_parent_view" urn = Column(Integer, primary_key=True) survey_date = Column(Date) total_responses = Column(Integer) q_happy_pct = Column(Float) # My child is happy at this school q_safe_pct = Column(Float) # My child feels safe at this school q_bullying_pct = Column(Float) # School deals with bullying well q_communication_pct = Column(Float) # School keeps me informed q_progress_pct = Column(Float) # My child does well / good progress q_teaching_pct = Column(Float) # Teaching is good q_information_pct = Column(Float) # I receive valuable info about progress q_curriculum_pct = Column(Float) # Broad range of subjects taught q_future_pct = Column(Float) # Prepares child well for the future q_leadership_pct = Column(Float) # Led and managed effectively q_wellbeing_pct = Column(Float) # Supports wider personal development q_behaviour_pct = Column(Float) # Pupils are well behaved q_recommend_pct = Column(Float) # I would recommend this school q_sen_pct = Column(Float) # Good information about child's SEN (where applicable) def __repr__(self): return f"" class SchoolCensus(Base): """Annual school census snapshot — class sizes and ethnicity breakdown.""" __tablename__ = "school_census" urn = Column(Integer, primary_key=True) year = Column(Integer, primary_key=True) class_size_avg = Column(Float) ethnicity_white_pct = Column(Float) ethnicity_asian_pct = Column(Float) ethnicity_black_pct = Column(Float) ethnicity_mixed_pct = Column(Float) ethnicity_other_pct = Column(Float) __table_args__ = ( Index('ix_school_census_urn_year', 'urn', 'year'), ) def __repr__(self): return f"" class SchoolAdmissions(Base): """Annual admissions statistics per school.""" __tablename__ = "school_admissions" urn = Column(Integer, primary_key=True) year = Column(Integer, primary_key=True) published_admission_number = Column(Integer) # PAN total_applications = Column(Integer) first_preference_offers_pct = Column(Float) # % receiving 1st choice oversubscribed = Column(Boolean) __table_args__ = ( Index('ix_school_admissions_urn_year', 'urn', 'year'), ) def __repr__(self): return f"" class SenDetail(Base): """SEN primary need type breakdown — more granular than school_results context fields.""" __tablename__ = "sen_detail" urn = Column(Integer, primary_key=True) year = Column(Integer, primary_key=True) primary_need_speech_pct = Column(Float) # SLCN primary_need_autism_pct = Column(Float) # ASD primary_need_mld_pct = Column(Float) # Moderate learning difficulty primary_need_spld_pct = Column(Float) # Specific learning difficulty (dyslexia etc.) primary_need_semh_pct = Column(Float) # Social, emotional, mental health primary_need_physical_pct = Column(Float) # Physical/sensory primary_need_other_pct = Column(Float) __table_args__ = ( Index('ix_sen_detail_urn_year', 'urn', 'year'), ) def __repr__(self): return f"" class Phonics(Base): """Phonics Screening Check pass rates.""" __tablename__ = "phonics" urn = Column(Integer, primary_key=True) year = Column(Integer, primary_key=True) year1_phonics_pct = Column(Float) # % reaching expected standard in Year 1 year2_phonics_pct = Column(Float) # % reaching standard in Year 2 (re-takers) __table_args__ = ( Index('ix_phonics_urn_year', 'urn', 'year'), ) def __repr__(self): return f"" class SchoolDeprivation(Base): """IDACI deprivation index — derived via postcode → LSOA lookup.""" __tablename__ = "school_deprivation" urn = Column(Integer, primary_key=True) lsoa_code = Column(String(20)) idaci_score = Column(Float) # 0–1, higher = more deprived idaci_decile = Column(Integer) # 1 = most deprived, 10 = least deprived def __repr__(self): return f"" class SchoolFinance(Base): """FBIT financial benchmarking data.""" __tablename__ = "school_finance" urn = Column(Integer, primary_key=True) year = Column(Integer, primary_key=True) per_pupil_spend = Column(Float) # £ total expenditure per pupil staff_cost_pct = Column(Float) # % of budget on all staff teacher_cost_pct = Column(Float) # % on teachers specifically support_staff_cost_pct = Column(Float) premises_cost_pct = Column(Float) __table_args__ = ( Index('ix_school_finance_urn_year', 'urn', 'year'), ) def __repr__(self): return f"" # Mapping from CSV columns to model fields SCHOOL_FIELD_MAPPING = { 'urn': 'urn', 'school_name': 'school_name', 'local_authority': 'local_authority', 'local_authority_code': 'local_authority_code', 'school_type': 'school_type', 'school_type_code': 'school_type_code', 'religious_denomination': 'religious_denomination', 'age_range': 'age_range', 'address1': 'address1', 'address2': 'address2', 'town': 'town', 'postcode': 'postcode', } RESULT_FIELD_MAPPING = { 'year': 'year', 'total_pupils': 'total_pupils', 'eligible_pupils': 'eligible_pupils', # Expected Standard 'rwm_expected_pct': 'rwm_expected_pct', 'reading_expected_pct': 'reading_expected_pct', 'writing_expected_pct': 'writing_expected_pct', 'maths_expected_pct': 'maths_expected_pct', 'gps_expected_pct': 'gps_expected_pct', 'science_expected_pct': 'science_expected_pct', # Higher Standard 'rwm_high_pct': 'rwm_high_pct', 'reading_high_pct': 'reading_high_pct', 'writing_high_pct': 'writing_high_pct', 'maths_high_pct': 'maths_high_pct', 'gps_high_pct': 'gps_high_pct', # Progress 'reading_progress': 'reading_progress', 'writing_progress': 'writing_progress', 'maths_progress': 'maths_progress', # Averages 'reading_avg_score': 'reading_avg_score', 'maths_avg_score': 'maths_avg_score', 'gps_avg_score': 'gps_avg_score', # Context 'disadvantaged_pct': 'disadvantaged_pct', 'eal_pct': 'eal_pct', 'sen_support_pct': 'sen_support_pct', 'sen_ehcp_pct': 'sen_ehcp_pct', 'stability_pct': 'stability_pct', # Absence 'reading_absence_pct': 'reading_absence_pct', 'gps_absence_pct': 'gps_absence_pct', 'maths_absence_pct': 'maths_absence_pct', 'writing_absence_pct': 'writing_absence_pct', 'science_absence_pct': 'science_absence_pct', # Gender 'rwm_expected_boys_pct': 'rwm_expected_boys_pct', 'rwm_expected_girls_pct': 'rwm_expected_girls_pct', 'rwm_high_boys_pct': 'rwm_high_boys_pct', 'rwm_high_girls_pct': 'rwm_high_girls_pct', # Disadvantaged 'rwm_expected_disadvantaged_pct': 'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct': 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap': 'disadvantaged_gap', # 3-Year 'rwm_expected_3yr_pct': 'rwm_expected_3yr_pct', 'reading_avg_3yr': 'reading_avg_3yr', 'maths_avg_3yr': 'maths_avg_3yr', }