""" SQLAlchemy database models for school data. Normalized schema with separate tables for schools and yearly results. """ from sqlalchemy import ( Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint, Text, Boolean ) from sqlalchemy.orm import relationship from .database import Base class School(Base): """ Core school information - relatively static data. """ __tablename__ = "schools" id = Column(Integer, primary_key=True, autoincrement=True) urn = Column(Integer, unique=True, nullable=False, index=True) school_name = Column(String(255), nullable=False) local_authority = Column(String(100)) local_authority_code = Column(Integer) school_type = Column(String(100)) school_type_code = Column(String(10)) religious_denomination = Column(String(100)) age_range = Column(String(20)) # Address address1 = Column(String(255)) address2 = Column(String(255)) town = Column(String(100)) postcode = Column(String(20), index=True) # Geocoding (cached) latitude = Column(Float) longitude = Column(Float) # Relationships results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan") def __repr__(self): return f"" @property def address(self): """Combine address fields into single string.""" parts = [self.address1, self.address2, self.town, self.postcode] return ", ".join(p for p in parts if p) class SchoolResult(Base): """ Yearly KS2 results for a school. Each school can have multiple years of results. """ __tablename__ = "school_results" id = Column(Integer, primary_key=True, autoincrement=True) school_id = Column(Integer, ForeignKey("schools.id", ondelete="CASCADE"), nullable=False) year = Column(Integer, nullable=False, index=True) # Pupil numbers total_pupils = Column(Integer) eligible_pupils = Column(Integer) # Core KS2 metrics - Expected Standard rwm_expected_pct = Column(Float) reading_expected_pct = Column(Float) writing_expected_pct = Column(Float) maths_expected_pct = Column(Float) gps_expected_pct = Column(Float) science_expected_pct = Column(Float) # Higher Standard rwm_high_pct = Column(Float) reading_high_pct = Column(Float) writing_high_pct = Column(Float) maths_high_pct = Column(Float) gps_high_pct = Column(Float) # Progress Scores reading_progress = Column(Float) writing_progress = Column(Float) maths_progress = Column(Float) # Average Scores reading_avg_score = Column(Float) maths_avg_score = Column(Float) gps_avg_score = Column(Float) # School Context disadvantaged_pct = Column(Float) eal_pct = Column(Float) sen_support_pct = Column(Float) sen_ehcp_pct = Column(Float) stability_pct = Column(Float) # Pupil Absence from Tests reading_absence_pct = Column(Float) gps_absence_pct = Column(Float) maths_absence_pct = Column(Float) writing_absence_pct = Column(Float) science_absence_pct = Column(Float) # Gender Breakdown rwm_expected_boys_pct = Column(Float) rwm_expected_girls_pct = Column(Float) rwm_high_boys_pct = Column(Float) rwm_high_girls_pct = Column(Float) # Disadvantaged Performance rwm_expected_disadvantaged_pct = Column(Float) rwm_expected_non_disadvantaged_pct = Column(Float) disadvantaged_gap = Column(Float) # 3-Year Averages rwm_expected_3yr_pct = Column(Float) reading_avg_3yr = Column(Float) maths_avg_3yr = Column(Float) # Relationship school = relationship("School", back_populates="results") # Constraints __table_args__ = ( UniqueConstraint('school_id', 'year', name='uq_school_year'), Index('ix_school_results_school_year', 'school_id', 'year'), ) def __repr__(self): return f"" # Mapping from CSV columns to model fields SCHOOL_FIELD_MAPPING = { 'urn': 'urn', 'school_name': 'school_name', 'local_authority': 'local_authority', 'local_authority_code': 'local_authority_code', 'school_type': 'school_type', 'school_type_code': 'school_type_code', 'religious_denomination': 'religious_denomination', 'age_range': 'age_range', 'address1': 'address1', 'address2': 'address2', 'town': 'town', 'postcode': 'postcode', } RESULT_FIELD_MAPPING = { 'year': 'year', 'total_pupils': 'total_pupils', 'eligible_pupils': 'eligible_pupils', # Expected Standard 'rwm_expected_pct': 'rwm_expected_pct', 'reading_expected_pct': 'reading_expected_pct', 'writing_expected_pct': 'writing_expected_pct', 'maths_expected_pct': 'maths_expected_pct', 'gps_expected_pct': 'gps_expected_pct', 'science_expected_pct': 'science_expected_pct', # Higher Standard 'rwm_high_pct': 'rwm_high_pct', 'reading_high_pct': 'reading_high_pct', 'writing_high_pct': 'writing_high_pct', 'maths_high_pct': 'maths_high_pct', 'gps_high_pct': 'gps_high_pct', # Progress 'reading_progress': 'reading_progress', 'writing_progress': 'writing_progress', 'maths_progress': 'maths_progress', # Averages 'reading_avg_score': 'reading_avg_score', 'maths_avg_score': 'maths_avg_score', 'gps_avg_score': 'gps_avg_score', # Context 'disadvantaged_pct': 'disadvantaged_pct', 'eal_pct': 'eal_pct', 'sen_support_pct': 'sen_support_pct', 'sen_ehcp_pct': 'sen_ehcp_pct', 'stability_pct': 'stability_pct', # Absence 'reading_absence_pct': 'reading_absence_pct', 'gps_absence_pct': 'gps_absence_pct', 'maths_absence_pct': 'maths_absence_pct', 'writing_absence_pct': 'writing_absence_pct', 'science_absence_pct': 'science_absence_pct', # Gender 'rwm_expected_boys_pct': 'rwm_expected_boys_pct', 'rwm_expected_girls_pct': 'rwm_expected_girls_pct', 'rwm_high_boys_pct': 'rwm_high_boys_pct', 'rwm_high_girls_pct': 'rwm_high_girls_pct', # Disadvantaged 'rwm_expected_disadvantaged_pct': 'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct': 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap': 'disadvantaged_gap', # 3-Year 'rwm_expected_3yr_pct': 'rwm_expected_3yr_pct', 'reading_avg_3yr': 'reading_avg_3yr', 'maths_avg_3yr': 'maths_avg_3yr', }