diff --git a/backend/database.py b/backend/database.py
new file mode 100644
index 0000000..40d3e7e
--- /dev/null
+++ b/backend/database.py
@@ -0,0 +1,73 @@
+"""
+Database connection setup using SQLAlchemy.
+"""
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+from contextlib import contextmanager
+
+from .config import settings
+
+# Create engine
+engine = create_engine(
+    settings.database_url,
+    pool_size=10,
+    max_overflow=20,
+    pool_pre_ping=True,  # Verify connections before use
+    echo=False,  # Set to True for SQL debugging
+)
+
+# Session factory
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+# Base class for models
+Base = declarative_base()
+
+
+def get_db():
+    """
+    Dependency for FastAPI routes to get a database session.
+
+    Yields a new session and always closes it afterwards. Nothing is
+    committed here, so callers must commit their own changes.
+    """
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+
+@contextmanager
+def get_db_session():
+    """
+    Context manager for database sessions.
+    Use in non-FastAPI contexts (scripts, etc).
+
+    Commits when the block exits normally; rolls back and re-raises on
+    any exception. The session is closed in both cases.
+    """
+    db = SessionLocal()
+    try:
+        yield db
+        db.commit()
+    except Exception:
+        db.rollback()
+        raise
+    finally:
+        db.close()
+
+
+def init_db():
+    """
+    Initialize database - create all tables.
+    """
+    Base.metadata.create_all(bind=engine)
+
+
+def drop_db():
+    """
+    Drop all tables - use with caution!
+    """
+    Base.metadata.drop_all(bind=engine)
+
diff --git a/backend/models.py b/backend/models.py
new file mode 100644
index 0000000..c57f8a0
--- /dev/null
+++ b/backend/models.py
@@ -0,0 +1,194 @@
+"""
+SQLAlchemy database models for school data.
+Normalized schema with separate tables for schools and yearly results.
+"""
+
+from sqlalchemy import (
+    Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
+    Text, Boolean
+)
+from sqlalchemy.orm import relationship
+from .database import Base
+
+
+class School(Base):
+    """
+    Core school information - relatively static data.
+    """
+    __tablename__ = "schools"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    urn = Column(Integer, unique=True, nullable=False, index=True)
+    school_name = Column(String(255), nullable=False)
+    local_authority = Column(String(100))
+    local_authority_code = Column(Integer)
+    school_type = Column(String(100))
+    school_type_code = Column(String(10))
+    religious_denomination = Column(String(100))
+    age_range = Column(String(20))
+
+    # Address
+    address1 = Column(String(255))
+    address2 = Column(String(255))
+    town = Column(String(100))
+    postcode = Column(String(20), index=True)
+
+    # Geocoding (cached)
+    latitude = Column(Float)
+    longitude = Column(Float)
+
+    # Relationships
+    results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
+
+    def __repr__(self):
+        """Return a debug representation identifying the school by URN and name."""
+        return f"School(urn={self.urn!r}, school_name={self.school_name!r})"
+
+    @property
+    def address(self):
+        """Combine address fields into single string."""
+        parts = [self.address1, self.address2, self.town, self.postcode]
+        return ", ".join(p for p in parts if p)
+
+
+class SchoolResult(Base):
+    """
+    Yearly KS2 results for a school.
+    Each school can have multiple years of results.
+    """
+    __tablename__ = "school_results"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    school_id = Column(Integer, ForeignKey("schools.id", ondelete="CASCADE"), nullable=False)
+    year = Column(Integer, nullable=False, index=True)
+
+    # Pupil numbers
+    total_pupils = Column(Integer)
+    eligible_pupils = Column(Integer)
+
+    # Core KS2 metrics - Expected Standard
+    rwm_expected_pct = Column(Float)
+    reading_expected_pct = Column(Float)
+    writing_expected_pct = Column(Float)
+    maths_expected_pct = Column(Float)
+    gps_expected_pct = Column(Float)
+    science_expected_pct = Column(Float)
+
+    # Higher Standard
+    rwm_high_pct = Column(Float)
+    reading_high_pct = Column(Float)
+    writing_high_pct = Column(Float)
+    maths_high_pct = Column(Float)
+    gps_high_pct = Column(Float)
+
+    # Progress Scores
+    reading_progress = Column(Float)
+    writing_progress = Column(Float)
+    maths_progress = Column(Float)
+
+    # Average Scores
+    reading_avg_score = Column(Float)
+    maths_avg_score = Column(Float)
+    gps_avg_score = Column(Float)
+
+    # School Context
+    disadvantaged_pct = Column(Float)
+    eal_pct = Column(Float)
+    sen_support_pct = Column(Float)
+    sen_ehcp_pct = Column(Float)
+    stability_pct = Column(Float)
+
+    # Gender Breakdown
+    rwm_expected_boys_pct = Column(Float)
+    rwm_expected_girls_pct = Column(Float)
+    rwm_high_boys_pct = Column(Float)
+    rwm_high_girls_pct = Column(Float)
+
+    # Disadvantaged Performance
+    rwm_expected_disadvantaged_pct = Column(Float)
+    rwm_expected_non_disadvantaged_pct = Column(Float)
+    disadvantaged_gap = Column(Float)
+
+    # 3-Year Averages
+    rwm_expected_3yr_pct = Column(Float)
+    reading_avg_3yr = Column(Float)
+    maths_avg_3yr = Column(Float)
+
+    # Relationship
+    school = relationship("School", back_populates="results")
+
+    # Constraints
+    __table_args__ = (
+        UniqueConstraint('school_id', 'year', name='uq_school_year'),
+        # NOTE(review): this index covers the same columns as uq_school_year,
+        # so it may be redundant on the target backend - confirm before removing.
+        Index('ix_school_results_school_year', 'school_id', 'year'),
+    )
+
+    def __repr__(self):
+        """Return a debug representation keyed by school id and year."""
+        return f"SchoolResult(school_id={self.school_id!r}, year={self.year!r})"
+
+
+# Mapping from CSV columns to model fields
+SCHOOL_FIELD_MAPPING = {
+    'urn': 'urn',
+    'school_name': 'school_name',
+    'local_authority': 'local_authority',
+    'local_authority_code': 'local_authority_code',
+    'school_type': 'school_type',
+    'school_type_code': 'school_type_code',
+    'religious_denomination': 'religious_denomination',
+    'age_range': 'age_range',
+    'address1': 'address1',
+    'address2': 'address2',
+    'town': 'town',
+    'postcode': 'postcode',
+}
+
+RESULT_FIELD_MAPPING = {
+    'year': 'year',
+    'total_pupils': 'total_pupils',
+    'eligible_pupils': 'eligible_pupils',
+    # Expected Standard
+    'rwm_expected_pct': 'rwm_expected_pct',
+    'reading_expected_pct': 'reading_expected_pct',
+    'writing_expected_pct': 'writing_expected_pct',
+    'maths_expected_pct': 'maths_expected_pct',
+    'gps_expected_pct': 'gps_expected_pct',
+    'science_expected_pct': 'science_expected_pct',
+    # Higher Standard
+    'rwm_high_pct': 'rwm_high_pct',
+    'reading_high_pct': 'reading_high_pct',
+    'writing_high_pct': 'writing_high_pct',
+    'maths_high_pct': 'maths_high_pct',
+    'gps_high_pct': 'gps_high_pct',
+    # Progress
+    'reading_progress': 'reading_progress',
+    'writing_progress': 'writing_progress',
+    'maths_progress': 'maths_progress',
+    # Averages
+    'reading_avg_score': 'reading_avg_score',
+    'maths_avg_score': 'maths_avg_score',
+    'gps_avg_score': 'gps_avg_score',
+    # Context
+    'disadvantaged_pct': 'disadvantaged_pct',
+    'eal_pct': 'eal_pct',
+    'sen_support_pct': 'sen_support_pct',
+    'sen_ehcp_pct': 'sen_ehcp_pct',
+    'stability_pct': 'stability_pct',
+    # Gender
+    'rwm_expected_boys_pct': 'rwm_expected_boys_pct',
+    'rwm_expected_girls_pct': 'rwm_expected_girls_pct',
+    'rwm_high_boys_pct': 'rwm_high_boys_pct',
+    'rwm_high_girls_pct': 'rwm_high_girls_pct',
+    # Disadvantaged
+    'rwm_expected_disadvantaged_pct': 'rwm_expected_disadvantaged_pct',
+    'rwm_expected_non_disadvantaged_pct': 'rwm_expected_non_disadvantaged_pct',
+    'disadvantaged_gap': 'disadvantaged_gap',
+    # 3-Year
+    'rwm_expected_3yr_pct': 'rwm_expected_3yr_pct',
+    'reading_avg_3yr': 'reading_avg_3yr',
+    'maths_avg_3yr': 'maths_avg_3yr',
+}
+