Pipeline: - EES tap: split KS4 into performance + info streams, fix admissions filename (SchoolLevel keyword match), fix census filename (yearly suffix), remove phonics (no school-level data on EES), change endswith → in for matching - stg_ees_ks4: rewrite to filter long-format data and extract Attainment 8, Progress 8, EBacc, English/Maths metrics; join KS4 info for context - stg_ees_admissions: map real CSV columns (total_number_places_offered, etc.) - stg_ees_census: update source reference, stub with TODO for data columns - Remove stg_ees_phonics, fact_phonics (no school-level EES data) - Add ees_ks4_performance + ees_ks4_info sources, remove ees_ks4 + ees_phonics - Update int_ks4_with_lineage + fact_ks4_performance with new KS4 columns - Annual EES DAG: remove stg_ees_phonics+ from selector Backend: - models.py: replace all models to point at marts.* tables with schema='marts' (DimSchool, DimLocation, KS2Performance, FactOfstedInspection, etc.) - data_loader.py: rewrite load_school_data_as_dataframe() using raw SQL joining dim_school + dim_location + fact_ks2_performance; update get_supplementary_data() - database.py: remove migration machinery, keep only connection setup - app.py: remove check_and_migrate_if_needed, remove /api/admin/reimport-ks2 endpoints (pipeline handles all imports) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
217 lines
7.1 KiB
Python
217 lines
7.1 KiB
Python
"""
|
|
SQLAlchemy models — all tables live in the marts schema, built by dbt.
|
|
Read-only: the pipeline writes to these tables; the backend only reads.
|
|
"""
|
|
|
|
from sqlalchemy import Column, Integer, String, Float, Boolean, Date, Text, Index
|
|
|
|
from .database import Base
|
|
|
|
# Table options shared by the models below: passed as ``__table_args__`` so
# each mapped table is looked up in the "marts" schema.
MARTS = {
    "schema": "marts",
}
|
|
|
|
|
|
class DimSchool(Base):
    """School dimension, mapped to ``dim_school`` in the marts schema.

    The canonical record for a school: one row per active URN.
    """

    __tablename__ = "dim_school"
    __table_args__ = MARTS

    # Key and descriptive attributes of the school.
    urn = Column(Integer, primary_key=True)
    school_name = Column(String(255), nullable=False)
    phase = Column(String(100))
    school_type = Column(String(100))
    academy_trust_name = Column(String(255))
    academy_trust_uid = Column(String(20))
    religious_character = Column(String(100))
    gender = Column(String(20))
    age_range = Column(String(20))
    capacity = Column(Integer)
    total_pupils = Column(Integer)
    headteacher_name = Column(String(200))
    website = Column(String(255))
    telephone = Column(String(30))
    status = Column(String(50))
    nursery_provision = Column(Boolean)
    admissions_policy = Column(String(50))

    # Ofsted summary denormalised onto the school row; the monthly pipeline
    # keeps these columns up to date.
    ofsted_grade = Column(Integer)
    ofsted_date = Column(Date)
    ofsted_framework = Column(String(20))
|
|
|
|
|
|
class DimLocation(Base):
    """Location of a school, mapped to ``dim_location`` in the marts schema.

    Holds the postal address plus coordinates; latitude/longitude are derived
    from easting/northing (BNG→WGS84).
    """

    __tablename__ = "dim_location"
    __table_args__ = MARTS

    urn = Column(Integer, primary_key=True)

    # Postal address
    address_line1 = Column(String(255))
    address_line2 = Column(String(255))
    town = Column(String(100))
    county = Column(String(100))
    postcode = Column(String(20))

    # Administrative geography
    local_authority_code = Column(Integer)
    local_authority_name = Column(String(100))
    parliamentary_constituency = Column(String(100))
    urban_rural = Column(String(50))

    # Coordinates
    easting = Column(Integer)
    northing = Column(Integer)
    latitude = Column(Float)
    longitude = Column(Float)

    # NOTE: the table also has a ``geom`` column (a PostGIS geometry). It is
    # deliberately left unmapped here and is accessed via raw SQL instead.
|
|
|
|
|
|
class KS2Performance(Base):
    """KS2 attainment, mapped to ``fact_ks2_performance`` in the marts schema.

    One row per URN per year; rows include predecessor data.
    """

    __tablename__ = "fact_ks2_performance"
    __table_args__ = (Index("ix_ks2_urn_year", "urn", "year"), MARTS)

    # Composite primary key: (urn, year).
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    # presumably the URN the figures were originally reported under
    # (e.g. a predecessor school) — confirm against the dbt model
    source_urn = Column(Integer)
    total_pupils = Column(Integer)
    eligible_pupils = Column(Integer)

    # Core attainment measures
    rwm_expected_pct = Column(Float)
    rwm_high_pct = Column(Float)
    reading_expected_pct = Column(Float)
    reading_high_pct = Column(Float)
    reading_avg_score = Column(Float)
    reading_progress = Column(Float)
    writing_expected_pct = Column(Float)
    writing_high_pct = Column(Float)
    writing_progress = Column(Float)
    maths_expected_pct = Column(Float)
    maths_high_pct = Column(Float)
    maths_avg_score = Column(Float)
    maths_progress = Column(Float)
    gps_expected_pct = Column(Float)
    gps_high_pct = Column(Float)
    gps_avg_score = Column(Float)
    science_expected_pct = Column(Float)

    # Absence, per subject
    reading_absence_pct = Column(Float)
    writing_absence_pct = Column(Float)
    maths_absence_pct = Column(Float)
    gps_absence_pct = Column(Float)
    science_absence_pct = Column(Float)

    # Breakdown by gender
    rwm_expected_boys_pct = Column(Float)
    rwm_high_boys_pct = Column(Float)
    rwm_expected_girls_pct = Column(Float)
    rwm_high_girls_pct = Column(Float)

    # Breakdown by disadvantage
    rwm_expected_disadvantaged_pct = Column(Float)
    rwm_expected_non_disadvantaged_pct = Column(Float)
    disadvantaged_gap = Column(Float)

    # Cohort context
    disadvantaged_pct = Column(Float)
    eal_pct = Column(Float)
    sen_support_pct = Column(Float)
    sen_ehcp_pct = Column(Float)
    stability_pct = Column(Float)
|
|
|
|
|
|
class FactOfstedInspection(Base):
    """Ofsted inspection history, mapped to ``fact_ofsted_inspection``.

    The table lives in the marts schema and keeps the full history:
    one row per inspection.
    """

    __tablename__ = "fact_ofsted_inspection"
    __table_args__ = (Index("ix_ofsted_urn_date", "urn", "inspection_date"), MARTS)

    # Composite primary key: (urn, inspection_date).
    urn = Column(Integer, primary_key=True)
    inspection_date = Column(Date, primary_key=True)
    inspection_type = Column(String(100))
    framework = Column(String(20))

    # Inspection judgements
    overall_effectiveness = Column(Integer)
    quality_of_education = Column(Integer)
    behaviour_attitudes = Column(Integer)
    personal_development = Column(Integer)
    leadership_management = Column(Integer)
    early_years_provision = Column(Integer)
    sixth_form_provision = Column(Integer)

    # rc_* columns — NOTE(review): presumably the areas of a separate
    # inspection framework; confirm the prefix meaning against the dbt model.
    rc_safeguarding_met = Column(Boolean)
    rc_inclusion = Column(Integer)
    rc_curriculum_teaching = Column(Integer)
    rc_achievement = Column(Integer)
    rc_attendance_behaviour = Column(Integer)
    rc_personal_development = Column(Integer)
    rc_leadership_governance = Column(Integer)
    rc_early_years = Column(Integer)
    rc_sixth_form = Column(Integer)

    report_url = Column(Text)
|
|
|
|
|
|
class FactParentView(Base):
    """Ofsted Parent View survey, mapped to ``fact_parent_view`` (marts schema).

    Only the latest survey is held for each school, keyed by URN.
    """

    __tablename__ = "fact_parent_view"
    __table_args__ = MARTS

    urn = Column(Integer, primary_key=True)
    survey_date = Column(Date)
    total_responses = Column(Integer)

    # Per-question percentages (one q_*_pct column per survey question)
    q_happy_pct = Column(Float)
    q_safe_pct = Column(Float)
    q_behaviour_pct = Column(Float)
    q_bullying_pct = Column(Float)
    q_communication_pct = Column(Float)
    q_progress_pct = Column(Float)
    q_teaching_pct = Column(Float)
    q_information_pct = Column(Float)
    q_curriculum_pct = Column(Float)
    q_future_pct = Column(Float)
    q_leadership_pct = Column(Float)
    q_wellbeing_pct = Column(Float)
    q_recommend_pct = Column(Float)
|
|
|
|
|
|
class FactAdmissions(Base):
    """School admissions, mapped to ``fact_admissions`` in the marts schema.

    One row per URN per year.
    """

    __tablename__ = "fact_admissions"
    __table_args__ = (Index("ix_admissions_urn_year", "urn", "year"), MARTS)

    # Composite primary key: (urn, year).
    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    school_phase = Column(String(50))

    # Places, applications and offers
    published_admission_number = Column(Integer)
    total_applications = Column(Integer)
    first_preference_applications = Column(Integer)
    first_preference_offers = Column(Integer)
    first_preference_offer_pct = Column(Float)
    oversubscribed = Column(Boolean)

    admissions_policy = Column(String(100))
|
|
|
|
|
|
class FactDeprivation(Base):
    """IDACI deprivation index, mapped to ``fact_deprivation`` (marts schema).

    One row per URN.
    """

    __tablename__ = "fact_deprivation"
    __table_args__ = MARTS

    urn = Column(Integer, primary_key=True)
    lsoa_code = Column(String(20))

    # IDACI measures
    idaci_score = Column(Float)
    idaci_decile = Column(Integer)
|
|
|
|
|
|
class FactFinance(Base):
|
|
"""FBIT financial benchmarking — one row per URN per year."""
|
|
__tablename__ = "fact_finance"
|
|
__table_args__ = (
|
|
Index("ix_finance_urn_year", "urn", "year"),
|
|
MARTS,
|
|
)
|
|
|
|
urn = Column(Integer, primary_key=True)
|
|
year = Column(Integer, primary_key=True)
|
|
per_pupil_spend = Column(Float)
|
|
staff_cost_pct = Column(Float)
|
|
teacher_cost_pct = Column(Float)
|
|
support_staff_cost_pct = Column(Float)
|
|
premises_cost_pct = Column(Float)
|