feat: migrate backend to marts schema, update EES tap for verified datasets

Pipeline:
- EES tap: split KS4 into performance + info streams, fix admissions filename
  (SchoolLevel keyword match), fix census filename (yearly suffix), remove
  phonics (no school-level data on EES), change endswith → in for matching
- stg_ees_ks4: rewrite to filter long-format data and extract Attainment 8,
  Progress 8, EBacc, English/Maths metrics; join KS4 info for context
- stg_ees_admissions: map real CSV columns (total_number_places_offered, etc.)
- stg_ees_census: update source reference, stub with TODO for data columns
- Remove stg_ees_phonics, fact_phonics (no school-level EES data)
- Add ees_ks4_performance + ees_ks4_info sources, remove ees_ks4 + ees_phonics
- Update int_ks4_with_lineage + fact_ks4_performance with new KS4 columns
- Annual EES DAG: remove stg_ees_phonics+ from selector

Backend:
- models.py: replace all models to point at marts.* tables with schema='marts'
  (DimSchool, DimLocation, KS2Performance, FactOfstedInspection, etc.)
- data_loader.py: rewrite load_school_data_as_dataframe() using raw SQL joining
  dim_school + dim_location + fact_ks2_performance; update get_supplementary_data()
- database.py: remove migration machinery, keep only connection setup
- app.py: remove check_and_migrate_if_needed, remove /api/admin/reimport-ks2
  endpoints (pipeline handles all imports)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 09:29:27 +00:00
parent d82e36e7b2
commit ca351e9d73
18 changed files with 805 additions and 1245 deletions

View File

@@ -4,16 +4,14 @@ with current_ks4 as (
select
urn as current_urn,
urn as source_urn,
year,
total_pupils,
progress_8_score,
year, total_pupils, eligible_pupils, prior_attainment_avg,
attainment_8_score,
ebacc_entry_pct,
ebacc_achievement_pct,
english_strong_pass_pct,
maths_strong_pass_pct,
english_maths_strong_pass_pct,
staying_in_education_pct
progress_8_score, progress_8_lower_ci, progress_8_upper_ci,
progress_8_english, progress_8_maths, progress_8_ebacc, progress_8_open,
english_maths_strong_pass_pct, english_maths_standard_pass_pct,
ebacc_entry_pct, ebacc_strong_pass_pct, ebacc_standard_pass_pct, ebacc_avg_score,
gcse_grade_91_pct,
sen_pct, sen_ehcp_pct, sen_support_pct
from {{ ref('stg_ees_ks4') }}
),
@@ -21,16 +19,14 @@ predecessor_ks4 as (
select
lin.current_urn,
ks4.urn as source_urn,
ks4.year,
ks4.total_pupils,
ks4.progress_8_score,
ks4.year, ks4.total_pupils, ks4.eligible_pupils, ks4.prior_attainment_avg,
ks4.attainment_8_score,
ks4.ebacc_entry_pct,
ks4.ebacc_achievement_pct,
ks4.english_strong_pass_pct,
ks4.maths_strong_pass_pct,
ks4.english_maths_strong_pass_pct,
ks4.staying_in_education_pct
ks4.progress_8_score, ks4.progress_8_lower_ci, ks4.progress_8_upper_ci,
ks4.progress_8_english, ks4.progress_8_maths, ks4.progress_8_ebacc, ks4.progress_8_open,
ks4.english_maths_strong_pass_pct, ks4.english_maths_standard_pass_pct,
ks4.ebacc_entry_pct, ks4.ebacc_strong_pass_pct, ks4.ebacc_standard_pass_pct, ks4.ebacc_avg_score,
ks4.gcse_grade_91_pct,
ks4.sen_pct, ks4.sen_ehcp_pct, ks4.sen_support_pct
from {{ ref('stg_ees_ks4') }} ks4
inner join {{ ref('int_school_lineage') }} lin
on ks4.urn = lin.predecessor_urn

View File

@@ -1,18 +1,8 @@
-- Intermediate model: Merged pupil characteristics from census data
-- TODO: Expand once census data columns are verified and added to stg_ees_census
select
urn,
year,
fsm_pct,
sen_support_pct,
sen_ehcp_pct,
eal_pct,
disadvantaged_pct,
ethnicity_white_pct,
ethnicity_asian_pct,
ethnicity_black_pct,
ethnicity_mixed_pct,
ethnicity_other_pct,
class_size_avg,
stability_pct
phase_type_grouping
from {{ ref('stg_ees_census') }}

View File

@@ -88,14 +88,6 @@ models:
- name: year
tests: [not_null]
- name: fact_phonics
description: Phonics screening results — one row per URN per year
columns:
- name: urn
tests: [not_null]
- name: year
tests: [not_null]
- name: fact_parent_view
description: Parent View survey responses
columns:

View File

@@ -3,8 +3,12 @@
select
urn,
year,
school_phase,
published_admission_number,
total_applications,
first_preference_offers_pct,
oversubscribed
first_preference_applications,
first_preference_offers,
first_preference_offer_pct,
oversubscribed,
admissions_policy
from {{ ref('stg_ees_admissions') }}

View File

@@ -1,16 +1,42 @@
-- Mart: KS4 performance fact table — one row per URN per year
-- Includes predecessor data via lineage resolution
select
current_urn as urn,
source_urn,
year,
total_pupils,
progress_8_score,
eligible_pupils,
prior_attainment_avg,
-- Attainment 8
attainment_8_score,
ebacc_entry_pct,
ebacc_achievement_pct,
english_strong_pass_pct,
maths_strong_pass_pct,
-- Progress 8
progress_8_score,
progress_8_lower_ci,
progress_8_upper_ci,
progress_8_english,
progress_8_maths,
progress_8_ebacc,
progress_8_open,
-- English & Maths
english_maths_strong_pass_pct,
staying_in_education_pct
english_maths_standard_pass_pct,
-- EBacc
ebacc_entry_pct,
ebacc_strong_pass_pct,
ebacc_standard_pass_pct,
ebacc_avg_score,
-- GCSE
gcse_grade_91_pct,
-- Context
sen_pct,
sen_ehcp_pct,
sen_support_pct
from {{ ref('int_ks4_with_lineage') }}

View File

@@ -1,8 +0,0 @@
-- Mart: Phonics screening results — one row per URN per year
select
urn,
year,
year1_phonics_pct,
year2_phonics_pct
from {{ ref('stg_ees_phonics') }}

View File

@@ -1,18 +1,8 @@
-- Mart: Pupil characteristics — one row per URN per year
-- TODO: Expand once census data columns are verified and added to staging
select
urn,
year,
fsm_pct,
sen_support_pct,
sen_ehcp_pct,
eal_pct,
disadvantaged_pct,
ethnicity_white_pct,
ethnicity_asian_pct,
ethnicity_black_pct,
ethnicity_mixed_pct,
ethnicity_other_pct,
class_size_avg,
stability_pct
phase_type_grouping
from {{ ref('int_pupil_chars_merged') }}

View File

@@ -30,8 +30,11 @@ sources:
- name: ees_ks2_info
description: KS2 school information (wide format — context/demographics per school)
- name: ees_ks4
description: KS4 attainment data from Explore Education Statistics
- name: ees_ks4_performance
description: KS4 performance tables (long format — one row per school × breakdown × sex)
- name: ees_ks4_info
description: KS4 school information (wide format — context/demographics per school)
- name: ees_census
description: School census pupil characteristics
@@ -39,8 +42,7 @@ sources:
- name: ees_admissions
description: Primary and secondary school admissions data
- name: ees_phonics
description: Phonics screening check results
# Phonics: no school-level data on EES (only national/LA level)
- name: parent_view
description: Ofsted Parent View survey responses

View File

@@ -1,19 +1,48 @@
-- Staging model: Primary and secondary school admissions from EES
-- Wide format, one row per school per year. No geographic_level column.
-- File is in supporting-files/ subdirectory of the release ZIP.
with source as (
select * from {{ source('raw', 'ees_admissions') }}
where school_urn is not null
),
renamed as (
select
cast(urn as integer) as urn,
cast(time_period as integer) as year,
cast(published_admission_number as integer) as published_admission_number,
cast(total_applications as integer) as total_applications,
cast(first_preference_offers_pct as numeric) as first_preference_offers_pct,
cast(oversubscribed as boolean) as oversubscribed
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
school_phase,
entry_year,
-- Places and offers
cast(nullif(total_number_places_offered, 'z') as integer) as published_admission_number,
cast(nullif(number_preferred_offers, 'z') as integer) as total_offers,
cast(nullif(number_1st_preference_offers, 'z') as integer) as first_preference_offers,
cast(nullif(number_2nd_preference_offers, 'z') as integer) as second_preference_offers,
cast(nullif(number_3rd_preference_offers, 'z') as integer) as third_preference_offers,
-- Applications
cast(nullif(times_put_as_any_preferred_school, 'z') as integer) as total_applications,
cast(nullif(times_put_as_1st_preference, 'z') as integer) as first_preference_applications,
-- Proportions
cast(nullif(proportion_1stprefs_v_totaloffers, 'z') as numeric) as first_preference_offer_pct,
-- Derived: oversubscribed when 1st-preference applications exceed places offered.
-- The 'z' suppression marker is stripped inside the comparison itself: SQL does
-- not guarantee the evaluation order of AND operands, so a guard in a sibling
-- predicate cannot reliably stop a bare cast('z' as integer) from raising.
-- When either input is suppressed the comparison is NULL and falls through to
-- the else branch, so the result is false.
case
when cast(nullif(times_put_as_1st_preference, 'z') as integer)
> cast(nullif(total_number_places_offered, 'z') as integer)
then true
else false
end as oversubscribed,
-- Context
admissions_policy,
nullif(FSM_eligible_percent, 'z') as fsm_eligible_pct
from source
where urn is not null
)
select * from renamed

View File

@@ -1,27 +1,30 @@
-- Staging model: School census pupil characteristics from EES
-- File: spc_school_level_underlying_data_YYYY.csv (269 cols, in supporting-files/)
-- The source CSV names its identifier column 'urn' (not 'school_urn'); the tap
-- normalises it to school_urn, which is the column this model reads.
--
-- TODO: The CSV has 269 columns but only metadata columns have been verified.
-- Data columns (ethnicity %, FSM %, SEN %, class sizes) need to be discovered
-- by inspecting the CSV on the Airflow container. The column references below
-- are placeholders and will fail until the tap schema and this model are updated
-- with the actual column names.
with source as (
select * from {{ source('raw', 'ees_census') }}
where school_urn is not null
),
renamed as (
select
cast(urn as integer) as urn,
cast(time_period as integer) as year,
cast(fsm_pct as numeric) as fsm_pct,
cast(sen_support_pct as numeric) as sen_support_pct,
cast(sen_ehcp_pct as numeric) as sen_ehcp_pct,
cast(eal_pct as numeric) as eal_pct,
cast(disadvantaged_pct as numeric) as disadvantaged_pct,
cast(ethnicity_white_pct as numeric) as ethnicity_white_pct,
cast(ethnicity_asian_pct as numeric) as ethnicity_asian_pct,
cast(ethnicity_black_pct as numeric) as ethnicity_black_pct,
cast(ethnicity_mixed_pct as numeric) as ethnicity_mixed_pct,
cast(ethnicity_other_pct as numeric) as ethnicity_other_pct,
cast(class_size_avg as numeric) as class_size_avg,
cast(stability_pct as numeric) as stability_pct
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
school_name,
phase_type_grouping
-- TODO: Add census data columns once verified:
-- fsm_pct, sen_support_pct, sen_ehcp_pct, eal_pct,
-- disadvantaged_pct, ethnicity_white_pct, ethnicity_asian_pct,
-- ethnicity_black_pct, ethnicity_mixed_pct, ethnicity_other_pct,
-- class_size_avg, stability_pct
from source
where urn is not null
)
select * from renamed

View File

@@ -1,24 +1,102 @@
-- Staging model: KS4 attainment data from EES (secondary schools — NEW)
-- Staging model: KS4 attainment data from EES
-- KS4 performance data is long-format with breakdown dimensions (breakdown_topic,
-- breakdown, sex). Unlike KS2 which has a subject dimension, KS4 metrics are
-- already in separate columns — we just filter to the 'All pupils' breakdown.
-- EES uses 'z' for suppressed values — cast to null via nullif.
with source as (
select * from {{ source('raw', 'ees_ks4') }}
with performance as (
select * from {{ source('raw', 'ees_ks4_performance') }}
where school_urn is not null
),
renamed as (
-- Filter to all-pupils totals (one row per school per year)
all_pupils as (
select
cast(urn as integer) as urn,
cast(time_period as integer) as year,
cast(t_pupils as integer) as total_pupils,
cast(progress_8_score as numeric) as progress_8_score,
cast(attainment_8_score as numeric) as attainment_8_score,
cast(ebacc_entry_pct as numeric) as ebacc_entry_pct,
cast(ebacc_achievement_pct as numeric) as ebacc_achievement_pct,
cast(english_strong_pass_pct as numeric) as english_strong_pass_pct,
cast(maths_strong_pass_pct as numeric) as maths_strong_pass_pct,
cast(english_maths_strong_pass_pct as numeric) as english_maths_strong_pass_pct,
cast(staying_in_education_pct as numeric) as staying_in_education_pct
from source
where urn is not null
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(nullif(pupil_count, 'z') as integer) as total_pupils,
-- Attainment 8
cast(nullif(attainment8_average, 'z') as numeric) as attainment_8_score,
-- Progress 8
cast(nullif(progress8_average, 'z') as numeric) as progress_8_score,
cast(nullif(progress8_lower_95_ci, 'z') as numeric) as progress_8_lower_ci,
cast(nullif(progress8_upper_95_ci, 'z') as numeric) as progress_8_upper_ci,
cast(nullif(progress8eng_average, 'z') as numeric) as progress_8_english,
cast(nullif(progress8mat_average, 'z') as numeric) as progress_8_maths,
cast(nullif(progress8ebacc_average, 'z') as numeric) as progress_8_ebacc,
cast(nullif(progress8open_average, 'z') as numeric) as progress_8_open,
-- English & Maths pass rates
cast(nullif(engmath_95_percent, 'z') as numeric) as english_maths_strong_pass_pct,
cast(nullif(engmath_94_percent, 'z') as numeric) as english_maths_standard_pass_pct,
-- EBacc
cast(nullif(ebacc_entering_percent, 'z') as numeric) as ebacc_entry_pct,
cast(nullif(ebacc_95_percent, 'z') as numeric) as ebacc_strong_pass_pct,
cast(nullif(ebacc_94_percent, 'z') as numeric) as ebacc_standard_pass_pct,
cast(nullif(ebacc_aps_average, 'z') as numeric) as ebacc_avg_score,
-- GCSE grade 9-1
cast(nullif(gcse_91_percent, 'z') as numeric) as gcse_grade_91_pct
from performance
where breakdown_topic = 'All pupils'
and breakdown = 'Total'
and sex = 'Total'
),
-- KS4 info table for context/demographics.
-- Casts the school identifier and year to integers and each context measure to
-- a number, turning the 'z' suppression marker into NULL before the cast.
-- Rows without a school_urn are dropped.
-- NOTE(review): the final select left-joins this CTE on (urn, year); presumably
-- the info source holds one row per school per year — confirm, otherwise the
-- join will duplicate performance rows.
info as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
-- Pupil count from the info file, exposed separately from the performance file's total_pupils
cast(nullif(endks4_pupil_count, 'z') as integer) as eligible_pupils,
cast(nullif(ks2_scaledscore_average, 'z') as numeric) as prior_attainment_avg,
cast(nullif(sen_pupil_percent, 'z') as numeric) as sen_pct,
cast(nullif(sen_with_ehcp_pupil_percent, 'z') as numeric) as sen_ehcp_pct,
-- SEN pupils without an EHCP are exposed as sen_support_pct
cast(nullif(sen_no_ehcp_pupil_percent, 'z') as numeric) as sen_support_pct
from {{ source('raw', 'ees_ks4_info') }}
where school_urn is not null
)
select * from renamed
-- Final output: every all_pupils row (performance metrics) enriched with the
-- context columns from info, matched on urn and year. The left join keeps
-- schools that have no matching info row; their context columns are NULL.
select
p.urn,
p.year,
p.total_pupils,
i.eligible_pupils,
i.prior_attainment_avg,
-- Attainment 8
p.attainment_8_score,
-- Progress 8
p.progress_8_score,
p.progress_8_lower_ci,
p.progress_8_upper_ci,
p.progress_8_english,
p.progress_8_maths,
p.progress_8_ebacc,
p.progress_8_open,
-- English & Maths
p.english_maths_strong_pass_pct,
p.english_maths_standard_pass_pct,
-- EBacc
p.ebacc_entry_pct,
p.ebacc_strong_pass_pct,
p.ebacc_standard_pass_pct,
p.ebacc_avg_score,
-- GCSE
p.gcse_grade_91_pct,
-- Context (from the info CTE; NULL when no info row matches)
i.sen_pct,
i.sen_ehcp_pct,
i.sen_support_pct
from all_pupils p
left join info i on p.urn = i.urn and p.year = i.year

View File

@@ -1,17 +0,0 @@
-- Staging model: Phonics screening check results from EES
with source as (
select * from {{ source('raw', 'ees_phonics') }}
),
renamed as (
select
cast(urn as integer) as urn,
cast(time_period as integer) as year,
cast(year1_phonics_pct as numeric) as year1_phonics_pct,
cast(year2_phonics_pct as numeric) as year2_phonics_pct
from source
where urn is not null
)
select * from renamed