feat(ees): rewrite EES tap and KS2 models for actual data structure
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 31s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m8s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m45s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

- Fix the publication slugs for KS4, Phonics and Admissions (they were wrong)
- Split KS2 into two streams: ees_ks2_attainment (long-format) and
  ees_ks2_info (wide-format context data)
- Target specific filenames instead of keyword matching
- Handle the URN column being named either school_urn or urn
- Pivot KS2 attainment from long to wide format in dbt staging
- Add all ~40 KS2 columns the backend needs (GPS, absence, gender,
  disadvantaged breakdowns, context demographics)
- Pass through all columns in int_ks2_with_lineage and fact_ks2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-26 23:08:50 +00:00
parent 719f06e480
commit d82e36e7b2
5 changed files with 354 additions and 99 deletions

View File

@@ -5,21 +5,16 @@ with current_ks2 as (
select
urn as current_urn,
urn as source_urn,
year,
total_pupils,
rwm_expected_pct,
reading_expected_pct,
writing_expected_pct,
maths_expected_pct,
rwm_high_pct,
reading_high_pct,
writing_high_pct,
maths_high_pct,
reading_progress,
writing_progress,
maths_progress,
reading_avg_score,
maths_avg_score
year, total_pupils, eligible_pupils,
rwm_expected_pct, rwm_high_pct,
reading_expected_pct, reading_high_pct, reading_avg_score, reading_progress,
writing_expected_pct, writing_high_pct, writing_progress,
maths_expected_pct, maths_high_pct, maths_avg_score, maths_progress,
gps_expected_pct, gps_high_pct, gps_avg_score, science_expected_pct,
reading_absence_pct, writing_absence_pct, maths_absence_pct, gps_absence_pct, science_absence_pct,
rwm_expected_boys_pct, rwm_high_boys_pct, rwm_expected_girls_pct, rwm_high_girls_pct,
rwm_expected_disadvantaged_pct, rwm_expected_non_disadvantaged_pct, disadvantaged_gap,
disadvantaged_pct, eal_pct, sen_support_pct, sen_ehcp_pct, stability_pct
from {{ ref('stg_ees_ks2') }}
),
@@ -27,25 +22,19 @@ predecessor_ks2 as (
select
lin.current_urn,
ks2.urn as source_urn,
ks2.year,
ks2.total_pupils,
ks2.rwm_expected_pct,
ks2.reading_expected_pct,
ks2.writing_expected_pct,
ks2.maths_expected_pct,
ks2.rwm_high_pct,
ks2.reading_high_pct,
ks2.writing_high_pct,
ks2.maths_high_pct,
ks2.reading_progress,
ks2.writing_progress,
ks2.maths_progress,
ks2.reading_avg_score,
ks2.maths_avg_score
ks2.year, ks2.total_pupils, ks2.eligible_pupils,
ks2.rwm_expected_pct, ks2.rwm_high_pct,
ks2.reading_expected_pct, ks2.reading_high_pct, ks2.reading_avg_score, ks2.reading_progress,
ks2.writing_expected_pct, ks2.writing_high_pct, ks2.writing_progress,
ks2.maths_expected_pct, ks2.maths_high_pct, ks2.maths_avg_score, ks2.maths_progress,
ks2.gps_expected_pct, ks2.gps_high_pct, ks2.gps_avg_score, ks2.science_expected_pct,
ks2.reading_absence_pct, ks2.writing_absence_pct, ks2.maths_absence_pct, ks2.gps_absence_pct, ks2.science_absence_pct,
ks2.rwm_expected_boys_pct, ks2.rwm_high_boys_pct, ks2.rwm_expected_girls_pct, ks2.rwm_high_girls_pct,
ks2.rwm_expected_disadvantaged_pct, ks2.rwm_expected_non_disadvantaged_pct, ks2.disadvantaged_gap,
ks2.disadvantaged_pct, ks2.eal_pct, ks2.sen_support_pct, ks2.sen_ehcp_pct, ks2.stability_pct
from {{ ref('stg_ees_ks2') }} ks2
inner join {{ ref('int_school_lineage') }} lin
on ks2.urn = lin.predecessor_urn
-- Only include predecessor data for years before the current URN has data
where not exists (
select 1 from {{ ref('stg_ees_ks2') }} curr
where curr.urn = lin.current_urn