perf(dbt): collapse stg_ees_ks2 to single-pass pivot
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

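Previous version scanned ees_ks2_attainment (1.2M rows) 5 times via
separate CTEs (all_pupils, gender_boys, gender_girls, disadv, not_disadv)
and stitched the four breakdown CTEs back onto the pivot with 4 LEFT JOINs
(5 LEFT JOINs in total, counting the join to info). Rewritten as one
GROUP BY with conditional aggregation — single scan, no self-joins; only
the LEFT JOIN to info remains.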

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 11:42:40 +00:00
parent ae9fd26eba
commit 6e720feca4

View File

@@ -1,8 +1,9 @@
{{ config(materialized='table') }} {{ config(materialized='table') }}
-- Staging model: KS2 attainment + information -- Staging model: KS2 attainment + information
-- Pivots long-format attainment data (one row per subject × breakdown) into -- Single-pass pivot: one GROUP BY scan of 1.2M rows handles all subjects
-- wide format (one row per school per year) and joins context from info table. -- and all breakdowns (All pupils, Boys, Girls, Disadvantaged, Not disadvantaged)
-- instead of 5 separate CTE scans + 4 LEFT JOINs back onto the same data (only the join to info remains).
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes — -- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
-- safe_numeric handles both by treating any non-numeric string as NULL. -- safe_numeric handles both by treating any non-numeric string as NULL.
@@ -11,110 +12,109 @@ with attainment as (
where school_urn is not null where school_urn is not null
), ),
-- All-pupils baseline: long-format attainment rows restricted to the
-- 'All pupils' / 'Total' breakdown, keeping the subject so the pivoted CTE
-- can spread metrics into per-subject columns.
-- Every metric goes through safe_numeric, so non-numeric suppression codes
-- ('z', 'c') come out as NULL.
all_pupils as (
    select
        school_urn,
        time_period,
        subject,
        {{ safe_numeric('expected_standard_pupil_percent') }}       as expected_pct,
        {{ safe_numeric('higher_standard_pupil_percent') }}         as higher_pct,
        {{ safe_numeric('average_scaled_score') }}                  as avg_score,
        {{ safe_numeric('progress_measure_score') }}                as progress,
        {{ safe_numeric('absent_or_not_able_to_access_percent') }}  as absence_pct
    from attainment
    where breakdown = 'Total'
      and breakdown_topic = 'All pupils'
),
pivoted as ( pivoted as (
select select
cast(school_urn as integer) as urn, cast(school_urn as integer) as urn,
cast(time_period as integer) as year, cast(time_period as integer) as year,
-- RWM combined -- RWM combined (All pupils / Total)
max(case when subject = 'Reading, writing and maths' then expected_pct end) as rwm_expected_pct, max(case when subject = 'Reading, writing and maths'
max(case when subject = 'Reading, writing and maths' then higher_pct end) as rwm_high_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_pct,
max(case when subject = 'Reading, writing and maths'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_pct,
-- Reading -- Reading (All pupils / Total)
max(case when subject = 'Reading' then expected_pct end) as reading_expected_pct, max(case when subject = 'Reading'
max(case when subject = 'Reading' then higher_pct end) as reading_high_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
max(case when subject = 'Reading' then avg_score end) as reading_avg_score, then {{ safe_numeric('expected_standard_pupil_percent') }} end) as reading_expected_pct,
max(case when subject = 'Reading' then progress end) as reading_progress, max(case when subject = 'Reading'
max(case when subject = 'Reading' then absence_pct end) as reading_absence_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as reading_high_pct,
max(case when subject = 'Reading'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('average_scaled_score') }} end) as reading_avg_score,
max(case when subject = 'Reading'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('progress_measure_score') }} end) as reading_progress,
max(case when subject = 'Reading'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as reading_absence_pct,
-- Writing -- Writing (All pupils / Total)
max(case when subject = 'Writing' then expected_pct end) as writing_expected_pct, max(case when subject = 'Writing'
max(case when subject = 'Writing' then higher_pct end) as writing_high_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
max(case when subject = 'Writing' then progress end) as writing_progress, then {{ safe_numeric('expected_standard_pupil_percent') }} end) as writing_expected_pct,
max(case when subject = 'Writing' then absence_pct end) as writing_absence_pct, max(case when subject = 'Writing'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as writing_high_pct,
max(case when subject = 'Writing'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('progress_measure_score') }} end) as writing_progress,
max(case when subject = 'Writing'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as writing_absence_pct,
-- Maths -- Maths (All pupils / Total)
max(case when subject = 'Maths' then expected_pct end) as maths_expected_pct, max(case when subject = 'Maths'
max(case when subject = 'Maths' then higher_pct end) as maths_high_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
max(case when subject = 'Maths' then avg_score end) as maths_avg_score, then {{ safe_numeric('expected_standard_pupil_percent') }} end) as maths_expected_pct,
max(case when subject = 'Maths' then progress end) as maths_progress, max(case when subject = 'Maths'
max(case when subject = 'Maths' then absence_pct end) as maths_absence_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as maths_high_pct,
max(case when subject = 'Maths'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('average_scaled_score') }} end) as maths_avg_score,
max(case when subject = 'Maths'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('progress_measure_score') }} end) as maths_progress,
max(case when subject = 'Maths'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as maths_absence_pct,
-- GPS -- GPS (All pupils / Total)
max(case when subject ilike '%grammar%' or subject = 'GPS' then expected_pct end) as gps_expected_pct, max(case when (subject ilike '%grammar%' or subject = 'GPS')
max(case when subject ilike '%grammar%' or subject = 'GPS' then higher_pct end) as gps_high_pct, and breakdown_topic = 'All pupils' and breakdown = 'Total'
max(case when subject ilike '%grammar%' or subject = 'GPS' then avg_score end) as gps_avg_score, then {{ safe_numeric('expected_standard_pupil_percent') }} end) as gps_expected_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then absence_pct end) as gps_absence_pct, max(case when (subject ilike '%grammar%' or subject = 'GPS')
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as gps_high_pct,
max(case when (subject ilike '%grammar%' or subject = 'GPS')
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('average_scaled_score') }} end) as gps_avg_score,
max(case when (subject ilike '%grammar%' or subject = 'GPS')
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as gps_absence_pct,
-- Science -- Science (All pupils / Total)
max(case when subject = 'Science' then expected_pct end) as science_expected_pct, max(case when subject = 'Science'
max(case when subject = 'Science' then absence_pct end) as science_absence_pct and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as science_expected_pct,
max(case when subject = 'Science'
and breakdown_topic = 'All pupils' and breakdown = 'Total'
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as science_absence_pct,
from all_pupils -- Gender breakdown for RWM
max(case when subject = 'Reading, writing and maths' and breakdown = 'Boys'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_boys_pct,
max(case when subject = 'Reading, writing and maths' and breakdown = 'Boys'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_boys_pct,
max(case when subject = 'Reading, writing and maths' and breakdown = 'Girls'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_girls_pct,
max(case when subject = 'Reading, writing and maths' and breakdown = 'Girls'
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_girls_pct,
-- Disadvantaged breakdown for RWM
max(case when subject = 'Reading, writing and maths' and breakdown = 'Disadvantaged'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_disadvantaged_pct,
max(case when subject = 'Reading, writing and maths' and breakdown = 'Not disadvantaged'
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_non_disadvantaged_pct
from attainment
group by school_urn, time_period group by school_urn, time_period
), ),
-- RWM gender split (boys): expected- and higher-standard percentages taken
-- from 'Reading, writing and maths' rows whose breakdown is 'Boys'.
-- Left-joined onto pivoted by (school_urn, time_period) in the final select.
-- NOTE(review): unlike all_pupils, there is no breakdown_topic filter here —
-- confirm 'Boys' only occurs under a single topic.
gender_boys as (
    select
        school_urn,
        time_period,
        {{ safe_numeric('expected_standard_pupil_percent') }}  as rwm_expected_boys_pct,
        {{ safe_numeric('higher_standard_pupil_percent') }}    as rwm_high_boys_pct
    from attainment
    where breakdown = 'Boys'
      and subject = 'Reading, writing and maths'
),
-- RWM gender split (girls): expected- and higher-standard percentages taken
-- from 'Reading, writing and maths' rows whose breakdown is 'Girls'.
-- NOTE(review): no breakdown_topic filter is applied — confirm 'Girls' only
-- occurs under a single topic.
gender_girls as (
    select
        school_urn,
        time_period,
        {{ safe_numeric('expected_standard_pupil_percent') }}  as rwm_expected_girls_pct,
        {{ safe_numeric('higher_standard_pupil_percent') }}    as rwm_high_girls_pct
    from attainment
    where breakdown = 'Girls'
      and subject = 'Reading, writing and maths'
),
-- RWM disadvantaged split: expected-standard percentage taken from
-- 'Reading, writing and maths' rows whose breakdown is 'Disadvantaged'.
-- Supplies one side of the disadvantaged_gap calculation in the final select.
-- NOTE(review): no breakdown_topic filter is applied — confirm the breakdown
-- label is unique to one topic.
disadv as (
    select
        school_urn,
        time_period,
        {{ safe_numeric('expected_standard_pupil_percent') }}  as rwm_expected_disadvantaged_pct
    from attainment
    where breakdown = 'Disadvantaged'
      and subject = 'Reading, writing and maths'
),
-- RWM non-disadvantaged split: expected-standard percentage taken from
-- 'Reading, writing and maths' rows whose breakdown is 'Not disadvantaged'.
-- Supplies the other side of the disadvantaged_gap calculation in the final select.
-- NOTE(review): no breakdown_topic filter is applied — confirm the breakdown
-- label is unique to one topic.
not_disadv as (
    select
        school_urn,
        time_period,
        {{ safe_numeric('expected_standard_pupil_percent') }}  as rwm_expected_non_disadvantaged_pct
    from attainment
    where breakdown = 'Not disadvantaged'
      and subject = 'Reading, writing and maths'
),
-- School info (context/demographics)
info as ( info as (
select select
cast(school_urn as integer) as urn, cast(school_urn as integer) as urn,
@@ -136,7 +136,6 @@ select
i.total_pupils, i.total_pupils,
i.eligible_pupils, i.eligible_pupils,
-- Core attainment
p.rwm_expected_pct, p.rwm_expected_pct,
p.rwm_high_pct, p.rwm_high_pct,
p.reading_expected_pct, p.reading_expected_pct,
@@ -155,25 +154,21 @@ select
p.gps_avg_score, p.gps_avg_score,
p.science_expected_pct, p.science_expected_pct,
-- Absence
p.reading_absence_pct, p.reading_absence_pct,
p.writing_absence_pct, p.writing_absence_pct,
p.maths_absence_pct, p.maths_absence_pct,
p.gps_absence_pct, p.gps_absence_pct,
p.science_absence_pct, p.science_absence_pct,
-- Gender p.rwm_expected_boys_pct,
gb.rwm_expected_boys_pct, p.rwm_high_boys_pct,
gb.rwm_high_boys_pct, p.rwm_expected_girls_pct,
gg.rwm_expected_girls_pct, p.rwm_high_girls_pct,
gg.rwm_high_girls_pct,
-- Disadvantaged p.rwm_expected_disadvantaged_pct,
d.rwm_expected_disadvantaged_pct, p.rwm_expected_non_disadvantaged_pct,
nd.rwm_expected_non_disadvantaged_pct, p.rwm_expected_disadvantaged_pct - p.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
d.rwm_expected_disadvantaged_pct - nd.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
-- Context
i.disadvantaged_pct, i.disadvantaged_pct,
i.eal_pct, i.eal_pct,
i.sen_support_pct, i.sen_support_pct,
@@ -182,7 +177,3 @@ select
from pivoted p from pivoted p
left join info i on p.urn = i.urn and p.year = i.year left join info i on p.urn = i.urn and p.year = i.year
left join gender_boys gb on p.urn = cast(gb.school_urn as integer) and p.year = cast(gb.time_period as integer)
left join gender_girls gg on p.urn = cast(gg.school_urn as integer) and p.year = cast(gg.time_period as integer)
left join disadv d on p.urn = cast(d.school_urn as integer) and p.year = cast(d.time_period as integer)
left join not_disadv nd on p.urn = cast(nd.school_urn as integer) and p.year = cast(nd.time_period as integer)