perf(dbt): collapse stg_ees_ks2 to single-pass pivot
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m31s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Previous version scanned ees_ks2_attainment (1.2M rows) 5 times via separate CTEs (all_pupils, gender_boys, gender_girls, disadv, not_disadv) and stitched the four breakdown CTEs back onto the pivot with 4 LEFT JOINs. Rewritten as one GROUP BY with conditional aggregation — single scan, no self-joins; the only remaining join is the LEFT JOIN to info.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
{{ config(materialized='table') }}
|
||||
|
||||
-- Staging model: KS2 attainment + information
|
||||
-- Pivots long-format attainment data (one row per subject × breakdown) into
|
||||
-- wide format (one row per school per year) and joins context from info table.
|
||||
-- Single-pass pivot: one GROUP BY scan of 1.2M rows handles all subjects
|
||||
-- and all breakdowns (All pupils, Boys, Girls, Disadvantaged, Not disadvantaged)
|
||||
-- instead of 5 separate CTE scans + 4 LEFT JOINs.
|
||||
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
|
||||
-- safe_numeric handles both by treating any non-numeric string as NULL.
|
||||
|
||||
@@ -11,110 +12,109 @@ with attainment as (
|
||||
where school_urn is not null
|
||||
),
|
||||
|
||||
-- Pivot: extract metrics for each subject where breakdown = 'Total'
|
||||
all_pupils as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
subject,
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as expected_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as higher_pct,
|
||||
{{ safe_numeric('average_scaled_score') }} as avg_score,
|
||||
{{ safe_numeric('progress_measure_score') }} as progress,
|
||||
{{ safe_numeric('absent_or_not_able_to_access_percent') }} as absence_pct
|
||||
from attainment
|
||||
where breakdown_topic = 'All pupils'
|
||||
and breakdown = 'Total'
|
||||
),
|
||||
|
||||
pivoted as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
|
||||
-- RWM combined
|
||||
max(case when subject = 'Reading, writing and maths' then expected_pct end) as rwm_expected_pct,
|
||||
max(case when subject = 'Reading, writing and maths' then higher_pct end) as rwm_high_pct,
|
||||
-- RWM combined (All pupils / Total)
|
||||
max(case when subject = 'Reading, writing and maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_pct,
|
||||
max(case when subject = 'Reading, writing and maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_pct,
|
||||
|
||||
-- Reading
|
||||
max(case when subject = 'Reading' then expected_pct end) as reading_expected_pct,
|
||||
max(case when subject = 'Reading' then higher_pct end) as reading_high_pct,
|
||||
max(case when subject = 'Reading' then avg_score end) as reading_avg_score,
|
||||
max(case when subject = 'Reading' then progress end) as reading_progress,
|
||||
max(case when subject = 'Reading' then absence_pct end) as reading_absence_pct,
|
||||
-- Reading (All pupils / Total)
|
||||
max(case when subject = 'Reading'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as reading_expected_pct,
|
||||
max(case when subject = 'Reading'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as reading_high_pct,
|
||||
max(case when subject = 'Reading'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('average_scaled_score') }} end) as reading_avg_score,
|
||||
max(case when subject = 'Reading'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('progress_measure_score') }} end) as reading_progress,
|
||||
max(case when subject = 'Reading'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as reading_absence_pct,
|
||||
|
||||
-- Writing
|
||||
max(case when subject = 'Writing' then expected_pct end) as writing_expected_pct,
|
||||
max(case when subject = 'Writing' then higher_pct end) as writing_high_pct,
|
||||
max(case when subject = 'Writing' then progress end) as writing_progress,
|
||||
max(case when subject = 'Writing' then absence_pct end) as writing_absence_pct,
|
||||
-- Writing (All pupils / Total)
|
||||
max(case when subject = 'Writing'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as writing_expected_pct,
|
||||
max(case when subject = 'Writing'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as writing_high_pct,
|
||||
max(case when subject = 'Writing'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('progress_measure_score') }} end) as writing_progress,
|
||||
max(case when subject = 'Writing'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as writing_absence_pct,
|
||||
|
||||
-- Maths
|
||||
max(case when subject = 'Maths' then expected_pct end) as maths_expected_pct,
|
||||
max(case when subject = 'Maths' then higher_pct end) as maths_high_pct,
|
||||
max(case when subject = 'Maths' then avg_score end) as maths_avg_score,
|
||||
max(case when subject = 'Maths' then progress end) as maths_progress,
|
||||
max(case when subject = 'Maths' then absence_pct end) as maths_absence_pct,
|
||||
-- Maths (All pupils / Total)
|
||||
max(case when subject = 'Maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as maths_expected_pct,
|
||||
max(case when subject = 'Maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as maths_high_pct,
|
||||
max(case when subject = 'Maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('average_scaled_score') }} end) as maths_avg_score,
|
||||
max(case when subject = 'Maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('progress_measure_score') }} end) as maths_progress,
|
||||
max(case when subject = 'Maths'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as maths_absence_pct,
|
||||
|
||||
-- GPS
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then expected_pct end) as gps_expected_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then higher_pct end) as gps_high_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then avg_score end) as gps_avg_score,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then absence_pct end) as gps_absence_pct,
|
||||
-- GPS (All pupils / Total)
|
||||
max(case when (subject ilike '%grammar%' or subject = 'GPS')
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as gps_expected_pct,
|
||||
max(case when (subject ilike '%grammar%' or subject = 'GPS')
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as gps_high_pct,
|
||||
max(case when (subject ilike '%grammar%' or subject = 'GPS')
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('average_scaled_score') }} end) as gps_avg_score,
|
||||
max(case when (subject ilike '%grammar%' or subject = 'GPS')
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as gps_absence_pct,
|
||||
|
||||
-- Science
|
||||
max(case when subject = 'Science' then expected_pct end) as science_expected_pct,
|
||||
max(case when subject = 'Science' then absence_pct end) as science_absence_pct
|
||||
-- Science (All pupils / Total)
|
||||
max(case when subject = 'Science'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as science_expected_pct,
|
||||
max(case when subject = 'Science'
|
||||
and breakdown_topic = 'All pupils' and breakdown = 'Total'
|
||||
then {{ safe_numeric('absent_or_not_able_to_access_percent') }} end) as science_absence_pct,
|
||||
|
||||
from all_pupils
|
||||
-- Gender breakdown for RWM
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Boys'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_boys_pct,
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Boys'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_boys_pct,
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Girls'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_girls_pct,
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Girls'
|
||||
then {{ safe_numeric('higher_standard_pupil_percent') }} end) as rwm_high_girls_pct,
|
||||
|
||||
-- Disadvantaged breakdown for RWM
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Disadvantaged'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_disadvantaged_pct,
|
||||
max(case when subject = 'Reading, writing and maths' and breakdown = 'Not disadvantaged'
|
||||
then {{ safe_numeric('expected_standard_pupil_percent') }} end) as rwm_expected_non_disadvantaged_pct
|
||||
|
||||
from attainment
|
||||
group by school_urn, time_period
|
||||
),
|
||||
|
||||
-- Gender breakdown for RWM
|
||||
gender_boys as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_boys_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_boys_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Boys'
|
||||
),
|
||||
|
||||
gender_girls as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_girls_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_girls_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Girls'
|
||||
),
|
||||
|
||||
-- Disadvantaged breakdown for RWM
|
||||
disadv as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_disadvantaged_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Disadvantaged'
|
||||
),
|
||||
|
||||
not_disadv as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_non_disadvantaged_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Not disadvantaged'
|
||||
),
|
||||
|
||||
-- School info (context/demographics)
|
||||
info as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
@@ -136,7 +136,6 @@ select
|
||||
i.total_pupils,
|
||||
i.eligible_pupils,
|
||||
|
||||
-- Core attainment
|
||||
p.rwm_expected_pct,
|
||||
p.rwm_high_pct,
|
||||
p.reading_expected_pct,
|
||||
@@ -155,25 +154,21 @@ select
|
||||
p.gps_avg_score,
|
||||
p.science_expected_pct,
|
||||
|
||||
-- Absence
|
||||
p.reading_absence_pct,
|
||||
p.writing_absence_pct,
|
||||
p.maths_absence_pct,
|
||||
p.gps_absence_pct,
|
||||
p.science_absence_pct,
|
||||
|
||||
-- Gender
|
||||
gb.rwm_expected_boys_pct,
|
||||
gb.rwm_high_boys_pct,
|
||||
gg.rwm_expected_girls_pct,
|
||||
gg.rwm_high_girls_pct,
|
||||
p.rwm_expected_boys_pct,
|
||||
p.rwm_high_boys_pct,
|
||||
p.rwm_expected_girls_pct,
|
||||
p.rwm_high_girls_pct,
|
||||
|
||||
-- Disadvantaged
|
||||
d.rwm_expected_disadvantaged_pct,
|
||||
nd.rwm_expected_non_disadvantaged_pct,
|
||||
d.rwm_expected_disadvantaged_pct - nd.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
|
||||
p.rwm_expected_disadvantaged_pct,
|
||||
p.rwm_expected_non_disadvantaged_pct,
|
||||
p.rwm_expected_disadvantaged_pct - p.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
|
||||
|
||||
-- Context
|
||||
i.disadvantaged_pct,
|
||||
i.eal_pct,
|
||||
i.sen_support_pct,
|
||||
@@ -182,7 +177,3 @@ select
|
||||
|
||||
from pivoted p
|
||||
left join info i on p.urn = i.urn and p.year = i.year
|
||||
left join gender_boys gb on p.urn = cast(gb.school_urn as integer) and p.year = cast(gb.time_period as integer)
|
||||
left join gender_girls gg on p.urn = cast(gg.school_urn as integer) and p.year = cast(gg.time_period as integer)
|
||||
left join disadv d on p.urn = cast(d.school_urn as integer) and p.year = cast(d.time_period as integer)
|
||||
left join not_disadv nd on p.urn = cast(nd.school_urn as integer) and p.year = cast(nd.time_period as integer)
|
||||
|
||||
Reference in New Issue
Block a user