fix(dbt): apply safe_numeric macro to fix EES suppression code 'c' errors
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m14s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m14s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
Replace nullif(col, 'z') casts with safe_numeric macro across KS2, KS4, and admissions staging models. The regex-based macro treats any non-numeric string (z, c, x, q, u, etc.) as NULL without needing an explicit list. Also fix FSM_eligible_percent column quoting in stg_ees_admissions — target-postgres stores mixed-case column names quoted, so unquoted references were being folded to fsm_eligible_percent by PostgreSQL. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
9
pipeline/transform/macros/safe_numeric.sql
Normal file
9
pipeline/transform/macros/safe_numeric.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
{#
    safe_numeric(col)

    Casts a string column to numeric, treating any non-numeric value as NULL.
    Handles all EES suppression codes (z, c, x, q, u, etc.) without needing
    an explicit list — any string that doesn't look like a number becomes NULL.

    Args:
        col: SQL expression (normally a column name) to convert. Pass
             mixed-case identifiers already double-quoted, e.g.
             '"FSM_eligible_percent"'.

    Returns:
        A parenthesised SQL expression of type numeric. The value is NULL
        unless the input, after trimming surrounding spaces, is an optionally
        negative integer or decimal (digits, optionally followed by '.' and
        more digits). Other spellings of numbers — a leading '+', a bare
        '.5', exponent notation, thousands separators — are also NULL.

    Notes:
        - The input is cast to text before matching, so the macro also works
          if the loader has typed the column as something other than text.
        - Surrounding spaces are trimmed so a padded value such as ' 12' is
          kept rather than silently nulled.
        - The result is wrapped in parentheses so callers can safely append
          casts or operators, e.g. safe_numeric('x') followed by ::integer.
#}
{% macro safe_numeric(col) -%}
    (
        CASE
            WHEN trim(({{ col }})::text) ~ '^-?[0-9]+(\.[0-9]+)?$'
                THEN trim(({{ col }})::text)::numeric
            ELSE NULL
        END
    )
{%- endmacro %}
|
||||
@@ -1,6 +1,7 @@
|
||||
-- Staging model: Primary and secondary school admissions from EES
|
||||
-- Wide format, one row per school per year. No geographic_level column.
|
||||
-- File is in supporting-files/ subdirectory of the release ZIP.
|
||||
-- Note: FSM_eligible_percent is stored with mixed case by target-postgres and
|
||||
-- must be double-quoted in SQL to avoid case-folding to fsm_eligible_percent.
|
||||
|
||||
with source as (
|
||||
select * from {{ source('raw', 'ees_admissions') }}
|
||||
@@ -15,32 +16,32 @@ renamed as (
|
||||
entry_year,
|
||||
|
||||
-- Places and offers
|
||||
cast(nullif(total_number_places_offered, 'z') as integer) as published_admission_number,
|
||||
cast(nullif(number_preferred_offers, 'z') as integer) as total_offers,
|
||||
cast(nullif(number_1st_preference_offers, 'z') as integer) as first_preference_offers,
|
||||
cast(nullif(number_2nd_preference_offers, 'z') as integer) as second_preference_offers,
|
||||
cast(nullif(number_3rd_preference_offers, 'z') as integer) as third_preference_offers,
|
||||
{{ safe_numeric('total_number_places_offered') }}::integer as published_admission_number,
|
||||
{{ safe_numeric('number_preferred_offers') }}::integer as total_offers,
|
||||
{{ safe_numeric('number_1st_preference_offers') }}::integer as first_preference_offers,
|
||||
{{ safe_numeric('number_2nd_preference_offers') }}::integer as second_preference_offers,
|
||||
{{ safe_numeric('number_3rd_preference_offers') }}::integer as third_preference_offers,
|
||||
|
||||
-- Applications
|
||||
cast(nullif(times_put_as_any_preferred_school, 'z') as integer) as total_applications,
|
||||
cast(nullif(times_put_as_1st_preference, 'z') as integer) as first_preference_applications,
|
||||
{{ safe_numeric('times_put_as_any_preferred_school') }}::integer as total_applications,
|
||||
{{ safe_numeric('times_put_as_1st_preference') }}::integer as first_preference_applications,
|
||||
|
||||
-- Proportions
|
||||
cast(nullif(proportion_1stprefs_v_totaloffers, 'z') as numeric) as first_preference_offer_pct,
|
||||
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
|
||||
|
||||
-- Derived: oversubscribed if applications > places
|
||||
-- Derived: oversubscribed if 1st-preference applications > places offered
|
||||
case
|
||||
when nullif(times_put_as_1st_preference, 'z') is not null
|
||||
and nullif(total_number_places_offered, 'z') is not null
|
||||
and cast(times_put_as_1st_preference as integer)
|
||||
> cast(total_number_places_offered as integer)
|
||||
when {{ safe_numeric('times_put_as_1st_preference') }} is not null
|
||||
and {{ safe_numeric('total_number_places_offered') }} is not null
|
||||
and {{ safe_numeric('times_put_as_1st_preference') }}
|
||||
> {{ safe_numeric('total_number_places_offered') }}
|
||||
then true
|
||||
else false
|
||||
end as oversubscribed,
|
||||
|
||||
-- Context
|
||||
admissions_policy,
|
||||
nullif(FSM_eligible_percent, 'z') as fsm_eligible_pct
|
||||
{{ safe_numeric('"FSM_eligible_percent"') }} as fsm_eligible_pct
|
||||
|
||||
from source
|
||||
)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
-- Staging model: KS2 attainment + information
|
||||
-- Pivots long-format attainment data (one row per subject × breakdown) into
|
||||
-- wide format (one row per school per year) and joins context from info table.
|
||||
-- EES uses 'z' for suppressed values — cast to null via nullif.
|
||||
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
|
||||
-- safe_numeric handles both by treating any non-numeric string as NULL.
|
||||
|
||||
with attainment as (
|
||||
select * from {{ source('raw', 'ees_ks2_attainment') }}
|
||||
@@ -14,11 +15,11 @@ all_pupils as (
|
||||
school_urn,
|
||||
time_period,
|
||||
subject,
|
||||
nullif(expected_standard_pupil_percent, 'z') as expected_pct,
|
||||
nullif(higher_standard_pupil_percent, 'z') as higher_pct,
|
||||
nullif(average_scaled_score, 'z') as avg_score,
|
||||
nullif(progress_measure_score, 'z') as progress,
|
||||
nullif(absent_or_not_able_to_access_percent, 'z') as absence_pct
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as expected_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as higher_pct,
|
||||
{{ safe_numeric('average_scaled_score') }} as avg_score,
|
||||
{{ safe_numeric('progress_measure_score') }} as progress,
|
||||
{{ safe_numeric('absent_or_not_able_to_access_percent') }} as absence_pct
|
||||
from attainment
|
||||
where breakdown_topic = 'All pupils'
|
||||
and breakdown = 'Total'
|
||||
@@ -30,38 +31,38 @@ pivoted as (
|
||||
cast(time_period as integer) as year,
|
||||
|
||||
-- RWM combined
|
||||
max(case when subject = 'Reading, writing and maths' then cast(expected_pct as numeric) end) as rwm_expected_pct,
|
||||
max(case when subject = 'Reading, writing and maths' then cast(higher_pct as numeric) end) as rwm_high_pct,
|
||||
max(case when subject = 'Reading, writing and maths' then expected_pct end) as rwm_expected_pct,
|
||||
max(case when subject = 'Reading, writing and maths' then higher_pct end) as rwm_high_pct,
|
||||
|
||||
-- Reading
|
||||
max(case when subject = 'Reading' then cast(expected_pct as numeric) end) as reading_expected_pct,
|
||||
max(case when subject = 'Reading' then cast(higher_pct as numeric) end) as reading_high_pct,
|
||||
max(case when subject = 'Reading' then cast(avg_score as numeric) end) as reading_avg_score,
|
||||
max(case when subject = 'Reading' then cast(progress as numeric) end) as reading_progress,
|
||||
max(case when subject = 'Reading' then cast(absence_pct as numeric) end) as reading_absence_pct,
|
||||
max(case when subject = 'Reading' then expected_pct end) as reading_expected_pct,
|
||||
max(case when subject = 'Reading' then higher_pct end) as reading_high_pct,
|
||||
max(case when subject = 'Reading' then avg_score end) as reading_avg_score,
|
||||
max(case when subject = 'Reading' then progress end) as reading_progress,
|
||||
max(case when subject = 'Reading' then absence_pct end) as reading_absence_pct,
|
||||
|
||||
-- Writing
|
||||
max(case when subject = 'Writing' then cast(expected_pct as numeric) end) as writing_expected_pct,
|
||||
max(case when subject = 'Writing' then cast(higher_pct as numeric) end) as writing_high_pct,
|
||||
max(case when subject = 'Writing' then cast(progress as numeric) end) as writing_progress,
|
||||
max(case when subject = 'Writing' then cast(absence_pct as numeric) end) as writing_absence_pct,
|
||||
max(case when subject = 'Writing' then expected_pct end) as writing_expected_pct,
|
||||
max(case when subject = 'Writing' then higher_pct end) as writing_high_pct,
|
||||
max(case when subject = 'Writing' then progress end) as writing_progress,
|
||||
max(case when subject = 'Writing' then absence_pct end) as writing_absence_pct,
|
||||
|
||||
-- Maths
|
||||
max(case when subject = 'Maths' then cast(expected_pct as numeric) end) as maths_expected_pct,
|
||||
max(case when subject = 'Maths' then cast(higher_pct as numeric) end) as maths_high_pct,
|
||||
max(case when subject = 'Maths' then cast(avg_score as numeric) end) as maths_avg_score,
|
||||
max(case when subject = 'Maths' then cast(progress as numeric) end) as maths_progress,
|
||||
max(case when subject = 'Maths' then cast(absence_pct as numeric) end) as maths_absence_pct,
|
||||
max(case when subject = 'Maths' then expected_pct end) as maths_expected_pct,
|
||||
max(case when subject = 'Maths' then higher_pct end) as maths_high_pct,
|
||||
max(case when subject = 'Maths' then avg_score end) as maths_avg_score,
|
||||
max(case when subject = 'Maths' then progress end) as maths_progress,
|
||||
max(case when subject = 'Maths' then absence_pct end) as maths_absence_pct,
|
||||
|
||||
-- GPS
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(expected_pct as numeric) end) as gps_expected_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(higher_pct as numeric) end) as gps_high_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(avg_score as numeric) end) as gps_avg_score,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(absence_pct as numeric) end) as gps_absence_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then expected_pct end) as gps_expected_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then higher_pct end) as gps_high_pct,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then avg_score end) as gps_avg_score,
|
||||
max(case when subject ilike '%grammar%' or subject = 'GPS' then absence_pct end) as gps_absence_pct,
|
||||
|
||||
-- Science
|
||||
max(case when subject = 'Science' then cast(expected_pct as numeric) end) as science_expected_pct,
|
||||
max(case when subject = 'Science' then cast(absence_pct as numeric) end) as science_absence_pct
|
||||
max(case when subject = 'Science' then expected_pct end) as science_expected_pct,
|
||||
max(case when subject = 'Science' then absence_pct end) as science_absence_pct
|
||||
|
||||
from all_pupils
|
||||
group by school_urn, time_period
|
||||
@@ -72,8 +73,8 @@ gender_boys as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_boys_pct,
|
||||
nullif(higher_standard_pupil_percent, 'z') as rwm_high_boys_pct
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_boys_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_boys_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Boys'
|
||||
@@ -83,8 +84,8 @@ gender_girls as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_girls_pct,
|
||||
nullif(higher_standard_pupil_percent, 'z') as rwm_high_girls_pct
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_girls_pct,
|
||||
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_girls_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Girls'
|
||||
@@ -95,7 +96,7 @@ disadv as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_disadvantaged_pct
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_disadvantaged_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Disadvantaged'
|
||||
@@ -105,7 +106,7 @@ not_disadv as (
|
||||
select
|
||||
school_urn,
|
||||
time_period,
|
||||
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_non_disadvantaged_pct
|
||||
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_non_disadvantaged_pct
|
||||
from attainment
|
||||
where subject = 'Reading, writing and maths'
|
||||
and breakdown = 'Not disadvantaged'
|
||||
@@ -116,13 +117,13 @@ info as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(nullif(totpups, 'z') as integer) as total_pupils,
|
||||
cast(nullif(telig, 'z') as integer) as eligible_pupils,
|
||||
cast(nullif(ptfsm6cla1a, 'z') as numeric) as disadvantaged_pct,
|
||||
cast(nullif(ptealgrp2, 'z') as numeric) as eal_pct,
|
||||
cast(nullif(psenelk, 'z') as numeric) as sen_support_pct,
|
||||
cast(nullif(psenele, 'z') as numeric) as sen_ehcp_pct,
|
||||
cast(nullif(ptmobn, 'z') as numeric) as stability_pct
|
||||
{{ safe_numeric('totpups') }}::integer as total_pupils,
|
||||
{{ safe_numeric('telig') }}::integer as eligible_pupils,
|
||||
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
|
||||
{{ safe_numeric('ptealgrp2') }} as eal_pct,
|
||||
{{ safe_numeric('psenelk') }} as sen_support_pct,
|
||||
{{ safe_numeric('psenele') }} as sen_ehcp_pct,
|
||||
{{ safe_numeric('ptmobn') }} as stability_pct
|
||||
from {{ source('raw', 'ees_ks2_info') }}
|
||||
where school_urn is not null
|
||||
)
|
||||
@@ -160,15 +161,15 @@ select
|
||||
p.science_absence_pct,
|
||||
|
||||
-- Gender
|
||||
cast(gb.rwm_expected_boys_pct as numeric) as rwm_expected_boys_pct,
|
||||
cast(gb.rwm_high_boys_pct as numeric) as rwm_high_boys_pct,
|
||||
cast(gg.rwm_expected_girls_pct as numeric) as rwm_expected_girls_pct,
|
||||
cast(gg.rwm_high_girls_pct as numeric) as rwm_high_girls_pct,
|
||||
gb.rwm_expected_boys_pct,
|
||||
gb.rwm_high_boys_pct,
|
||||
gg.rwm_expected_girls_pct,
|
||||
gg.rwm_high_girls_pct,
|
||||
|
||||
-- Disadvantaged
|
||||
cast(d.rwm_expected_disadvantaged_pct as numeric) as rwm_expected_disadvantaged_pct,
|
||||
cast(nd.rwm_expected_non_disadvantaged_pct as numeric) as rwm_expected_non_disadvantaged_pct,
|
||||
cast(d.rwm_expected_disadvantaged_pct as numeric) - cast(nd.rwm_expected_non_disadvantaged_pct as numeric) as disadvantaged_gap,
|
||||
d.rwm_expected_disadvantaged_pct,
|
||||
nd.rwm_expected_non_disadvantaged_pct,
|
||||
d.rwm_expected_disadvantaged_pct - nd.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
|
||||
|
||||
-- Context
|
||||
i.disadvantaged_pct,
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
-- KS4 performance data is long-format with breakdown dimensions (breakdown_topic,
|
||||
-- breakdown, sex). Unlike KS2 which has a subject dimension, KS4 metrics are
|
||||
-- already in separate columns — we just filter to the 'All pupils' breakdown.
|
||||
-- EES uses 'z' for suppressed values — cast to null via nullif.
|
||||
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
|
||||
-- safe_numeric handles both by treating any non-numeric string as NULL.
|
||||
|
||||
with performance as (
|
||||
select * from {{ source('raw', 'ees_ks4_performance') }}
|
||||
@@ -14,32 +15,32 @@ all_pupils as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(nullif(pupil_count, 'z') as integer) as total_pupils,
|
||||
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
|
||||
|
||||
-- Attainment 8
|
||||
cast(nullif(attainment8_average, 'z') as numeric) as attainment_8_score,
|
||||
{{ safe_numeric('attainment8_average') }} as attainment_8_score,
|
||||
|
||||
-- Progress 8
|
||||
cast(nullif(progress8_average, 'z') as numeric) as progress_8_score,
|
||||
cast(nullif(progress8_lower_95_ci, 'z') as numeric) as progress_8_lower_ci,
|
||||
cast(nullif(progress8_upper_95_ci, 'z') as numeric) as progress_8_upper_ci,
|
||||
cast(nullif(progress8eng_average, 'z') as numeric) as progress_8_english,
|
||||
cast(nullif(progress8mat_average, 'z') as numeric) as progress_8_maths,
|
||||
cast(nullif(progress8ebacc_average, 'z') as numeric) as progress_8_ebacc,
|
||||
cast(nullif(progress8open_average, 'z') as numeric) as progress_8_open,
|
||||
{{ safe_numeric('progress8_average') }} as progress_8_score,
|
||||
{{ safe_numeric('progress8_lower_95_ci') }} as progress_8_lower_ci,
|
||||
{{ safe_numeric('progress8_upper_95_ci') }} as progress_8_upper_ci,
|
||||
{{ safe_numeric('progress8eng_average') }} as progress_8_english,
|
||||
{{ safe_numeric('progress8mat_average') }} as progress_8_maths,
|
||||
{{ safe_numeric('progress8ebacc_average') }} as progress_8_ebacc,
|
||||
{{ safe_numeric('progress8open_average') }} as progress_8_open,
|
||||
|
||||
-- English & Maths pass rates
|
||||
cast(nullif(engmath_95_percent, 'z') as numeric) as english_maths_strong_pass_pct,
|
||||
cast(nullif(engmath_94_percent, 'z') as numeric) as english_maths_standard_pass_pct,
|
||||
{{ safe_numeric('engmath_95_percent') }} as english_maths_strong_pass_pct,
|
||||
{{ safe_numeric('engmath_94_percent') }} as english_maths_standard_pass_pct,
|
||||
|
||||
-- EBacc
|
||||
cast(nullif(ebacc_entering_percent, 'z') as numeric) as ebacc_entry_pct,
|
||||
cast(nullif(ebacc_95_percent, 'z') as numeric) as ebacc_strong_pass_pct,
|
||||
cast(nullif(ebacc_94_percent, 'z') as numeric) as ebacc_standard_pass_pct,
|
||||
cast(nullif(ebacc_aps_average, 'z') as numeric) as ebacc_avg_score,
|
||||
{{ safe_numeric('ebacc_entering_percent') }} as ebacc_entry_pct,
|
||||
{{ safe_numeric('ebacc_95_percent') }} as ebacc_strong_pass_pct,
|
||||
{{ safe_numeric('ebacc_94_percent') }} as ebacc_standard_pass_pct,
|
||||
{{ safe_numeric('ebacc_aps_average') }} as ebacc_avg_score,
|
||||
|
||||
-- GCSE grade 9-1
|
||||
cast(nullif(gcse_91_percent, 'z') as numeric) as gcse_grade_91_pct
|
||||
{{ safe_numeric('gcse_91_percent') }} as gcse_grade_91_pct
|
||||
|
||||
from performance
|
||||
where breakdown_topic = 'All pupils'
|
||||
@@ -52,11 +53,11 @@ info as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(nullif(endks4_pupil_count, 'z') as integer) as eligible_pupils,
|
||||
cast(nullif(ks2_scaledscore_average, 'z') as numeric) as prior_attainment_avg,
|
||||
cast(nullif(sen_pupil_percent, 'z') as numeric) as sen_pct,
|
||||
cast(nullif(sen_with_ehcp_pupil_percent, 'z') as numeric) as sen_ehcp_pct,
|
||||
cast(nullif(sen_no_ehcp_pupil_percent, 'z') as numeric) as sen_support_pct
|
||||
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
|
||||
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
|
||||
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,
|
||||
{{ safe_numeric('sen_with_ehcp_pupil_percent') }} as sen_ehcp_pct,
|
||||
{{ safe_numeric('sen_no_ehcp_pupil_percent') }} as sen_support_pct
|
||||
from {{ source('raw', 'ees_ks4_info') }}
|
||||
where school_urn is not null
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user