fix(dbt): apply safe_numeric macro to fix EES suppression code 'c' errors
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 33s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m14s
Build and Push Docker Images / Build Integrator (push) Successful in 58s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s

Replace nullif(col, 'z') casts with safe_numeric macro across KS2, KS4,
and admissions staging models. The regex-based macro treats any non-numeric
string (z, c, x, q, u, etc.) as NULL without needing an explicit list.

Also fix FSM_eligible_percent column quoting in stg_ees_admissions — target-
postgres stores mixed-case column names quoted, so unquoted references were
being folded to fsm_eligible_percent by PostgreSQL.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 10:41:27 +00:00
parent 8e8d1bd8c5
commit 33b395d2bd
4 changed files with 97 additions and 85 deletions

View File

@@ -0,0 +1,9 @@
{#
  safe_numeric(col)

  Renders a SQL expression that casts a string column to numeric, treating
  any non-numeric value as NULL. Handles all EES suppression codes
  (z, c, x, q, u, etc.) without needing an explicit list: any string that
  doesn't look like a plain decimal number becomes NULL.

  Args:
    col: column name or SQL expression, passed as a string. Mixed-case
         identifiers must already be double-quoted by the caller, e.g.
         safe_numeric('"FSM_eligible_percent"').

  Returns:
    A parenthesised CASE expression of type numeric (NULL when the input is
    NULL, empty, or not a number). Callers may append a further cast such as
    ::integer directly after the macro call.

  Accepted input: optional leading '-', one or more digits, optional
  fractional part ('12', '-3.5'). Leading/trailing spaces are ignored.
  Anything else ('z', 'c', '', '1,234', '12%', '1e5') yields NULL.

  Implementation notes:
    - The input is cast to text before matching so the regex operator also
      works if the loader has typed the column as a number.
    - {{ '{{ col }}' }} is wrapped in parentheses and rendered twice, so it
      should be a cheap, deterministic expression.
    - The whole result is parenthesised so it composes safely with
      operators and casts at the call site.
#}
{% macro safe_numeric(col) -%}
(CASE WHEN trim(({{ col }})::text) ~ '^-?[0-9]+(\.[0-9]+)?$' THEN trim(({{ col }})::text)::numeric ELSE NULL END)
{%- endmacro %}

View File

@@ -1,6 +1,7 @@
-- Staging model: Primary and secondary school admissions from EES
-- Wide format, one row per school per year. No geographic_level column.
-- File is in supporting-files/ subdirectory of the release ZIP.
-- Note: FSM_eligible_percent is stored with mixed case by target-postgres and
-- must be double-quoted in SQL to avoid case-folding to fsm_eligible_percent.
with source as (
select * from {{ source('raw', 'ees_admissions') }}
@@ -15,32 +16,32 @@ renamed as (
entry_year,
-- Places and offers
cast(nullif(total_number_places_offered, 'z') as integer) as published_admission_number,
cast(nullif(number_preferred_offers, 'z') as integer) as total_offers,
cast(nullif(number_1st_preference_offers, 'z') as integer) as first_preference_offers,
cast(nullif(number_2nd_preference_offers, 'z') as integer) as second_preference_offers,
cast(nullif(number_3rd_preference_offers, 'z') as integer) as third_preference_offers,
{{ safe_numeric('total_number_places_offered') }}::integer as published_admission_number,
{{ safe_numeric('number_preferred_offers') }}::integer as total_offers,
{{ safe_numeric('number_1st_preference_offers') }}::integer as first_preference_offers,
{{ safe_numeric('number_2nd_preference_offers') }}::integer as second_preference_offers,
{{ safe_numeric('number_3rd_preference_offers') }}::integer as third_preference_offers,
-- Applications
cast(nullif(times_put_as_any_preferred_school, 'z') as integer) as total_applications,
cast(nullif(times_put_as_1st_preference, 'z') as integer) as first_preference_applications,
{{ safe_numeric('times_put_as_any_preferred_school') }}::integer as total_applications,
{{ safe_numeric('times_put_as_1st_preference') }}::integer as first_preference_applications,
-- Proportions
cast(nullif(proportion_1stprefs_v_totaloffers, 'z') as numeric) as first_preference_offer_pct,
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
-- Derived: oversubscribed if applications > places
-- Derived: oversubscribed if 1st-preference applications > places offered
case
when nullif(times_put_as_1st_preference, 'z') is not null
and nullif(total_number_places_offered, 'z') is not null
and cast(times_put_as_1st_preference as integer)
> cast(total_number_places_offered as integer)
when {{ safe_numeric('times_put_as_1st_preference') }} is not null
and {{ safe_numeric('total_number_places_offered') }} is not null
and {{ safe_numeric('times_put_as_1st_preference') }}
> {{ safe_numeric('total_number_places_offered') }}
then true
else false
end as oversubscribed,
-- Context
admissions_policy,
nullif(FSM_eligible_percent, 'z') as fsm_eligible_pct
{{ safe_numeric('"FSM_eligible_percent"') }} as fsm_eligible_pct
from source
)

View File

@@ -1,7 +1,8 @@
-- Staging model: KS2 attainment + information
-- Pivots long-format attainment data (one row per subject × breakdown) into
-- wide format (one row per school per year) and joins context from info table.
-- EES uses 'z' for suppressed values — cast to null via nullif.
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
-- safe_numeric handles both by treating any non-numeric string as NULL.
with attainment as (
select * from {{ source('raw', 'ees_ks2_attainment') }}
@@ -14,11 +15,11 @@ all_pupils as (
school_urn,
time_period,
subject,
nullif(expected_standard_pupil_percent, 'z') as expected_pct,
nullif(higher_standard_pupil_percent, 'z') as higher_pct,
nullif(average_scaled_score, 'z') as avg_score,
nullif(progress_measure_score, 'z') as progress,
nullif(absent_or_not_able_to_access_percent, 'z') as absence_pct
{{ safe_numeric('expected_standard_pupil_percent') }} as expected_pct,
{{ safe_numeric('higher_standard_pupil_percent') }} as higher_pct,
{{ safe_numeric('average_scaled_score') }} as avg_score,
{{ safe_numeric('progress_measure_score') }} as progress,
{{ safe_numeric('absent_or_not_able_to_access_percent') }} as absence_pct
from attainment
where breakdown_topic = 'All pupils'
and breakdown = 'Total'
@@ -30,38 +31,38 @@ pivoted as (
cast(time_period as integer) as year,
-- RWM combined
max(case when subject = 'Reading, writing and maths' then cast(expected_pct as numeric) end) as rwm_expected_pct,
max(case when subject = 'Reading, writing and maths' then cast(higher_pct as numeric) end) as rwm_high_pct,
max(case when subject = 'Reading, writing and maths' then expected_pct end) as rwm_expected_pct,
max(case when subject = 'Reading, writing and maths' then higher_pct end) as rwm_high_pct,
-- Reading
max(case when subject = 'Reading' then cast(expected_pct as numeric) end) as reading_expected_pct,
max(case when subject = 'Reading' then cast(higher_pct as numeric) end) as reading_high_pct,
max(case when subject = 'Reading' then cast(avg_score as numeric) end) as reading_avg_score,
max(case when subject = 'Reading' then cast(progress as numeric) end) as reading_progress,
max(case when subject = 'Reading' then cast(absence_pct as numeric) end) as reading_absence_pct,
max(case when subject = 'Reading' then expected_pct end) as reading_expected_pct,
max(case when subject = 'Reading' then higher_pct end) as reading_high_pct,
max(case when subject = 'Reading' then avg_score end) as reading_avg_score,
max(case when subject = 'Reading' then progress end) as reading_progress,
max(case when subject = 'Reading' then absence_pct end) as reading_absence_pct,
-- Writing
max(case when subject = 'Writing' then cast(expected_pct as numeric) end) as writing_expected_pct,
max(case when subject = 'Writing' then cast(higher_pct as numeric) end) as writing_high_pct,
max(case when subject = 'Writing' then cast(progress as numeric) end) as writing_progress,
max(case when subject = 'Writing' then cast(absence_pct as numeric) end) as writing_absence_pct,
max(case when subject = 'Writing' then expected_pct end) as writing_expected_pct,
max(case when subject = 'Writing' then higher_pct end) as writing_high_pct,
max(case when subject = 'Writing' then progress end) as writing_progress,
max(case when subject = 'Writing' then absence_pct end) as writing_absence_pct,
-- Maths
max(case when subject = 'Maths' then cast(expected_pct as numeric) end) as maths_expected_pct,
max(case when subject = 'Maths' then cast(higher_pct as numeric) end) as maths_high_pct,
max(case when subject = 'Maths' then cast(avg_score as numeric) end) as maths_avg_score,
max(case when subject = 'Maths' then cast(progress as numeric) end) as maths_progress,
max(case when subject = 'Maths' then cast(absence_pct as numeric) end) as maths_absence_pct,
max(case when subject = 'Maths' then expected_pct end) as maths_expected_pct,
max(case when subject = 'Maths' then higher_pct end) as maths_high_pct,
max(case when subject = 'Maths' then avg_score end) as maths_avg_score,
max(case when subject = 'Maths' then progress end) as maths_progress,
max(case when subject = 'Maths' then absence_pct end) as maths_absence_pct,
-- GPS
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(expected_pct as numeric) end) as gps_expected_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(higher_pct as numeric) end) as gps_high_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(avg_score as numeric) end) as gps_avg_score,
max(case when subject ilike '%grammar%' or subject = 'GPS' then cast(absence_pct as numeric) end) as gps_absence_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then expected_pct end) as gps_expected_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then higher_pct end) as gps_high_pct,
max(case when subject ilike '%grammar%' or subject = 'GPS' then avg_score end) as gps_avg_score,
max(case when subject ilike '%grammar%' or subject = 'GPS' then absence_pct end) as gps_absence_pct,
-- Science
max(case when subject = 'Science' then cast(expected_pct as numeric) end) as science_expected_pct,
max(case when subject = 'Science' then cast(absence_pct as numeric) end) as science_absence_pct
max(case when subject = 'Science' then expected_pct end) as science_expected_pct,
max(case when subject = 'Science' then absence_pct end) as science_absence_pct
from all_pupils
group by school_urn, time_period
@@ -72,8 +73,8 @@ gender_boys as (
select
school_urn,
time_period,
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_boys_pct,
nullif(higher_standard_pupil_percent, 'z') as rwm_high_boys_pct
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_boys_pct,
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_boys_pct
from attainment
where subject = 'Reading, writing and maths'
and breakdown = 'Boys'
@@ -83,8 +84,8 @@ gender_girls as (
select
school_urn,
time_period,
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_girls_pct,
nullif(higher_standard_pupil_percent, 'z') as rwm_high_girls_pct
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_girls_pct,
{{ safe_numeric('higher_standard_pupil_percent') }} as rwm_high_girls_pct
from attainment
where subject = 'Reading, writing and maths'
and breakdown = 'Girls'
@@ -95,7 +96,7 @@ disadv as (
select
school_urn,
time_period,
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_disadvantaged_pct
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_disadvantaged_pct
from attainment
where subject = 'Reading, writing and maths'
and breakdown = 'Disadvantaged'
@@ -105,7 +106,7 @@ not_disadv as (
select
school_urn,
time_period,
nullif(expected_standard_pupil_percent, 'z') as rwm_expected_non_disadvantaged_pct
{{ safe_numeric('expected_standard_pupil_percent') }} as rwm_expected_non_disadvantaged_pct
from attainment
where subject = 'Reading, writing and maths'
and breakdown = 'Not disadvantaged'
@@ -116,13 +117,13 @@ info as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(nullif(totpups, 'z') as integer) as total_pupils,
cast(nullif(telig, 'z') as integer) as eligible_pupils,
cast(nullif(ptfsm6cla1a, 'z') as numeric) as disadvantaged_pct,
cast(nullif(ptealgrp2, 'z') as numeric) as eal_pct,
cast(nullif(psenelk, 'z') as numeric) as sen_support_pct,
cast(nullif(psenele, 'z') as numeric) as sen_ehcp_pct,
cast(nullif(ptmobn, 'z') as numeric) as stability_pct
{{ safe_numeric('totpups') }}::integer as total_pupils,
{{ safe_numeric('telig') }}::integer as eligible_pupils,
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
{{ safe_numeric('ptealgrp2') }} as eal_pct,
{{ safe_numeric('psenelk') }} as sen_support_pct,
{{ safe_numeric('psenele') }} as sen_ehcp_pct,
{{ safe_numeric('ptmobn') }} as stability_pct
from {{ source('raw', 'ees_ks2_info') }}
where school_urn is not null
)
@@ -160,15 +161,15 @@ select
p.science_absence_pct,
-- Gender
cast(gb.rwm_expected_boys_pct as numeric) as rwm_expected_boys_pct,
cast(gb.rwm_high_boys_pct as numeric) as rwm_high_boys_pct,
cast(gg.rwm_expected_girls_pct as numeric) as rwm_expected_girls_pct,
cast(gg.rwm_high_girls_pct as numeric) as rwm_high_girls_pct,
gb.rwm_expected_boys_pct,
gb.rwm_high_boys_pct,
gg.rwm_expected_girls_pct,
gg.rwm_high_girls_pct,
-- Disadvantaged
cast(d.rwm_expected_disadvantaged_pct as numeric) as rwm_expected_disadvantaged_pct,
cast(nd.rwm_expected_non_disadvantaged_pct as numeric) as rwm_expected_non_disadvantaged_pct,
cast(d.rwm_expected_disadvantaged_pct as numeric) - cast(nd.rwm_expected_non_disadvantaged_pct as numeric) as disadvantaged_gap,
d.rwm_expected_disadvantaged_pct,
nd.rwm_expected_non_disadvantaged_pct,
d.rwm_expected_disadvantaged_pct - nd.rwm_expected_non_disadvantaged_pct as disadvantaged_gap,
-- Context
i.disadvantaged_pct,

View File

@@ -2,7 +2,8 @@
-- KS4 performance data is long-format with breakdown dimensions (breakdown_topic,
-- breakdown, sex). Unlike KS2 which has a subject dimension, KS4 metrics are
-- already in separate columns — we just filter to the 'All pupils' breakdown.
-- EES uses 'z' for suppressed values — cast to null via nullif.
-- EES uses 'z' (not applicable) and 'c' (confidential) as suppression codes —
-- safe_numeric handles both by treating any non-numeric string as NULL.
with performance as (
select * from {{ source('raw', 'ees_ks4_performance') }}
@@ -14,32 +15,32 @@ all_pupils as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(nullif(pupil_count, 'z') as integer) as total_pupils,
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
-- Attainment 8
cast(nullif(attainment8_average, 'z') as numeric) as attainment_8_score,
{{ safe_numeric('attainment8_average') }} as attainment_8_score,
-- Progress 8
cast(nullif(progress8_average, 'z') as numeric) as progress_8_score,
cast(nullif(progress8_lower_95_ci, 'z') as numeric) as progress_8_lower_ci,
cast(nullif(progress8_upper_95_ci, 'z') as numeric) as progress_8_upper_ci,
cast(nullif(progress8eng_average, 'z') as numeric) as progress_8_english,
cast(nullif(progress8mat_average, 'z') as numeric) as progress_8_maths,
cast(nullif(progress8ebacc_average, 'z') as numeric) as progress_8_ebacc,
cast(nullif(progress8open_average, 'z') as numeric) as progress_8_open,
{{ safe_numeric('progress8_average') }} as progress_8_score,
{{ safe_numeric('progress8_lower_95_ci') }} as progress_8_lower_ci,
{{ safe_numeric('progress8_upper_95_ci') }} as progress_8_upper_ci,
{{ safe_numeric('progress8eng_average') }} as progress_8_english,
{{ safe_numeric('progress8mat_average') }} as progress_8_maths,
{{ safe_numeric('progress8ebacc_average') }} as progress_8_ebacc,
{{ safe_numeric('progress8open_average') }} as progress_8_open,
-- English & Maths pass rates
cast(nullif(engmath_95_percent, 'z') as numeric) as english_maths_strong_pass_pct,
cast(nullif(engmath_94_percent, 'z') as numeric) as english_maths_standard_pass_pct,
{{ safe_numeric('engmath_95_percent') }} as english_maths_strong_pass_pct,
{{ safe_numeric('engmath_94_percent') }} as english_maths_standard_pass_pct,
-- EBacc
cast(nullif(ebacc_entering_percent, 'z') as numeric) as ebacc_entry_pct,
cast(nullif(ebacc_95_percent, 'z') as numeric) as ebacc_strong_pass_pct,
cast(nullif(ebacc_94_percent, 'z') as numeric) as ebacc_standard_pass_pct,
cast(nullif(ebacc_aps_average, 'z') as numeric) as ebacc_avg_score,
{{ safe_numeric('ebacc_entering_percent') }} as ebacc_entry_pct,
{{ safe_numeric('ebacc_95_percent') }} as ebacc_strong_pass_pct,
{{ safe_numeric('ebacc_94_percent') }} as ebacc_standard_pass_pct,
{{ safe_numeric('ebacc_aps_average') }} as ebacc_avg_score,
-- GCSE grade 9-1
cast(nullif(gcse_91_percent, 'z') as numeric) as gcse_grade_91_pct
{{ safe_numeric('gcse_91_percent') }} as gcse_grade_91_pct
from performance
where breakdown_topic = 'All pupils'
@@ -52,11 +53,11 @@ info as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(nullif(endks4_pupil_count, 'z') as integer) as eligible_pupils,
cast(nullif(ks2_scaledscore_average, 'z') as numeric) as prior_attainment_avg,
cast(nullif(sen_pupil_percent, 'z') as numeric) as sen_pct,
cast(nullif(sen_with_ehcp_pupil_percent, 'z') as numeric) as sen_ehcp_pct,
cast(nullif(sen_no_ehcp_pupil_percent, 'z') as numeric) as sen_support_pct
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,
{{ safe_numeric('sen_with_ehcp_pupil_percent') }} as sen_ehcp_pct,
{{ safe_numeric('sen_no_ehcp_pupil_percent') }} as sen_support_pct
from {{ source('raw', 'ees_ks4_info') }}
where school_urn is not null
)