fix(dbt): filter non-numeric URNs and trim whitespace in EES staging models
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m30s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s

- Filter school_urn/time_period with the regex '^[0-9]+$' to exclude "n/a" and
  other non-numeric values that caused integer cast failures in fact_admissions
- Add trim() to all school_urn/time_period casts to prevent whitespace
  variants from producing duplicate urn+year rows in fact_ks2_performance

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 12:00:30 +00:00
parent 6e720feca4
commit b41e6c250e
3 changed files with 22 additions and 21 deletions

View File

@@ -5,13 +5,15 @@
with source as (
select * from {{ source('raw', 'ees_admissions') }}
where school_urn is not null
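-- Exclude rows where school_urn or time_period is null, empty, or non-numeric (e.g. "n/a" LA aggregates).
-- The anchored digits-only pattern also rejects values with leading/trailing whitespace.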
where school_urn ~ '^[0-9]+$'
and time_period ~ '^[0-9]+$'
),
renamed as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(trim(school_urn) as integer) as urn,
cast(trim(time_period) as integer) as year,
school_phase,
entry_year,
@@ -30,14 +32,11 @@ renamed as (
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
-- Derived: oversubscribed if 1st-preference applications > places offered
case
when {{ safe_numeric('times_put_as_1st_preference') }} is not null
and {{ safe_numeric('total_number_places_offered') }} is not null
and {{ safe_numeric('times_put_as_1st_preference') }}
> {{ safe_numeric('total_number_places_offered') }}
then true
else false
end as oversubscribed,
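-- Compare the two safe_numeric() results directly instead of wrapping them in a CASE.
-- NOTE(review): the comparison evaluates to NULL (not false) when either operand is NULL — confirm downstream consumers accept a nullable flag.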
(
{{ safe_numeric('times_put_as_1st_preference') }}
> {{ safe_numeric('total_number_places_offered') }}
) as oversubscribed,
-- Context
admissions_policy,

View File

@@ -9,13 +9,14 @@
with attainment as (
select * from {{ source('raw', 'ees_ks2_attainment') }}
where school_urn is not null
where school_urn ~ '^[0-9]+$'
and time_period ~ '^[0-9]+$'
),
pivoted as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(trim(school_urn) as integer) as urn,
cast(trim(time_period) as integer) as year,
-- RWM combined (All pupils / Total)
max(case when subject = 'Reading, writing and maths'
@@ -117,8 +118,8 @@ pivoted as (
info as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(trim(school_urn) as integer) as urn,
cast(trim(time_period) as integer) as year,
{{ safe_numeric('totpups') }}::integer as total_pupils,
{{ safe_numeric('telig') }}::integer as eligible_pupils,
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,

View File

@@ -9,14 +9,15 @@
with performance as (
select * from {{ source('raw', 'ees_ks4_performance') }}
where school_urn is not null
where school_urn ~ '^[0-9]+$'
and time_period ~ '^[0-9]+$'
),
-- Filter to all-pupils totals (one row per school per year)
all_pupils as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(trim(school_urn) as integer) as urn,
cast(trim(time_period) as integer) as year,
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
-- Attainment 8
@@ -53,8 +54,8 @@ all_pupils as (
-- KS4 info table for context/demographics
info as (
select
cast(school_urn as integer) as urn,
cast(time_period as integer) as year,
cast(trim(school_urn) as integer) as urn,
cast(trim(time_period) as integer) as year,
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,