fix(dbt): filter non-numeric URNs and trim whitespace in EES staging models
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m30s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m30s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
- Filter school_urn/time_period with the regex '^[0-9]+$' to exclude "n/a" and other non-numeric values that caused integer cast failures in fact_admissions
- Add trim() to all school_urn/time_period casts to prevent whitespace variants from producing duplicate urn+year rows in fact_ks2_performance

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,13 +5,15 @@
|
||||
|
||||
with source as (
|
||||
select * from {{ source('raw', 'ees_admissions') }}
|
||||
where school_urn is not null
|
||||
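-- Exclude rows where school_urn or time_period is null, empty, or non-numeric (e.g. "n/a" LA aggregates)
-- NOTE(review): the regex is tested against the untrimmed value, so whitespace-padded values are dropped here rather than normalised by the trim() casts below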
|
||||
where school_urn ~ '^[0-9]+$'
|
||||
and time_period ~ '^[0-9]+$'
|
||||
),
|
||||
|
||||
renamed as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
school_phase,
|
||||
entry_year,
|
||||
|
||||
@@ -30,14 +32,11 @@ renamed as (
|
||||
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
|
||||
|
||||
-- Derived: oversubscribed if 1st-preference applications > places offered
|
||||
case
|
||||
when {{ safe_numeric('times_put_as_1st_preference') }} is not null
|
||||
and {{ safe_numeric('total_number_places_offered') }} is not null
|
||||
and {{ safe_numeric('times_put_as_1st_preference') }}
|
||||
> {{ safe_numeric('total_number_places_offered') }}
|
||||
then true
|
||||
else false
|
||||
end as oversubscribed,
|
||||
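-- Compare the two safe_numeric() results directly instead of wrapping them in a CASE
-- NOTE(review): presumably safe_numeric() returns NULL for unparseable input; if so this yields NULL (not false) when either value is NULL — confirm downstream models accept that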
|
||||
(
|
||||
{{ safe_numeric('times_put_as_1st_preference') }}
|
||||
> {{ safe_numeric('total_number_places_offered') }}
|
||||
) as oversubscribed,
|
||||
|
||||
-- Context
|
||||
admissions_policy,
|
||||
|
||||
@@ -9,13 +9,14 @@
|
||||
|
||||
with attainment as (
|
||||
select * from {{ source('raw', 'ees_ks2_attainment') }}
|
||||
where school_urn is not null
|
||||
where school_urn ~ '^[0-9]+$'
|
||||
and time_period ~ '^[0-9]+$'
|
||||
),
|
||||
|
||||
pivoted as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
|
||||
-- RWM combined (All pupils / Total)
|
||||
max(case when subject = 'Reading, writing and maths'
|
||||
@@ -117,8 +118,8 @@ pivoted as (
|
||||
|
||||
info as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
{{ safe_numeric('totpups') }}::integer as total_pupils,
|
||||
{{ safe_numeric('telig') }}::integer as eligible_pupils,
|
||||
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
|
||||
|
||||
@@ -9,14 +9,15 @@
|
||||
|
||||
with performance as (
|
||||
select * from {{ source('raw', 'ees_ks4_performance') }}
|
||||
where school_urn is not null
|
||||
where school_urn ~ '^[0-9]+$'
|
||||
and time_period ~ '^[0-9]+$'
|
||||
),
|
||||
|
||||
-- Filter to all-pupils totals (one row per school per year)
|
||||
all_pupils as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
|
||||
|
||||
-- Attainment 8
|
||||
@@ -53,8 +54,8 @@ all_pupils as (
|
||||
-- KS4 info table for context/demographics
|
||||
info as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
|
||||
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
|
||||
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,
|
||||
|
||||
Reference in New Issue
Block a user