fix(dbt): filter non-numeric URNs and trim whitespace in EES staging models
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m30s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m30s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
- Filter school_urn/time_period to '^[0-9]+$' to exclude "n/a" and other non-numeric values that caused integer cast failures in fact_admissions
- Add trim() to all school_urn/time_period casts to prevent whitespace variants producing duplicate urn+year rows in fact_ks2_performance

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,13 +5,15 @@
|
|||||||
|
|
||||||
with source as (
|
with source as (
|
||||||
select * from {{ source('raw', 'ees_admissions') }}
|
select * from {{ source('raw', 'ees_admissions') }}
|
||||||
where school_urn is not null
|
-- Exclude rows where school_urn or time_period is null, empty, or non-numeric (e.g. "n/a" LA aggregates)
|
||||||
|
where school_urn ~ '^[0-9]+$'
|
||||||
|
and time_period ~ '^[0-9]+$'
|
||||||
),
|
),
|
||||||
|
|
||||||
renamed as (
|
renamed as (
|
||||||
select
|
select
|
||||||
cast(school_urn as integer) as urn,
|
cast(trim(school_urn) as integer) as urn,
|
||||||
cast(time_period as integer) as year,
|
cast(trim(time_period) as integer) as year,
|
||||||
school_phase,
|
school_phase,
|
||||||
entry_year,
|
entry_year,
|
||||||
|
|
||||||
@@ -30,14 +32,11 @@ renamed as (
|
|||||||
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
|
{{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
|
||||||
|
|
||||||
-- Derived: oversubscribed if 1st-preference applications > places offered
|
-- Derived: oversubscribed if 1st-preference applications > places offered
|
||||||
case
|
-- NOTE(review): compares the safe_numeric() results directly; unlike the previous CASE, this yields NULL (not false) when either value is NULL
|
||||||
when {{ safe_numeric('times_put_as_1st_preference') }} is not null
|
(
|
||||||
and {{ safe_numeric('total_number_places_offered') }} is not null
|
{{ safe_numeric('times_put_as_1st_preference') }}
|
||||||
and {{ safe_numeric('times_put_as_1st_preference') }}
|
> {{ safe_numeric('total_number_places_offered') }}
|
||||||
> {{ safe_numeric('total_number_places_offered') }}
|
) as oversubscribed,
|
||||||
then true
|
|
||||||
else false
|
|
||||||
end as oversubscribed,
|
|
||||||
|
|
||||||
-- Context
|
-- Context
|
||||||
admissions_policy,
|
admissions_policy,
|
||||||
|
|||||||
@@ -9,13 +9,14 @@
|
|||||||
|
|
||||||
with attainment as (
|
with attainment as (
|
||||||
select * from {{ source('raw', 'ees_ks2_attainment') }}
|
select * from {{ source('raw', 'ees_ks2_attainment') }}
|
||||||
where school_urn is not null
|
where school_urn ~ '^[0-9]+$'
|
||||||
|
and time_period ~ '^[0-9]+$'
|
||||||
),
|
),
|
||||||
|
|
||||||
pivoted as (
|
pivoted as (
|
||||||
select
|
select
|
||||||
cast(school_urn as integer) as urn,
|
cast(trim(school_urn) as integer) as urn,
|
||||||
cast(time_period as integer) as year,
|
cast(trim(time_period) as integer) as year,
|
||||||
|
|
||||||
-- RWM combined (All pupils / Total)
|
-- RWM combined (All pupils / Total)
|
||||||
max(case when subject = 'Reading, writing and maths'
|
max(case when subject = 'Reading, writing and maths'
|
||||||
@@ -117,8 +118,8 @@ pivoted as (
|
|||||||
|
|
||||||
info as (
|
info as (
|
||||||
select
|
select
|
||||||
cast(school_urn as integer) as urn,
|
cast(trim(school_urn) as integer) as urn,
|
||||||
cast(time_period as integer) as year,
|
cast(trim(time_period) as integer) as year,
|
||||||
{{ safe_numeric('totpups') }}::integer as total_pupils,
|
{{ safe_numeric('totpups') }}::integer as total_pupils,
|
||||||
{{ safe_numeric('telig') }}::integer as eligible_pupils,
|
{{ safe_numeric('telig') }}::integer as eligible_pupils,
|
||||||
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
|
{{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
|
||||||
|
|||||||
@@ -9,14 +9,15 @@
|
|||||||
|
|
||||||
with performance as (
|
with performance as (
|
||||||
select * from {{ source('raw', 'ees_ks4_performance') }}
|
select * from {{ source('raw', 'ees_ks4_performance') }}
|
||||||
where school_urn is not null
|
where school_urn ~ '^[0-9]+$'
|
||||||
|
and time_period ~ '^[0-9]+$'
|
||||||
),
|
),
|
||||||
|
|
||||||
-- Filter to all-pupils totals (one row per school per year)
|
-- Filter to all-pupils totals (one row per school per year)
|
||||||
all_pupils as (
|
all_pupils as (
|
||||||
select
|
select
|
||||||
cast(school_urn as integer) as urn,
|
cast(trim(school_urn) as integer) as urn,
|
||||||
cast(time_period as integer) as year,
|
cast(trim(time_period) as integer) as year,
|
||||||
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
|
{{ safe_numeric('pupil_count') }}::integer as total_pupils,
|
||||||
|
|
||||||
-- Attainment 8
|
-- Attainment 8
|
||||||
@@ -53,8 +54,8 @@ all_pupils as (
|
|||||||
-- KS4 info table for context/demographics
|
-- KS4 info table for context/demographics
|
||||||
info as (
|
info as (
|
||||||
select
|
select
|
||||||
cast(school_urn as integer) as urn,
|
cast(trim(school_urn) as integer) as urn,
|
||||||
cast(time_period as integer) as year,
|
cast(trim(time_period) as integer) as year,
|
||||||
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
|
{{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
|
||||||
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
|
{{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
|
||||||
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,
|
{{ safe_numeric('sen_pupil_percent') }} as sen_pct,
|
||||||
|
|||||||
Reference in New Issue
Block a user