diff --git a/pipeline/transform/models/staging/stg_ees_admissions.sql b/pipeline/transform/models/staging/stg_ees_admissions.sql
index 6ee07a1..3ca26aa 100644
--- a/pipeline/transform/models/staging/stg_ees_admissions.sql
+++ b/pipeline/transform/models/staging/stg_ees_admissions.sql
@@ -5,13 +5,15 @@
 with source as (
     select *
     from {{ source('raw', 'ees_admissions') }}
-    where school_urn is not null
+    -- Exclude rows where school_urn or time_period is null, empty, or non-numeric after trimming (e.g. "n/a" LA aggregates)
+    where trim(school_urn) ~ '^[0-9]+$'
+      and trim(time_period) ~ '^[0-9]+$'
 ),
 
 renamed as (
     select
-        cast(school_urn as integer) as urn,
-        cast(time_period as integer) as year,
+        cast(trim(school_urn) as integer) as urn,
+        cast(trim(time_period) as integer) as year,
         school_phase,
         entry_year,
 
@@ -30,14 +32,12 @@ renamed as (
         {{ safe_numeric('proportion_1stprefs_v_totaloffers') }} as first_preference_offer_pct,
 
         -- Derived: oversubscribed if 1st-preference applications > places offered
-        case
-            when {{ safe_numeric('times_put_as_1st_preference') }} is not null
-             and {{ safe_numeric('total_number_places_offered') }} is not null
-             and {{ safe_numeric('times_put_as_1st_preference') }}
-                 > {{ safe_numeric('total_number_places_offered') }}
-            then true
-            else false
-        end as oversubscribed,
+        -- A NULL operand makes the comparison NULL; coalesce keeps the flag false, as the old CASE did
+        coalesce(
+            {{ safe_numeric('times_put_as_1st_preference') }}
+                > {{ safe_numeric('total_number_places_offered') }},
+            false
+        ) as oversubscribed,
 
         -- Context
         admissions_policy,
diff --git a/pipeline/transform/models/staging/stg_ees_ks2.sql b/pipeline/transform/models/staging/stg_ees_ks2.sql
index 061168e..92868ee 100644
--- a/pipeline/transform/models/staging/stg_ees_ks2.sql
+++ b/pipeline/transform/models/staging/stg_ees_ks2.sql
@@ -9,13 +9,15 @@
 with attainment as (
     select *
     from {{ source('raw', 'ees_ks2_attainment') }}
-    where school_urn is not null
+    -- Keep only rows whose trimmed school_urn and time_period are all digits, so the integer casts below cannot fail
+    where trim(school_urn) ~ '^[0-9]+$'
+      and trim(time_period) ~ '^[0-9]+$'
 ),
 
 pivoted as (
     select
-        cast(school_urn as integer) as urn,
-        cast(time_period as integer) as year,
+        cast(trim(school_urn) as integer) as urn,
+        cast(trim(time_period) as integer) as year,
 
         -- RWM combined (All pupils / Total)
         max(case when subject = 'Reading, writing and maths'
@@ -117,8 +119,8 @@ pivoted as (
 
 info as (
     select
-        cast(school_urn as integer) as urn,
-        cast(time_period as integer) as year,
+        cast(trim(school_urn) as integer) as urn,
+        cast(trim(time_period) as integer) as year,
         {{ safe_numeric('totpups') }}::integer as total_pupils,
         {{ safe_numeric('telig') }}::integer as eligible_pupils,
         {{ safe_numeric('ptfsm6cla1a') }} as disadvantaged_pct,
diff --git a/pipeline/transform/models/staging/stg_ees_ks4.sql b/pipeline/transform/models/staging/stg_ees_ks4.sql
index 0c62220..169fe68 100644
--- a/pipeline/transform/models/staging/stg_ees_ks4.sql
+++ b/pipeline/transform/models/staging/stg_ees_ks4.sql
@@ -9,14 +9,16 @@
 with performance as (
     select *
     from {{ source('raw', 'ees_ks4_performance') }}
-    where school_urn is not null
+    -- Keep only rows whose trimmed school_urn and time_period are all digits, so the integer casts below cannot fail
+    where trim(school_urn) ~ '^[0-9]+$'
+      and trim(time_period) ~ '^[0-9]+$'
 ),
 
 -- Filter to all-pupils totals (one row per school per year)
 all_pupils as (
     select
-        cast(school_urn as integer) as urn,
-        cast(time_period as integer) as year,
+        cast(trim(school_urn) as integer) as urn,
+        cast(trim(time_period) as integer) as year,
         {{ safe_numeric('pupil_count') }}::integer as total_pupils,
 
         -- Attainment 8
@@ -53,8 +55,8 @@ all_pupils as (
 -- KS4 info table for context/demographics
 info as (
     select
-        cast(school_urn as integer) as urn,
-        cast(time_period as integer) as year,
+        cast(trim(school_urn) as integer) as urn,
+        cast(trim(time_period) as integer) as year,
         {{ safe_numeric('endks4_pupil_count') }}::integer as eligible_pupils,
         {{ safe_numeric('ks2_scaledscore_average') }} as prior_attainment_avg,
         {{ safe_numeric('sen_pupil_percent') }} as sen_pct,