feat(census): add demographic columns to EES census tap and staging models
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

tap-uk-ees: EESCensusStream now declares 27 data columns (FSM %, EAL %,
ethnicity breakdowns, pupil counts) with clean Singer field names mapped
from the verbose CSV column names (e.g. '% of pupils known to be eligible
for free school meals' → fsm_pct) via a new _column_renames mechanism on
the base stream class.

stg_ees_census: materialised as table, applies safe_numeric to all
percentage/count columns, filters to numeric URNs.

int_pupil_chars_merged + fact_pupil_characteristics: pass all columns
through from staging (previously stubs with only 3 columns).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 14:07:48 +00:00
parent 4b02ab3d8a
commit 668e234eb2
4 changed files with 176 additions and 32 deletions

View File

@@ -1,8 +1,34 @@
-- Intermediate model: merged pupil characteristics from census data.
-- Selects the keys, pupil counts and demographic percentage columns from
-- stg_ees_census by name, with no further transformation.
select
    -- Keys
    urn,
    year,
    phase_type_grouping,

    -- Pupil counts
    total_pupils,
    female_pupils,
    male_pupils,
    full_time_pupils,
    part_time_pupils,
    total_boarders,

    -- Free school meals
    fsm_eligible_n,
    fsm_pct,

    -- EAL & young carers
    eal_pct,
    young_carer_pct,

    -- Ethnicity
    ethnicity_white_british_pct,
    ethnicity_white_other_pct,
    ethnicity_gypsy_roma_pct,
    ethnicity_mixed_wbc_pct,
    ethnicity_mixed_wba_pct,
    ethnicity_mixed_wa_pct,
    ethnicity_mixed_other_pct,
    ethnicity_asian_indian_pct,
    ethnicity_asian_pakistani_pct,
    ethnicity_asian_bangladeshi_pct,
    ethnicity_asian_other_pct,
    ethnicity_black_caribbean_pct,
    ethnicity_black_african_pct,
    ethnicity_black_other_pct,
    ethnicity_chinese_pct,
    ethnicity_other_pct,
    ethnicity_unclassified_pct
from {{ ref('stg_ees_census') }}

View File

@@ -1,8 +1,34 @@
-- Mart: Pupil characteristics — one row per URN per year.
-- Selects the keys, pupil counts and demographic percentage columns from
-- int_pupil_chars_merged by name, with no further transformation.
select
    -- Keys
    urn,
    year,
    phase_type_grouping,

    -- Pupil counts
    total_pupils,
    female_pupils,
    male_pupils,
    full_time_pupils,
    part_time_pupils,
    total_boarders,

    -- Free school meals
    fsm_eligible_n,
    fsm_pct,

    -- EAL & young carers
    eal_pct,
    young_carer_pct,

    -- Ethnicity
    ethnicity_white_british_pct,
    ethnicity_white_other_pct,
    ethnicity_gypsy_roma_pct,
    ethnicity_mixed_wbc_pct,
    ethnicity_mixed_wba_pct,
    ethnicity_mixed_wa_pct,
    ethnicity_mixed_other_pct,
    ethnicity_asian_indian_pct,
    ethnicity_asian_pakistani_pct,
    ethnicity_asian_bangladeshi_pct,
    ethnicity_asian_other_pct,
    ethnicity_black_caribbean_pct,
    ethnicity_black_african_pct,
    ethnicity_black_other_pct,
    ethnicity_chinese_pct,
    ethnicity_other_pct,
    ethnicity_unclassified_pct
from {{ ref('int_pupil_chars_merged') }}

View File

@@ -1,30 +1,55 @@
{{ config(materialized='table') }}

-- Staging model: School Pupils and Characteristics (census)
-- One row per school per year. CSV has 269 columns; we extract the
-- aggregate demographic summaries only (not per-age/year-group breakdowns).
-- Source file: spc_school_level_underlying_data_YYYY.csv (in supporting-files/).
-- The CSV's 'urn' column is normalised to school_urn by the tap, and the
-- other column renames also happen in the tap before Singer persists to
-- raw.ees_census.

with source as (

    select *
    from {{ source('raw', 'ees_census') }}
    where school_urn is not null
        -- Keep only rows whose keys are purely numeric once trimmed, so the
        -- integer casts in the final select cannot fail on stray text values.
        and trim(school_urn) ~ '^[0-9]+$'
        and trim(time_period) ~ '^[0-9]+$'

)

select
    -- Keys and descriptive columns
    cast(trim(school_urn) as integer) as urn,
    cast(trim(time_period) as integer) as year,
    school_name,
    phase_type_grouping,

    -- Pupil counts
    -- cast(... as integer) wraps the whole safe_numeric expansion, so the
    -- result does not depend on how the macro's SQL is shaped (the macro is
    -- defined elsewhere in the project).
    cast({{ safe_numeric('total_pupils') }} as integer) as total_pupils,
    cast({{ safe_numeric('female_pupils') }} as integer) as female_pupils,
    cast({{ safe_numeric('male_pupils') }} as integer) as male_pupils,
    cast({{ safe_numeric('full_time_pupils') }} as integer) as full_time_pupils,
    cast({{ safe_numeric('part_time_pupils') }} as integer) as part_time_pupils,
    cast({{ safe_numeric('total_boarders') }} as integer) as total_boarders,

    -- FSM
    cast({{ safe_numeric('fsm_eligible_n') }} as integer) as fsm_eligible_n,
    {{ safe_numeric('fsm_pct') }} as fsm_pct,

    -- EAL & young carers
    {{ safe_numeric('eal_pct') }} as eal_pct,
    {{ safe_numeric('young_carer_pct') }} as young_carer_pct,

    -- Ethnicity
    {{ safe_numeric('ethnicity_white_british_pct') }} as ethnicity_white_british_pct,
    {{ safe_numeric('ethnicity_white_other_pct') }} as ethnicity_white_other_pct,
    {{ safe_numeric('ethnicity_gypsy_roma_pct') }} as ethnicity_gypsy_roma_pct,
    {{ safe_numeric('ethnicity_mixed_wbc_pct') }} as ethnicity_mixed_wbc_pct,
    {{ safe_numeric('ethnicity_mixed_wba_pct') }} as ethnicity_mixed_wba_pct,
    {{ safe_numeric('ethnicity_mixed_wa_pct') }} as ethnicity_mixed_wa_pct,
    {{ safe_numeric('ethnicity_mixed_other_pct') }} as ethnicity_mixed_other_pct,
    {{ safe_numeric('ethnicity_asian_indian_pct') }} as ethnicity_asian_indian_pct,
    {{ safe_numeric('ethnicity_asian_pakistani_pct') }} as ethnicity_asian_pakistani_pct,
    {{ safe_numeric('ethnicity_asian_bangladeshi_pct') }} as ethnicity_asian_bangladeshi_pct,
    {{ safe_numeric('ethnicity_asian_other_pct') }} as ethnicity_asian_other_pct,
    {{ safe_numeric('ethnicity_black_caribbean_pct') }} as ethnicity_black_caribbean_pct,
    {{ safe_numeric('ethnicity_black_african_pct') }} as ethnicity_black_african_pct,
    {{ safe_numeric('ethnicity_black_other_pct') }} as ethnicity_black_other_pct,
    {{ safe_numeric('ethnicity_chinese_pct') }} as ethnicity_chinese_pct,
    {{ safe_numeric('ethnicity_other_pct') }} as ethnicity_other_pct,
    {{ safe_numeric('ethnicity_unclassified_pct') }} as ethnicity_unclassified_pct
from source