feat(census): add demographic columns to EES census tap and staging models
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
tap-uk-ees: EESCensusStream now declares 27 data columns (FSM %, EAL %, ethnicity breakdowns, pupil counts) with clean Singer field names mapped from the verbose CSV column names (e.g. '% of pupils known to be eligible for free school meals' → fsm_pct) via a new _column_renames mechanism on the base stream class.

stg_ees_census: materialised as a table, applies safe_numeric to all percentage/count columns, filters to numeric URNs.

int_pupil_chars_merged + fact_pupil_characteristics: pass all columns through from staging (previously stubs with only 3 columns).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,34 @@
|
||||
-- Intermediate model: Merged pupil characteristics from census data
|
||||
-- TODO: Expand once census data columns are verified and added to stg_ees_census
|
||||
|
||||
select
|
||||
urn,
|
||||
year,
|
||||
phase_type_grouping
|
||||
phase_type_grouping,
|
||||
total_pupils,
|
||||
female_pupils,
|
||||
male_pupils,
|
||||
full_time_pupils,
|
||||
part_time_pupils,
|
||||
total_boarders,
|
||||
fsm_eligible_n,
|
||||
fsm_pct,
|
||||
eal_pct,
|
||||
young_carer_pct,
|
||||
ethnicity_white_british_pct,
|
||||
ethnicity_white_other_pct,
|
||||
ethnicity_gypsy_roma_pct,
|
||||
ethnicity_mixed_wbc_pct,
|
||||
ethnicity_mixed_wba_pct,
|
||||
ethnicity_mixed_wa_pct,
|
||||
ethnicity_mixed_other_pct,
|
||||
ethnicity_asian_indian_pct,
|
||||
ethnicity_asian_pakistani_pct,
|
||||
ethnicity_asian_bangladeshi_pct,
|
||||
ethnicity_asian_other_pct,
|
||||
ethnicity_black_caribbean_pct,
|
||||
ethnicity_black_african_pct,
|
||||
ethnicity_black_other_pct,
|
||||
ethnicity_chinese_pct,
|
||||
ethnicity_other_pct,
|
||||
ethnicity_unclassified_pct
|
||||
from {{ ref('stg_ees_census') }}
|
||||
|
||||
@@ -1,8 +1,34 @@
|
||||
-- Mart: Pupil characteristics — one row per URN per year
|
||||
-- TODO: Expand once census data columns are verified and added to staging
|
||||
|
||||
select
|
||||
urn,
|
||||
year,
|
||||
phase_type_grouping
|
||||
phase_type_grouping,
|
||||
total_pupils,
|
||||
female_pupils,
|
||||
male_pupils,
|
||||
full_time_pupils,
|
||||
part_time_pupils,
|
||||
total_boarders,
|
||||
fsm_eligible_n,
|
||||
fsm_pct,
|
||||
eal_pct,
|
||||
young_carer_pct,
|
||||
ethnicity_white_british_pct,
|
||||
ethnicity_white_other_pct,
|
||||
ethnicity_gypsy_roma_pct,
|
||||
ethnicity_mixed_wbc_pct,
|
||||
ethnicity_mixed_wba_pct,
|
||||
ethnicity_mixed_wa_pct,
|
||||
ethnicity_mixed_other_pct,
|
||||
ethnicity_asian_indian_pct,
|
||||
ethnicity_asian_pakistani_pct,
|
||||
ethnicity_asian_bangladeshi_pct,
|
||||
ethnicity_asian_other_pct,
|
||||
ethnicity_black_caribbean_pct,
|
||||
ethnicity_black_african_pct,
|
||||
ethnicity_black_other_pct,
|
||||
ethnicity_chinese_pct,
|
||||
ethnicity_other_pct,
|
||||
ethnicity_unclassified_pct
|
||||
from {{ ref('int_pupil_chars_merged') }}
|
||||
|
||||
@@ -1,30 +1,55 @@
|
||||
-- Staging model: School census pupil characteristics from EES
|
||||
-- File: spc_school_level_underlying_data_YYYY.csv (269 cols, in supporting-files/)
|
||||
-- Uses 'urn' column (not school_urn). Tap normalises to school_urn.
|
||||
--
|
||||
-- TODO: The CSV has 269 columns but only metadata columns have been verified.
|
||||
-- Data columns (ethnicity %, FSM %, SEN %, class sizes) need to be discovered
|
||||
-- by inspecting the CSV on the Airflow container. The column references below
|
||||
-- are placeholders and will fail until the tap schema and this model are updated
|
||||
-- with the actual column names.
|
||||
{{ config(materialized='table') }}
|
||||
|
||||
-- Staging model: School Pupils and Characteristics (census)
|
||||
-- One row per school per year. CSV has 269 columns; we extract the
|
||||
-- aggregate demographic summaries only (not per-age/year-group breakdowns).
|
||||
-- Column renames happen in the tap before Singer persists to raw.ees_census.
|
||||
|
||||
with source as (
|
||||
select * from {{ source('raw', 'ees_census') }}
|
||||
where school_urn is not null
|
||||
),
|
||||
|
||||
renamed as (
|
||||
select
|
||||
cast(school_urn as integer) as urn,
|
||||
cast(time_period as integer) as year,
|
||||
school_name,
|
||||
phase_type_grouping
|
||||
-- TODO: Add census data columns once verified:
|
||||
-- fsm_pct, sen_support_pct, sen_ehcp_pct, eal_pct,
|
||||
-- disadvantaged_pct, ethnicity_white_pct, ethnicity_asian_pct,
|
||||
-- ethnicity_black_pct, ethnicity_mixed_pct, ethnicity_other_pct,
|
||||
-- class_size_avg, stability_pct
|
||||
from source
|
||||
where school_urn ~ '^[0-9]+$'
|
||||
and time_period ~ '^[0-9]+$'
|
||||
)
|
||||
|
||||
select * from renamed
|
||||
-- Final projection of the staging model: reads the `source` CTE and emits
-- the urn/year keys, two descriptive columns, and the census measures under
-- snake_case names.
-- NOTE(review): no numeric-pattern filter on school_urn or time_period is
-- visible in this statement; the integer casts below will presumably raise
-- on non-numeric text — confirm such rows are filtered before this point.
select
|
||||
-- Keys: surrounding whitespace is trimmed before the cast to integer.
cast(trim(school_urn) as integer) as urn,
|
||||
cast(trim(time_period) as integer) as year,
|
||||
-- Descriptive columns are passed through unchanged.
school_name,
|
||||
phase_type_grouping,
|
||||
|
||||
-- Pupil counts: the safe_numeric output is cast to integer.
-- NOTE(review): safe_numeric is a project macro not shown here — presumably
-- it yields NULL for values that cannot be parsed as numbers; confirm.
{{ safe_numeric('total_pupils') }}::integer as total_pupils,
|
||||
{{ safe_numeric('female_pupils') }}::integer as female_pupils,
|
||||
{{ safe_numeric('male_pupils') }}::integer as male_pupils,
|
||||
{{ safe_numeric('full_time_pupils') }}::integer as full_time_pupils,
|
||||
{{ safe_numeric('part_time_pupils') }}::integer as part_time_pupils,
|
||||
{{ safe_numeric('total_boarders') }}::integer as total_boarders,
|
||||
|
||||
-- FSM (free school meals): eligible count as integer; the percentage keeps
-- whatever type safe_numeric returns (no further cast is applied).
{{ safe_numeric('fsm_eligible_n') }}::integer as fsm_eligible_n,
|
||||
{{ safe_numeric('fsm_pct') }} as fsm_pct,
|
||||
|
||||
-- EAL & young carers: percentages, no cast beyond safe_numeric.
{{ safe_numeric('eal_pct') }} as eal_pct,
|
||||
{{ safe_numeric('young_carer_pct') }} as young_carer_pct,
|
||||
|
||||
-- Ethnicity: one percentage column per group, no cast beyond safe_numeric.
{{ safe_numeric('ethnicity_white_british_pct') }} as ethnicity_white_british_pct,
|
||||
{{ safe_numeric('ethnicity_white_other_pct') }} as ethnicity_white_other_pct,
|
||||
{{ safe_numeric('ethnicity_gypsy_roma_pct') }} as ethnicity_gypsy_roma_pct,
|
||||
{{ safe_numeric('ethnicity_mixed_wbc_pct') }} as ethnicity_mixed_wbc_pct,
|
||||
{{ safe_numeric('ethnicity_mixed_wba_pct') }} as ethnicity_mixed_wba_pct,
|
||||
{{ safe_numeric('ethnicity_mixed_wa_pct') }} as ethnicity_mixed_wa_pct,
|
||||
{{ safe_numeric('ethnicity_mixed_other_pct') }} as ethnicity_mixed_other_pct,
|
||||
{{ safe_numeric('ethnicity_asian_indian_pct') }} as ethnicity_asian_indian_pct,
|
||||
{{ safe_numeric('ethnicity_asian_pakistani_pct') }} as ethnicity_asian_pakistani_pct,
|
||||
{{ safe_numeric('ethnicity_asian_bangladeshi_pct') }} as ethnicity_asian_bangladeshi_pct,
|
||||
{{ safe_numeric('ethnicity_asian_other_pct') }} as ethnicity_asian_other_pct,
|
||||
{{ safe_numeric('ethnicity_black_caribbean_pct') }} as ethnicity_black_caribbean_pct,
|
||||
{{ safe_numeric('ethnicity_black_african_pct') }} as ethnicity_black_african_pct,
|
||||
{{ safe_numeric('ethnicity_black_other_pct') }} as ethnicity_black_other_pct,
|
||||
{{ safe_numeric('ethnicity_chinese_pct') }} as ethnicity_chinese_pct,
|
||||
{{ safe_numeric('ethnicity_other_pct') }} as ethnicity_other_pct,
|
||||
{{ safe_numeric('ethnicity_unclassified_pct') }} as ethnicity_unclassified_pct
|
||||
|
||||
from source
|
||||
|
||||
Reference in New Issue
Block a user