feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

Replaces the hand-rolled integrator with a production-grade ELT pipeline
using Meltano (Singer taps), dbt Core (medallion architecture), and
Apache Airflow (orchestration). Adds Typesense for search and PostGIS
for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-26 08:37:53 +00:00
parent 8aca0a7a53
commit 8f02b5125e
65 changed files with 2822 additions and 72 deletions

View File

@@ -0,0 +1,28 @@
# dbt project configuration for the school_compare pipeline.
name: school_compare
version: "1.0.0"
config-version: 2

# Connection profile; matches the top-level key in profiles.yml.
profile: school_compare

# Directories dbt scans for each resource type.
model-paths:
  - "models"
macro-paths:
  - "macros"
test-paths:
  - "tests"
seed-paths:
  - "seeds"

# Build artefacts, and the directories listed for cleaning.
target-path: "target"
clean-targets:
  - "target"
  - "dbt_packages"

# Per-layer defaults: staging and intermediate are views, marts are tables.
# Each layer is assigned its own schema setting.
models:
  school_compare:
    staging:
      +materialized: view
      +schema: staging
    intermediate:
      +materialized: view
      +schema: intermediate
    marts:
      +materialized: table
      +schema: marts

seeds:
  school_compare:
    +schema: seeds

View File

@@ -0,0 +1,36 @@
-- Macro: chain_lineage
-- Generates a complete query that unions a relation's own rows with rows held
-- under predecessor URNs, re-keyed to the current URN via int_school_lineage.
--
-- Args:
--   source_ref: relation to read from (for example a ref to a staging model).
--   urn_col:    name of the URN column in source_ref (default 'urn').
--   year_col:   name of the year column in source_ref (default 'year').
--
-- Output columns: current_urn, source_urn, then every column of source_ref.
-- Uses DISTINCT ON, which is PostgreSQL-specific.
{% macro chain_lineage(source_ref, urn_col='urn', year_col='year') %}
with current_data as (
    -- Rows reported under the URN itself.
    select
        {{ urn_col }} as current_urn,
        {{ urn_col }} as source_urn,
        *
    from {{ source_ref }}
),

predecessor_data as (
    -- Rows reported under a predecessor URN, attributed to the current URN.
    -- A URN can have several predecessors (amalgamations, multi-step chains),
    -- and more than one of them may hold data for the same year. Keep exactly
    -- one row per (current_urn, year): the nearest predecessor by depth, then
    -- the most recent link, then the lowest source URN as a stable tie-break.
    select distinct on (lin.current_urn, src.{{ year_col }})
        lin.current_urn,
        src.{{ urn_col }} as source_urn,
        src.*
    from {{ source_ref }} as src
    inner join {{ ref('int_school_lineage') }} as lin
        on src.{{ urn_col }} = lin.predecessor_urn
    -- Only use predecessor data for years the current URN has no row of its own.
    where not exists (
        select 1
        from {{ source_ref }} as curr
        where curr.{{ urn_col }} = lin.current_urn
          and curr.{{ year_col }} = src.{{ year_col }}
    )
    order by
        lin.current_urn,
        src.{{ year_col }},
        lin.depth,
        lin.link_date desc nulls last,
        src.{{ urn_col }}
),

combined as (
    select * from current_data
    union all
    select * from predecessor_data
)

select * from combined
{% endmacro %}

View File

@@ -0,0 +1,13 @@
-- Macro: parse_ofsted_grade
-- Emits a CASE expression mapping an Ofsted grade held as text or number to an
-- integer 1-4, or NULL when the value is not recognised.
--
-- Args:
--   column: SQL expression holding the raw grade.
--
-- The value is cast to text, trimmed and lower-cased before matching, so
-- differences in letter case or surrounding whitespace no longer fall through
-- to NULL. Every value matched previously still maps to the same grade.
{% macro parse_ofsted_grade(column) %}
{%- set grade = "lower(trim((" ~ column ~ ")::text))" %}
case
    when {{ grade }} in ('1', 'outstanding') then 1
    when {{ grade }} in ('2', 'good') then 2
    when {{ grade }} in ('3', 'requires improvement', 'satisfactory') then 3
    when {{ grade }} in ('4', 'inadequate') then 4
    -- NOTE(review): '9' is grouped with Serious Weaknesses here, as in the
    -- original mapping; confirm that '9' is not a "not judged" code in the feed.
    when {{ grade }} in ('9', 'swk', 'serious weaknesses') then 4
    when {{ grade }} in ('sm', 'special measures') then 4
    else null
end
{% endmacro %}

View File

@@ -0,0 +1,8 @@
-- Macro: validate_uk_coordinates
-- Emits a parenthesised boolean expression that is true when lat_col lies in
-- [49.0, 61.0] and lng_col lies in [-8.0, 2.0] (both bounds inclusive).
-- A NULL coordinate makes the expression NULL rather than false.
{% macro validate_uk_coordinates(lat_col, lng_col) %}
(
    {{ lat_col }} >= 49.0 and {{ lat_col }} <= 61.0
    and {{ lng_col }} >= -8.0 and {{ lng_col }} <= 2.0
)
{% endmacro %}

View File

@@ -0,0 +1,62 @@
-- Intermediate model: KS2 data chained across academy conversions
-- Maps predecessor URN data to the current active URN.
-- Output: one row per (current_urn, year), assuming stg_ees_ks2 is itself
-- unique per urn and year. source_urn records which URN reported the row.
-- Uses DISTINCT ON, which is PostgreSQL-specific.
with current_ks2 as (
    -- Rows reported under the URN itself.
    select
        urn as current_urn,
        urn as source_urn,
        year,
        total_pupils,
        rwm_expected_pct,
        reading_expected_pct,
        writing_expected_pct,
        maths_expected_pct,
        rwm_high_pct,
        reading_high_pct,
        writing_high_pct,
        maths_high_pct,
        reading_progress,
        writing_progress,
        maths_progress,
        reading_avg_score,
        maths_avg_score
    from {{ ref('stg_ees_ks2') }}
),

predecessor_ks2 as (
    -- Rows reported under a predecessor URN, attributed to the current URN.
    -- A URN can have several predecessors (amalgamations, multi-step chains),
    -- and more than one may hold data for the same year. Without the
    -- DISTINCT ON those rows would all be emitted and break the urn-year
    -- uniqueness expected downstream. Preference order: nearest predecessor
    -- (lowest depth), most recent link, then lowest source URN.
    select distinct on (lin.current_urn, ks2.year)
        lin.current_urn,
        ks2.urn as source_urn,
        ks2.year,
        ks2.total_pupils,
        ks2.rwm_expected_pct,
        ks2.reading_expected_pct,
        ks2.writing_expected_pct,
        ks2.maths_expected_pct,
        ks2.rwm_high_pct,
        ks2.reading_high_pct,
        ks2.writing_high_pct,
        ks2.maths_high_pct,
        ks2.reading_progress,
        ks2.writing_progress,
        ks2.maths_progress,
        ks2.reading_avg_score,
        ks2.maths_avg_score
    from {{ ref('stg_ees_ks2') }} as ks2
    inner join {{ ref('int_school_lineage') }} as lin
        on ks2.urn = lin.predecessor_urn
    -- Only include predecessor data for years where the current URN has no row.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks2') }} as curr
        where curr.urn = lin.current_urn
          and curr.year = ks2.year
    )
    order by
        lin.current_urn,
        ks2.year,
        lin.depth,
        lin.link_date desc nulls last,
        ks2.urn
),

combined as (
    select * from current_ks2
    union all
    select * from predecessor_ks2
)

select * from combined

View File

@@ -0,0 +1,50 @@
-- Intermediate model: KS4 data chained across academy conversions
-- Maps predecessor URN data to the current URN via int_school_lineage.
-- Output: one row per (current_urn, year), assuming stg_ees_ks4 is itself
-- unique per urn and year. source_urn records which URN reported the row.
-- Uses DISTINCT ON, which is PostgreSQL-specific.
with current_ks4 as (
    -- Rows reported under the URN itself.
    select
        urn as current_urn,
        urn as source_urn,
        year,
        total_pupils,
        progress_8_score,
        attainment_8_score,
        ebacc_entry_pct,
        ebacc_achievement_pct,
        english_strong_pass_pct,
        maths_strong_pass_pct,
        english_maths_strong_pass_pct,
        staying_in_education_pct
    from {{ ref('stg_ees_ks4') }}
),

predecessor_ks4 as (
    -- Rows reported under a predecessor URN, attributed to the current URN.
    -- Several predecessors can hold data for the same year; keep one row per
    -- (current_urn, year) so the urn-year uniqueness expected downstream
    -- holds. Preference order: nearest predecessor (lowest depth), most
    -- recent link, then lowest source URN.
    select distinct on (lin.current_urn, ks4.year)
        lin.current_urn,
        ks4.urn as source_urn,
        ks4.year,
        ks4.total_pupils,
        ks4.progress_8_score,
        ks4.attainment_8_score,
        ks4.ebacc_entry_pct,
        ks4.ebacc_achievement_pct,
        ks4.english_strong_pass_pct,
        ks4.maths_strong_pass_pct,
        ks4.english_maths_strong_pass_pct,
        ks4.staying_in_education_pct
    from {{ ref('stg_ees_ks4') }} as ks4
    inner join {{ ref('int_school_lineage') }} as lin
        on ks4.urn = lin.predecessor_urn
    -- Only include predecessor data for years where the current URN has no row.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks4') }} as curr
        where curr.urn = lin.current_urn
          and curr.year = ks4.year
    )
    order by
        lin.current_urn,
        ks4.year,
        lin.depth,
        lin.link_date desc nulls last,
        ks4.urn
),

combined as (
    select * from current_ks4
    union all
    select * from predecessor_ks4
)

select * from combined

View File

@@ -0,0 +1,37 @@
-- Intermediate model: Latest Ofsted inspection per URN
-- Picks the most recent inspection for each school (one output row per urn).
with ranked as (
    select
        *,
        row_number() over (
            partition by urn
            -- PostgreSQL sorts NULLs first under DESC; without NULLS LAST an
            -- inspection row with no date would be chosen as the "latest".
            order by inspection_date desc nulls last
        ) as rn
    from {{ ref('stg_ofsted_inspections') }}
)

select
    urn,
    inspection_date,
    inspection_type,
    framework,
    overall_effectiveness,
    quality_of_education,
    behaviour_attitudes,
    personal_development,
    leadership_management,
    early_years_provision,
    sixth_form_provision,
    rc_safeguarding_met,
    rc_inclusion,
    rc_curriculum_teaching,
    rc_achievement,
    rc_attendance_behaviour,
    rc_personal_development,
    rc_leadership_governance,
    rc_early_years,
    rc_sixth_form,
    report_url
from ranked
-- rn = 1 is the first row of each urn partition under the ordering above.
where rn = 1

View File

@@ -0,0 +1,18 @@
-- Intermediate model: Merged pupil characteristics from census data
-- Currently a straight column-for-column pass-through of stg_ees_census.
select
    census.urn,
    census.year,
    census.fsm_pct,
    census.sen_support_pct,
    census.sen_ehcp_pct,
    census.eal_pct,
    census.disadvantaged_pct,
    census.ethnicity_white_pct,
    census.ethnicity_asian_pct,
    census.ethnicity_black_pct,
    census.ethnicity_mixed_pct,
    census.ethnicity_other_pct,
    census.class_size_avg,
    census.stability_pct
from {{ ref('stg_ees_census') }} as census

View File

@@ -0,0 +1,48 @@
-- Intermediate model: Recursive predecessor mapping
-- Resolves academy conversion chains so historical data can be attributed
-- to the current (active) URN.
-- Output: one row per (current_urn, predecessor_urn) path, with depth = number
-- of links walked from the starting URN (1 = direct predecessor).
with recursive predecessor_links as (
    -- Links whose type marks the linked URN as a predecessor
    -- (academy conversion, amalgamation, etc.).
    select
        urn,
        linked_urn,
        link_type,
        link_date
    from {{ ref('stg_gias_links') }}
    where link_type in (
        'Predecessor',
        'Predecessor - Loss of academy converter',
        'Predecessor - amalgamated',
        'Predecessor - Fresh Start'
    )
),

lineage as (
    -- Base: direct predecessors.
    select
        urn,
        linked_urn as predecessor_urn,
        link_type,
        link_date,
        1 as depth
    from predecessor_links

    union all

    -- Recursive step: follow each predecessor's own predecessor links while
    -- keeping the URN the walk started from.
    select
        walked.urn,
        nxt.linked_urn as predecessor_urn,
        nxt.link_type,
        nxt.link_date,
        walked.depth + 1
    from lineage as walked
    inner join predecessor_links as nxt
        on walked.predecessor_urn = nxt.urn
    where walked.depth < 5 -- safety limit
)

select
    urn as current_urn,
    predecessor_urn,
    link_type,
    link_date,
    depth
from lineage

View File

@@ -0,0 +1,110 @@
version: 2

# Documentation and tests for the mart layer.
models:
  # ---- Dimensions ----
  - name: dim_school
    description: Canonical school dimension — one row per active URN
    columns:
      - name: urn
        tests: [not_null, unique]
      - name: school_name
        tests: [not_null]
      - name: phase
        tests: [not_null]
      - name: status
        tests:
          - accepted_values:
              values: ['Open']

  - name: dim_location
    description: School location dimension with PostGIS geometry
    columns:
      - name: urn
        tests:
          - not_null
          - unique
          - relationships:
              to: ref('dim_school')
              field: urn
      - name: postcode
        tests: [not_null]

  - name: map_school_lineage
    description: Predecessor/successor lineage map
    columns:
      - name: urn
        tests:
          - not_null
          - relationships:
              to: ref('dim_school')
              field: urn

  # ---- Facts ----
  - name: fact_ks2_performance
    description: KS2 attainment — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Model-level test: the (urn, year) pair must be unique.
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ks4_performance
    description: KS4 attainment — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Model-level test: the (urn, year) pair must be unique.
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ofsted_inspection
    description: Full Ofsted inspection history
    columns:
      - name: urn
        tests: [not_null]
      - name: inspection_date
        tests: [not_null]

  - name: fact_pupil_characteristics
    description: Pupil demographics — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]

  - name: fact_admissions
    description: School admissions — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]

  - name: fact_finance
    description: School financial data — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]

  - name: fact_phonics
    description: Phonics screening results — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]

  - name: fact_parent_view
    description: Parent View survey responses
    columns:
      - name: urn
        tests: [not_null]

  - name: fact_deprivation
    description: IDACI deprivation index

View File

@@ -0,0 +1,19 @@
-- Mart: School location dimension — one row per URN, PostGIS-enabled
-- The geom column is populated by a post-hook or the geocode script.
-- Restricted to open establishments that have a postcode.
select
    est.urn,
    est.address_line1,
    est.address_line2,
    est.town,
    est.county,
    est.postcode,
    est.local_authority_code,
    est.local_authority_name,
    est.parliamentary_constituency,
    est.urban_rural,
    est.easting,
    est.northing
from {{ ref('stg_gias_establishments') }} as est
where est.postcode is not null
  and est.status = 'Open'

View File

@@ -0,0 +1,40 @@
-- Mart: Canonical school dimension — one row per active URN
-- Open establishments from GIAS, enriched with the latest Ofsted inspection
-- where one exists (left join, so schools without one are kept).
with schools as (
    select * from {{ ref('stg_gias_establishments') }}
),

latest_ofsted as (
    select * from {{ ref('int_ofsted_latest') }}
)

select
    s.urn,
    -- LAESTAB is the 3-digit LA code followed by the 4-digit establishment
    -- number, so the LA code is shifted four decimal places (x 10000).
    -- NULL when either component is NULL.
    s.local_authority_code * 10000 + s.establishment_number as laestab,
    s.school_name,
    s.phase,
    s.school_type,
    s.academy_trust_name,
    s.academy_trust_uid,
    s.religious_character,
    s.gender,
    -- NULL when either age bound is NULL.
    s.statutory_low_age || '-' || s.statutory_high_age as age_range,
    s.capacity,
    s.total_pupils,
    -- concat_ws skips NULL parts, so missing title or name parts leave no gap.
    concat_ws(' ', s.head_title, s.head_first_name, s.head_last_name) as headteacher_name,
    s.website,
    s.telephone,
    s.open_date,
    s.close_date,
    s.status,
    s.nursery_provision,
    s.admissions_policy,
    -- Latest Ofsted
    o.overall_effectiveness as ofsted_grade,
    o.inspection_date as ofsted_date,
    o.framework as ofsted_framework
from schools as s
left join latest_ofsted as o
    on s.urn = o.urn
where s.status = 'Open'

View File

@@ -0,0 +1,10 @@
-- Mart: School admissions — one row per URN per year
-- Column-for-column projection of stg_ees_admissions.
select
    adm.urn,
    adm.year,
    adm.published_admission_number,
    adm.total_applications,
    adm.first_preference_offers_pct,
    adm.oversubscribed
from {{ ref('stg_ees_admissions') }} as adm

View File

@@ -0,0 +1,22 @@
-- Mart: Deprivation index (placeholder)
-- Intended grain is one row per URN, via school postcode → LSOA → IDACI score.
-- As written, the final select returns stg_idaci unchanged (lsoa_code,
-- idaci_score, idaci_decile): the output is keyed by LSOA and has no urn column.
with school_postcodes as (
-- Open schools with a postcode. Not yet referenced by the final select.
select
urn,
postcode
from {{ ref('stg_gias_establishments') }}
where status = 'Open'
and postcode is not null
)
-- Note: The join between postcode and LSOA requires a postcode-to-LSOA
-- lookup table. This will be populated by the geocode script or a seed.
-- For now, this model serves as a placeholder that will be completed
-- once the IDACI tap provides the postcode→LSOA mapping.
select
i.lsoa_code,
i.idaci_score,
i.idaci_decile
from {{ ref('stg_idaci') }} i

View File

@@ -0,0 +1,11 @@
-- Mart: School financial data — one row per URN per year
-- Column-for-column projection of stg_fbit_finance.
select
    fin.urn,
    fin.year,
    fin.per_pupil_spend,
    fin.staff_cost_pct,
    fin.teacher_cost_pct,
    fin.support_staff_cost_pct,
    fin.premises_cost_pct
from {{ ref('stg_fbit_finance') }} as fin

View File

@@ -0,0 +1,22 @@
-- Mart: KS2 performance fact table — one row per URN per year
-- Includes predecessor data via lineage resolution: urn is the lineage-resolved
-- current URN, source_urn is the URN the figures were reported under.
select
    ks2.current_urn as urn,
    ks2.source_urn,
    ks2.year,
    ks2.total_pupils,
    ks2.rwm_expected_pct,
    ks2.reading_expected_pct,
    ks2.writing_expected_pct,
    ks2.maths_expected_pct,
    ks2.rwm_high_pct,
    ks2.reading_high_pct,
    ks2.writing_high_pct,
    ks2.maths_high_pct,
    ks2.reading_progress,
    ks2.writing_progress,
    ks2.maths_progress,
    ks2.reading_avg_score,
    ks2.maths_avg_score
from {{ ref('int_ks2_with_lineage') }} as ks2

View File

@@ -0,0 +1,16 @@
-- Mart: KS4 performance fact table — one row per URN per year
-- urn is the lineage-resolved current URN; source_urn is the URN the figures
-- were reported under.
select
    ks4.current_urn as urn,
    ks4.source_urn,
    ks4.year,
    ks4.total_pupils,
    ks4.progress_8_score,
    ks4.attainment_8_score,
    ks4.ebacc_entry_pct,
    ks4.ebacc_achievement_pct,
    ks4.english_strong_pass_pct,
    ks4.maths_strong_pass_pct,
    ks4.english_maths_strong_pass_pct,
    ks4.staying_in_education_pct
from {{ ref('int_ks4_with_lineage') }} as ks4

View File

@@ -0,0 +1,25 @@
-- Mart: Full Ofsted inspection history — one row per inspection
-- Column-for-column projection of stg_ofsted_inspections (no filtering).
select
    insp.urn,
    insp.inspection_date,
    insp.inspection_type,
    insp.framework,
    insp.overall_effectiveness,
    insp.quality_of_education,
    insp.behaviour_attitudes,
    insp.personal_development,
    insp.leadership_management,
    insp.early_years_provision,
    insp.sixth_form_provision,
    insp.rc_safeguarding_met,
    insp.rc_inclusion,
    insp.rc_curriculum_teaching,
    insp.rc_achievement,
    insp.rc_attendance_behaviour,
    insp.rc_personal_development,
    insp.rc_leadership_governance,
    insp.rc_early_years,
    insp.rc_sixth_form,
    insp.report_url
from {{ ref('stg_ofsted_inspections') }} as insp

View File

@@ -0,0 +1,15 @@
-- Mart: Parent View survey responses — one row per URN (latest survey)
-- The staging model can hold several surveys per school, so the grain is
-- enforced here rather than assumed: rows are ranked per URN by survey_date
-- and only the most recent is kept.
with ranked as (
    select
        urn,
        survey_date,
        total_responses,
        q_happy_pct,
        q_safe_pct,
        q_progress_pct,
        q_well_taught_pct,
        q_well_led_pct,
        q_behaviour_pct,
        q_bullying_pct,
        q_recommend_pct,
        row_number() over (
            partition by urn
            -- NULLS LAST: an undated row must not outrank a dated survey.
            order by survey_date desc nulls last
        ) as survey_rank
    from {{ ref('stg_parent_view') }}
)

select
    urn,
    survey_date,
    total_responses,
    q_happy_pct,
    q_safe_pct,
    q_progress_pct,
    q_well_taught_pct,
    q_well_led_pct,
    q_behaviour_pct,
    q_bullying_pct,
    q_recommend_pct
from ranked
where survey_rank = 1

View File

@@ -0,0 +1,8 @@
-- Mart: Phonics screening results — one row per URN per year
-- Column-for-column projection of stg_ees_phonics.
select
    ph.urn,
    ph.year,
    ph.year1_phonics_pct,
    ph.year2_phonics_pct
from {{ ref('stg_ees_phonics') }} as ph

View File

@@ -0,0 +1,18 @@
-- Mart: Pupil characteristics — one row per URN per year
-- Column-for-column projection of int_pupil_chars_merged.
select
    pc.urn,
    pc.year,
    pc.fsm_pct,
    pc.sen_support_pct,
    pc.sen_ehcp_pct,
    pc.eal_pct,
    pc.disadvantaged_pct,
    pc.ethnicity_white_pct,
    pc.ethnicity_asian_pct,
    pc.ethnicity_black_pct,
    pc.ethnicity_mixed_pct,
    pc.ethnicity_other_pct,
    pc.class_size_avg,
    pc.stability_pct
from {{ ref('int_pupil_chars_merged') }} as pc

View File

@@ -0,0 +1,9 @@
-- Mart: School predecessor/successor lineage map
-- Exposes int_school_lineage with current_urn renamed to urn.
select
    lin.current_urn as urn,
    lin.predecessor_urn,
    lin.link_type,
    lin.link_date,
    lin.depth
from {{ ref('int_school_lineage') }} as lin

View File

@@ -0,0 +1,69 @@
version: 2

# Source declarations for the tables Meltano loads into the raw schema.
sources:
  - name: raw
    description: Raw data loaded by Meltano Singer taps into the raw schema
    schema: raw
    tables:
      # The two GIAS tables keep the original mixed-case CSV headers: the
      # staging models select "URN" with double quotes. An unquoted `urn` in a
      # test would be folded to lower case by Postgres and not resolve, so the
      # column is declared with its exact name and quoting enabled.
      - name: gias_establishments
        description: GIAS bulk establishment data (one row per URN)
        columns:
          - name: URN
            quote: true
            tests: [not_null, unique]

      - name: gias_links
        description: GIAS predecessor/successor links between schools
        columns:
          - name: URN
            quote: true
            tests: [not_null]

      # The remaining tables are read by staging with an unquoted lower-case urn.
      - name: ofsted_inspections
        description: Ofsted Management Information inspection records
        columns:
          - name: urn
            tests: [not_null]

      - name: ees_ks2
        description: KS2 attainment data from Explore Education Statistics
        columns:
          - name: urn
            tests: [not_null]

      - name: ees_ks4
        description: KS4 attainment data from Explore Education Statistics
        columns:
          - name: urn
            tests: [not_null]

      - name: ees_census
        description: School census pupil characteristics
        columns:
          - name: urn
            tests: [not_null]

      - name: ees_admissions
        description: Primary and secondary school admissions data
        columns:
          - name: urn
            tests: [not_null]

      - name: ees_phonics
        description: Phonics screening check results
        columns:
          - name: urn
            tests: [not_null]

      - name: parent_view
        description: Ofsted Parent View survey responses
        columns:
          - name: urn
            tests: [not_null]

      - name: fbit_finance
        description: Financial benchmarking data from FBIT API
        columns:
          - name: urn
            tests: [not_null]

      - name: idaci
        description: Income Deprivation Affecting Children Index lookups

View File

@@ -0,0 +1,19 @@
-- Staging model: Primary and secondary school admissions from EES
-- Casts each raw column to its target type, renames time_period to year,
-- and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.time_period as integer) as year,
    cast(src.published_admission_number as integer) as published_admission_number,
    cast(src.total_applications as integer) as total_applications,
    cast(src.first_preference_offers_pct as numeric) as first_preference_offers_pct,
    cast(src.oversubscribed as boolean) as oversubscribed
from {{ source('raw', 'ees_admissions') }} as src
where src.urn is not null

View File

@@ -0,0 +1,27 @@
-- Staging model: School census pupil characteristics from EES
-- Casts each raw column to its target type, renames time_period to year,
-- and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.time_period as integer) as year,
    cast(src.fsm_pct as numeric) as fsm_pct,
    cast(src.sen_support_pct as numeric) as sen_support_pct,
    cast(src.sen_ehcp_pct as numeric) as sen_ehcp_pct,
    cast(src.eal_pct as numeric) as eal_pct,
    cast(src.disadvantaged_pct as numeric) as disadvantaged_pct,
    cast(src.ethnicity_white_pct as numeric) as ethnicity_white_pct,
    cast(src.ethnicity_asian_pct as numeric) as ethnicity_asian_pct,
    cast(src.ethnicity_black_pct as numeric) as ethnicity_black_pct,
    cast(src.ethnicity_mixed_pct as numeric) as ethnicity_mixed_pct,
    cast(src.ethnicity_other_pct as numeric) as ethnicity_other_pct,
    cast(src.class_size_avg as numeric) as class_size_avg,
    cast(src.stability_pct as numeric) as stability_pct
from {{ source('raw', 'ees_census') }} as src
where src.urn is not null

View File

@@ -0,0 +1,31 @@
-- Staging model: KS2 attainment data from EES
-- Column names depend on the EES dataset schema; these will be finalised
-- once the tap-uk-ees extractor resolves the actual column names.
-- Casts each raw column to its target type, renames it to the project's
-- naming, and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.time_period as integer) as year,
    cast(src.t_pupils as integer) as total_pupils,
    cast(src.pt_rwm_met_expected_standard as numeric) as rwm_expected_pct,
    cast(src.pt_read_met_expected_standard as numeric) as reading_expected_pct,
    cast(src.pt_write_met_expected_standard as numeric) as writing_expected_pct,
    cast(src.pt_maths_met_expected_standard as numeric) as maths_expected_pct,
    cast(src.pt_rwm_met_higher_standard as numeric) as rwm_high_pct,
    cast(src.pt_read_met_higher_standard as numeric) as reading_high_pct,
    cast(src.pt_write_met_higher_standard as numeric) as writing_high_pct,
    cast(src.pt_maths_met_higher_standard as numeric) as maths_high_pct,
    cast(src.read_progress as numeric) as reading_progress,
    cast(src.write_progress as numeric) as writing_progress,
    cast(src.maths_progress as numeric) as maths_progress,
    cast(src.read_average_score as numeric) as reading_avg_score,
    cast(src.maths_average_score as numeric) as maths_avg_score
from {{ source('raw', 'ees_ks2') }} as src
where src.urn is not null

View File

@@ -0,0 +1,24 @@
-- Staging model: KS4 attainment data from EES (secondary schools — NEW)
-- Casts each raw column to its target type, renames time_period and t_pupils,
-- and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.time_period as integer) as year,
    cast(src.t_pupils as integer) as total_pupils,
    cast(src.progress_8_score as numeric) as progress_8_score,
    cast(src.attainment_8_score as numeric) as attainment_8_score,
    cast(src.ebacc_entry_pct as numeric) as ebacc_entry_pct,
    cast(src.ebacc_achievement_pct as numeric) as ebacc_achievement_pct,
    cast(src.english_strong_pass_pct as numeric) as english_strong_pass_pct,
    cast(src.maths_strong_pass_pct as numeric) as maths_strong_pass_pct,
    cast(src.english_maths_strong_pass_pct as numeric) as english_maths_strong_pass_pct,
    cast(src.staying_in_education_pct as numeric) as staying_in_education_pct
from {{ source('raw', 'ees_ks4') }} as src
where src.urn is not null

View File

@@ -0,0 +1,17 @@
-- Staging model: Phonics screening check results from EES
-- Casts each raw column to its target type, renames time_period to year,
-- and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.time_period as integer) as year,
    cast(src.year1_phonics_pct as numeric) as year1_phonics_pct,
    cast(src.year2_phonics_pct as numeric) as year2_phonics_pct
from {{ source('raw', 'ees_phonics') }} as src
where src.urn is not null

View File

@@ -0,0 +1,20 @@
-- Staging model: Financial benchmarking data from FBIT API
-- Casts each raw column to its target type and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.year as integer) as year,
    cast(src.per_pupil_spend as numeric) as per_pupil_spend,
    cast(src.staff_cost_pct as numeric) as staff_cost_pct,
    cast(src.teacher_cost_pct as numeric) as teacher_cost_pct,
    cast(src.support_staff_cost_pct as numeric) as support_staff_cost_pct,
    cast(src.premises_cost_pct as numeric) as premises_cost_pct
from {{ source('raw', 'fbit_finance') }} as src
where src.urn is not null

View File

@@ -0,0 +1,49 @@
-- Staging model: GIAS establishments
-- Light cleaning, type casting, column renaming from raw GIAS bulk CSV.
-- Raw columns keep the original mixed-case headers, hence the double quotes.
-- Rows without a URN are dropped.
with source as (
    select * from {{ source('raw', 'gias_establishments') }}
),

renamed as (
    select
        cast("URN" as integer) as urn,
        cast("LA (code)" as integer) as local_authority_code,
        "LA (name)" as local_authority_name,
        cast("EstablishmentNumber" as integer) as establishment_number,
        "EstablishmentName" as school_name,
        "TypeOfEstablishment (name)" as school_type,
        "PhaseOfEducation (name)" as phase,
        "Gender (name)" as gender,
        "ReligiousCharacter (name)" as religious_character,
        "AdmissionsPolicy (name)" as admissions_policy,
        -- Cast like NumberOfPupils below so both pupil counts share a type.
        cast("SchoolCapacity" as integer) as capacity,
        cast("NumberOfPupils" as integer) as total_pupils,
        "HeadTitle (name)" as head_title,
        "HeadFirstName" as head_first_name,
        "HeadLastName" as head_last_name,
        "TelephoneNum" as telephone,
        "SchoolWebsite" as website,
        "Street" as address_line1,
        "Locality" as address_line2,
        "Town" as town,
        "County (name)" as county,
        "Postcode" as postcode,
        "EstablishmentStatus (name)" as status,
        -- NOTE(review): these casts rely on the session DateStyle matching the
        -- date format the tap loads; confirm against the raw values.
        cast("OpenDate" as date) as open_date,
        cast("CloseDate" as date) as close_date,
        "Trusts (name)" as academy_trust_name,
        cast("Trusts (code)" as integer) as academy_trust_uid,
        "UrbanRural (name)" as urban_rural,
        "ParliamentaryConstituency (name)" as parliamentary_constituency,
        "NurseryProvision (name)" as nursery_provision,
        cast("Easting" as integer) as easting,
        cast("Northing" as integer) as northing,
        -- Age range
        cast("StatutoryLowAge" as integer) as statutory_low_age,
        cast("StatutoryHighAge" as integer) as statutory_high_age
    from source
    where "URN" is not null
)

select * from renamed

View File

@@ -0,0 +1,18 @@
-- Staging model: GIAS school links (predecessor/successor chains)
-- Renames the mixed-case raw columns, casts URNs and the link date, and keeps
-- only rows where both ends of the link are present.
select
    cast(src."URN" as integer) as urn,
    cast(src."LinkURN" as integer) as linked_urn,
    src."LinkType" as link_type,
    cast(src."LinkEstablishedDate" as date) as link_date
from {{ source('raw', 'gias_links') }} as src
where src."URN" is not null
  and src."LinkURN" is not null

View File

@@ -0,0 +1,15 @@
-- Staging model: Income Deprivation Affecting Children Index
-- Passes lsoa_code through and casts the score and decile; no rows filtered.
select
    src.lsoa_code,
    cast(src.idaci_score as numeric) as idaci_score,
    cast(src.idaci_decile as integer) as idaci_decile
from {{ source('raw', 'idaci') }} as src

View File

@@ -0,0 +1,40 @@
-- Staging model: Ofsted inspection records
-- Handles both OEIF (pre-Nov 2025) and Report Card (post-Nov 2025) frameworks
-- Casts the URN, date and OEIF grade columns; Report Card columns and
-- report_url pass through unchanged. Rows without a URN are dropped.
select
    cast(src.urn as integer) as urn,
    cast(src.inspection_date as date) as inspection_date,
    src.inspection_type,
    src.event_type_grouping as framework,
    -- OEIF grades (1-4 scale)
    cast(src.overall_effectiveness as integer) as overall_effectiveness,
    cast(src.quality_of_education as integer) as quality_of_education,
    cast(src.behaviour_and_attitudes as integer) as behaviour_attitudes,
    cast(src.personal_development as integer) as personal_development,
    cast(src.effectiveness_of_leadership_and_management as integer) as leadership_management,
    cast(src.early_years_provision as integer) as early_years_provision,
    cast(src.sixth_form_provision as integer) as sixth_form_provision,
    -- Report Card fields (populated for post-Nov 2025 inspections)
    src.rc_safeguarding_met,
    src.rc_inclusion,
    src.rc_curriculum_teaching,
    src.rc_achievement,
    src.rc_attendance_behaviour,
    src.rc_personal_development,
    src.rc_leadership_governance,
    src.rc_early_years,
    src.rc_sixth_form,
    src.report_url
from {{ source('raw', 'ofsted_inspections') }} as src
where src.urn is not null

View File

@@ -0,0 +1,24 @@
-- Staging model: Ofsted Parent View survey responses
-- Casts each raw column to its target type and drops rows that have no URN.
select
    cast(src.urn as integer) as urn,
    cast(src.survey_date as date) as survey_date,
    cast(src.total_responses as integer) as total_responses,
    cast(src.q_happy_pct as numeric) as q_happy_pct,
    cast(src.q_safe_pct as numeric) as q_safe_pct,
    cast(src.q_progress_pct as numeric) as q_progress_pct,
    cast(src.q_well_taught_pct as numeric) as q_well_taught_pct,
    cast(src.q_well_led_pct as numeric) as q_well_led_pct,
    cast(src.q_behaviour_pct as numeric) as q_behaviour_pct,
    cast(src.q_bullying_pct as numeric) as q_bullying_pct,
    cast(src.q_recommend_pct as numeric) as q_recommend_pct
from {{ source('raw', 'parent_view') }} as src
where src.urn is not null

View File

@@ -0,0 +1,22 @@
# dbt connection profile for the school_compare project.
school_compare:
  # Target used when none is selected explicitly.
  target: dev

  outputs:
    # Local development: every setting falls back to a default when the
    # corresponding PG_* environment variable is unset.
    dev:
      type: postgres
      host: "{{ env_var('PG_HOST', 'localhost') }}"
      port: "{{ env_var('PG_PORT', '5432') | int }}"
      user: "{{ env_var('PG_USER', 'postgres') }}"
      password: "{{ env_var('PG_PASSWORD', 'postgres') }}"
      dbname: "{{ env_var('PG_DATABASE', 'school_compare') }}"
      schema: public
      threads: 4

    # Production: no defaults are supplied, so all PG_* variables must be
    # provided by the environment.
    production:
      type: postgres
      host: "{{ env_var('PG_HOST') }}"
      port: "{{ env_var('PG_PORT') | int }}"
      user: "{{ env_var('PG_USER') }}"
      password: "{{ env_var('PG_PASSWORD') }}"
      dbname: "{{ env_var('PG_DATABASE') }}"
      schema: public
      threads: 4

View File

@@ -0,0 +1,66 @@
la_code,la_name
201,City of London
202,Camden
203,Greenwich
204,Hackney
205,Hammersmith and Fulham
206,Islington
207,Kensington and Chelsea
208,Lambeth
209,Lewisham
210,Southwark
211,Tower Hamlets
212,Wandsworth
213,Westminster
301,Barking and Dagenham
302,Barnet
303,Bexley
304,Brent
305,Bromley
306,Croydon
307,Ealing
308,Enfield
309,Haringey
310,Harrow
311,Havering
312,Hillingdon
313,Hounslow
314,Kingston upon Thames
315,Merton
316,Newham
317,Redbridge
318,Richmond upon Thames
319,Sutton
320,Waltham Forest
330,Birmingham
331,Coventry
332,Dudley
333,Sandwell
334,Solihull
335,Walsall
336,Wolverhampton
340,Knowsley
341,Liverpool
342,St Helens
343,Sefton
344,Wirral
350,Bolton
351,Bury
352,Manchester
353,Oldham
354,Rochdale
355,Salford
356,Stockport
357,Tameside
358,Trafford
359,Wigan
370,Barnsley
371,Doncaster
372,Rotherham
373,Sheffield
380,Bradford
381,Calderdale
382,Kirklees
383,Leeds
384,Wakefield
1 la_code la_name
2 201 City of London
3 202 Camden
4 203 Greenwich
5 204 Hackney
6 205 Hammersmith and Fulham
7 206 Islington
8 207 Kensington and Chelsea
9 208 Lambeth
10 209 Lewisham
11 210 Merton
12 211 Newham
13 212 Tower Hamlets
14 213 Wandsworth
15 214 Westminster
16 301 Barking and Dagenham
17 302 Barnet
18 303 Bexley
19 304 Brent
20 305 Bromley
21 306 Croydon
22 307 Ealing
23 308 Enfield
24 309 Haringey
25 310 Harrow
26 311 Havering
27 312 Hillingdon
28 313 Hounslow
29 314 Kingston upon Thames
30 315 Redbridge
31 316 Richmond upon Thames
32 317 Sutton
33 318 Waltham Forest
34 319 City of London
35 320 City of London
36 330 Birmingham
37 331 Coventry
38 332 Dudley
39 333 Sandwell
40 334 Solihull
41 335 Walsall
42 336 Wolverhampton
43 340 Knowsley
44 341 Liverpool
45 342 St Helens
46 343 Sefton
47 344 Wirral
48 350 Bolton
49 351 Bury
50 352 Manchester
51 353 Oldham
52 354 Rochdale
53 355 Salford
54 356 Stockport
55 357 Tameside
56 358 Trafford
57 359 Wigan
58 370 Barnsley
59 371 Doncaster
60 372 Rotherham
61 373 Sheffield
62 380 Bradford
63 381 Calderdale
64 382 Kirklees
65 383 Leeds
66 384 Wakefield

View File

@@ -0,0 +1,30 @@
type_code,type_name,type_group
1,Community school,Maintained
2,Voluntary aided school,Maintained
3,Voluntary controlled school,Maintained
5,Foundation school,Maintained
6,City technology college,Independent
7,Community special school,Special
8,Non-maintained special school,Special
10,Other independent school,Independent
11,Other independent special school,Independent
12,Foundation special school,Special
14,Pupil referral unit,PRU
24,Secure unit,Other
25,Offshore school,Other
26,Service children's education,Other
28,Academy sponsor led,Academy
33,Academy special sponsor led,Academy
34,Academy converter,Academy
35,Free schools,Academy
36,Free schools special,Academy
37,British schools overseas,Other
38,Free schools - alternative provision,Academy
39,Free schools - 16-19,Academy
40,University technical college,Academy
41,Studio school,Academy
42,Academy alternative provision converter,Academy
43,Academy alternative provision sponsor led,Academy
44,Academy special converter,Academy
46,Academy 16-19 converter,Academy
47,Academy 16-19 sponsor led,Academy
1 type_code type_name type_group
2 1 Community school Maintained
3 2 Voluntary aided school Maintained
4 3 Voluntary controlled school Maintained
5 5 Foundation school Maintained
6 6 City technology college Independent
7 7 Community special school Special
8 8 Non-maintained special school Special
9 10 Other independent school Independent
10 11 Other independent special school Independent
11 12 Foundation special school Special
12 14 Pupil referral unit PRU
13 24 Secure unit Other
14 25 Offshore school Other
15 26 Service children's education Other
16 28 Academy sponsor led Academy
17 33 Academy special sponsor led Academy
18 34 Academy converter Academy
19 35 Free schools Academy
20 36 Free schools special Academy
21 37 British schools overseas Other
22 38 Free schools - alternative provision Academy
23 39 Free schools - 16-19 Academy
24 40 University technical college Academy
25 41 Studio school Academy
26 42 Academy alternative provision converter Academy
27 43 Academy alternative provision sponsor led Academy
28 44 Academy special converter Academy
29 46 Academy 16-19 converter Academy
30 47 Academy 16-19 sponsor led Academy

View File

@@ -0,0 +1,7 @@
-- Custom test: All fact table URNs should exist in dim_school
-- Currently checks fact_ks2_performance only. Returns up to 10 offending URNs;
-- any returned row fails the test.
select f.urn
from {{ ref('fact_ks2_performance') }} as f
where not exists (
    select 1
    from {{ ref('dim_school') }} as d
    where d.urn = f.urn
)
limit 10

View File

@@ -0,0 +1,13 @@
-- Custom test: All geocoded schools should have coordinates within the UK
-- Returns rows whose easting is outside 0..700000 or whose northing is outside
-- 0..1300000; rows missing either coordinate are not checked.
select
    loc.urn,
    loc.easting,
    loc.northing
from {{ ref('dim_location') }} as loc
where loc.easting is not null
  and loc.northing is not null
  and (
      loc.easting not between 0 and 700000
      or loc.northing not between 0 and 1300000
  )