feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

Replaces the hand-rolled integrator with a production-grade ELT pipeline
using Meltano (Singer taps), dbt Core (medallion architecture), and
Apache Airflow (orchestration). Adds Typesense for search and PostGIS
for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-26 08:37:53 +00:00
parent 8aca0a7a53
commit 8f02b5125e
65 changed files with 2822 additions and 72 deletions

View File

@@ -0,0 +1,110 @@
version: 2

# Properties file for the mart layer: model descriptions plus generic data tests.
# Conventions used below:
#   * every model keyed by school carries not_null on urn;
#   * dimensions keyed by urn also assert uniqueness;
#   * facts documented as "one row per URN per year" assert that grain with a
#     model-level unique test over the concatenated (urn, year) key.

models:
  # ---------------------------------------------------------------- dimensions
  - name: dim_school
    description: Canonical school dimension — one row per active URN
    columns:
      - name: urn
        tests: [not_null, unique]
      - name: school_name
        tests: [not_null]
      - name: phase
        tests: [not_null]
      - name: status
        tests:
          # The model filters to open schools, so any other value is a regression.
          - accepted_values:
              values: ["Open"]

  - name: dim_location
    description: School location dimension with PostGIS geometry
    columns:
      - name: urn
        tests:
          - not_null
          - unique
          - relationships:
              to: ref('dim_school')
              field: urn
      - name: postcode
        tests: [not_null]

  - name: map_school_lineage
    description: Predecessor/successor lineage map
    columns:
      - name: urn
        tests:
          - not_null
          - relationships:
              to: ref('dim_school')
              field: urn

  # --------------------------------------------------------------------- facts
  - name: fact_ks2_performance
    description: KS2 attainment — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ks4_performance
    description: KS4 attainment — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ofsted_inspection
    description: Full Ofsted inspection history
    columns:
      - name: urn
        tests: [not_null]
      - name: inspection_date
        tests: [not_null]

  - name: fact_pupil_characteristics
    description: Pupil demographics — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Grain test added to match the documented grain (same check as the KS2/KS4 facts).
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_admissions
    description: School admissions — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Grain test added to match the documented grain (same check as the KS2/KS4 facts).
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_finance
    description: School financial data — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Grain test added to match the documented grain (same check as the KS2/KS4 facts).
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_phonics
    description: Phonics screening results — one row per URN per year
    columns:
      - name: urn
        tests: [not_null]
      - name: year
        tests: [not_null]
    # Grain test added to match the documented grain (same check as the KS2/KS4 facts).
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_parent_view
    description: Parent View survey responses
    columns:
      - name: urn
        tests: [not_null]

  # No column tests yet: the model is still a placeholder (see its SQL header).
  - name: fact_deprivation
    description: IDACI deprivation index

View File

@@ -0,0 +1,19 @@
-- Mart: dim_location — address and grid-reference attributes for each school.
-- Intended grain: one row per URN (PostGIS-enabled table).
-- This query does not select a geom column itself; geom is populated
-- afterwards by a post-hook or the geocode script.
with establishments as (

    select * from {{ ref('stg_gias_establishments') }}

)

select
    establishments.urn,
    establishments.address_line1,
    establishments.address_line2,
    establishments.town,
    establishments.county,
    establishments.postcode,
    establishments.local_authority_code,
    establishments.local_authority_name,
    establishments.parliamentary_constituency,
    establishments.urban_rural,
    establishments.easting,
    establishments.northing
from establishments
-- Only open schools that have a postcode are kept.
where establishments.postcode is not null
    and establishments.status = 'Open'

View File

@@ -0,0 +1,40 @@
-- Mart: Canonical school dimension — one row per active URN
-- Built from stg_gias_establishments (filtered to status = 'Open') and
-- left-joined to int_ofsted_latest, so a school with no matching Ofsted row
-- is still returned with null ofsted_* columns.
-- NOTE(review): the one-row-per-URN grain relies on int_ofsted_latest holding
-- at most one row per urn — confirm against that model.
with schools as (
    select * from {{ ref('stg_gias_establishments') }}
),

latest_ofsted as (
    select * from {{ ref('int_ofsted_latest') }}
)

select
    s.urn,
    -- LAESTAB (DfE number) is the 3-digit LA code followed by the 4-digit
    -- establishment number, so the LA code is shifted four decimal places.
    -- (A multiplier of 1000 would overlap the two parts: 373/2000 -> 375000.)
    s.local_authority_code * 10000 + s.establishment_number as laestab,
    s.school_name,
    s.phase,
    s.school_type,
    s.academy_trust_name,
    s.academy_trust_uid,
    s.religious_character,
    s.gender,
    -- Null if either bound is null (|| propagates nulls).
    s.statutory_low_age || '-' || s.statutory_high_age as age_range,
    s.capacity,
    s.total_pupils,
    -- concat_ws skips null parts, so a missing title or first name leaves no gap.
    concat_ws(' ', s.head_title, s.head_first_name, s.head_last_name) as headteacher_name,
    s.website,
    s.telephone,
    s.open_date,
    s.close_date,
    s.status,
    s.nursery_provision,
    s.admissions_policy,
    -- Latest Ofsted
    o.overall_effectiveness as ofsted_grade,
    o.inspection_date as ofsted_date,
    o.framework as ofsted_framework
from schools s
left join latest_ofsted o on s.urn = o.urn
where s.status = 'Open'

View File

@@ -0,0 +1,10 @@
-- Mart: fact_admissions — school admissions figures.
-- Intended grain: one row per URN per year; columns are passed through from
-- stg_ees_admissions without filtering or transformation.
with admissions as (

    select * from {{ ref('stg_ees_admissions') }}

)

select
    admissions.urn,
    admissions.year,
    admissions.published_admission_number,
    admissions.total_applications,
    admissions.first_preference_offers_pct,
    admissions.oversubscribed
from admissions

View File

@@ -0,0 +1,22 @@
-- Mart: fact_deprivation — IDACI deprivation index (placeholder).
-- Target design: one row per URN, resolved as school postcode → LSOA → IDACI score.
-- Current output: the lsoa_code / idaci_score / idaci_decile rows of stg_idaci,
-- with no urn column, because the postcode→LSOA step is not wired in yet.
with open_school_postcodes as (

    -- Open schools with a postcode. Not referenced by the final select yet;
    -- kept ready for the postcode→LSOA join described below.
    select
        establishments.urn,
        establishments.postcode
    from {{ ref('stg_gias_establishments') }} as establishments
    where establishments.postcode is not null
        and establishments.status = 'Open'

)

-- The postcode→LSOA join needs a postcode-to-LSOA lookup table, which will be
-- populated by the geocode script or a seed. Until the IDACI tap provides that
-- mapping, this model stays a placeholder.
select
    idaci.lsoa_code,
    idaci.idaci_score,
    idaci.idaci_decile
from {{ ref('stg_idaci') }} as idaci

View File

@@ -0,0 +1,11 @@
-- Mart: fact_finance — school financial data.
-- Intended grain: one row per URN per year; columns are passed through from
-- stg_fbit_finance without filtering or transformation.
with finance as (

    select * from {{ ref('stg_fbit_finance') }}

)

select
    finance.urn,
    finance.year,
    finance.per_pupil_spend,
    finance.staff_cost_pct,
    finance.teacher_cost_pct,
    finance.support_staff_cost_pct,
    finance.premises_cost_pct
from finance

View File

@@ -0,0 +1,22 @@
-- Mart: fact_ks2_performance — KS2 attainment measures.
-- Intended grain: one row per URN per year.
-- Rows come from int_ks2_with_lineage: current_urn is exposed as urn, and
-- source_urn is kept alongside it so predecessor-school results stay traceable.
with ks2 as (

    select * from {{ ref('int_ks2_with_lineage') }}

)

select
    ks2.current_urn as urn,
    ks2.source_urn,
    ks2.year,
    ks2.total_pupils,
    -- Expected standard
    ks2.rwm_expected_pct,
    ks2.reading_expected_pct,
    ks2.writing_expected_pct,
    ks2.maths_expected_pct,
    -- Higher standard
    ks2.rwm_high_pct,
    ks2.reading_high_pct,
    ks2.writing_high_pct,
    ks2.maths_high_pct,
    -- Progress measures
    ks2.reading_progress,
    ks2.writing_progress,
    ks2.maths_progress,
    -- Average scores
    ks2.reading_avg_score,
    ks2.maths_avg_score
from ks2

View File

@@ -0,0 +1,16 @@
-- Mart: fact_ks4_performance — KS4 attainment measures.
-- Intended grain: one row per URN per year.
-- Rows come from int_ks4_with_lineage: current_urn is exposed as urn, with
-- source_urn kept alongside it.
with ks4 as (

    select * from {{ ref('int_ks4_with_lineage') }}

)

select
    ks4.current_urn as urn,
    ks4.source_urn,
    ks4.year,
    ks4.total_pupils,
    ks4.progress_8_score,
    ks4.attainment_8_score,
    ks4.ebacc_entry_pct,
    ks4.ebacc_achievement_pct,
    ks4.english_strong_pass_pct,
    ks4.maths_strong_pass_pct,
    ks4.english_maths_strong_pass_pct,
    ks4.staying_in_education_pct
from ks4

View File

@@ -0,0 +1,25 @@
-- Mart: fact_ofsted_inspection — full Ofsted inspection history.
-- Intended grain: one row per inspection; columns are passed through from
-- stg_ofsted_inspections without filtering or transformation.
with inspections as (

    select * from {{ ref('stg_ofsted_inspections') }}

)

select
    inspections.urn,
    inspections.inspection_date,
    inspections.inspection_type,
    inspections.framework,
    -- Graded judgement columns
    inspections.overall_effectiveness,
    inspections.quality_of_education,
    inspections.behaviour_attitudes,
    inspections.personal_development,
    inspections.leadership_management,
    inspections.early_years_provision,
    inspections.sixth_form_provision,
    -- rc_* columns (presumably report-card areas — confirm against the staging model)
    inspections.rc_safeguarding_met,
    inspections.rc_inclusion,
    inspections.rc_curriculum_teaching,
    inspections.rc_achievement,
    inspections.rc_attendance_behaviour,
    inspections.rc_personal_development,
    inspections.rc_leadership_governance,
    inspections.rc_early_years,
    inspections.rc_sixth_form,
    inspections.report_url
from inspections

View File

@@ -0,0 +1,15 @@
-- Mart: fact_parent_view — Parent View survey responses.
-- Intended grain: one row per URN (latest survey).
-- NOTE(review): no deduplication happens here, so that grain holds only if
-- stg_parent_view already keeps a single latest survey per URN — confirm.
with parent_view as (

    select * from {{ ref('stg_parent_view') }}

)

select
    parent_view.urn,
    parent_view.survey_date,
    parent_view.total_responses,
    parent_view.q_happy_pct,
    parent_view.q_safe_pct,
    parent_view.q_progress_pct,
    parent_view.q_well_taught_pct,
    parent_view.q_well_led_pct,
    parent_view.q_behaviour_pct,
    parent_view.q_bullying_pct,
    parent_view.q_recommend_pct
from parent_view

View File

@@ -0,0 +1,8 @@
-- Mart: fact_phonics — phonics screening results.
-- Intended grain: one row per URN per year; columns are passed through from
-- stg_ees_phonics without filtering or transformation.
with phonics as (

    select * from {{ ref('stg_ees_phonics') }}

)

select
    phonics.urn,
    phonics.year,
    phonics.year1_phonics_pct,
    phonics.year2_phonics_pct
from phonics

View File

@@ -0,0 +1,18 @@
-- Mart: fact_pupil_characteristics — pupil demographics.
-- Intended grain: one row per URN per year; columns are passed through from
-- int_pupil_chars_merged without filtering or transformation.
with pupil_chars as (

    select * from {{ ref('int_pupil_chars_merged') }}

)

select
    pupil_chars.urn,
    pupil_chars.year,
    pupil_chars.fsm_pct,
    pupil_chars.sen_support_pct,
    pupil_chars.sen_ehcp_pct,
    pupil_chars.eal_pct,
    pupil_chars.disadvantaged_pct,
    -- Ethnicity breakdown
    pupil_chars.ethnicity_white_pct,
    pupil_chars.ethnicity_asian_pct,
    pupil_chars.ethnicity_black_pct,
    pupil_chars.ethnicity_mixed_pct,
    pupil_chars.ethnicity_other_pct,
    pupil_chars.class_size_avg,
    pupil_chars.stability_pct
from pupil_chars

View File

@@ -0,0 +1,9 @@
-- Mart: map_school_lineage — predecessor/successor lineage map.
-- Exposes int_school_lineage with current_urn renamed to urn; each row pairs
-- that urn with a predecessor_urn plus the link's type, date and depth.
with lineage as (

    select * from {{ ref('int_school_lineage') }}

)

select
    lineage.current_urn as urn,
    lineage.predecessor_urn,
    lineage.link_type,
    lineage.link_date,
    lineage.depth
from lineage