feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

Replaces the hand-rolled integrator with a production-grade ELT pipeline
using Meltano (Singer taps), dbt Core (medallion architecture), and
Apache Airflow (orchestration). Adds Typesense for search and PostGIS
for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-26 08:37:53 +00:00
parent 8aca0a7a53
commit 8f02b5125e
65 changed files with 2822 additions and 72 deletions

View File

@@ -0,0 +1,62 @@
-- Intermediate model: KS2 results chained across academy conversions.
-- Every staging row is emitted under its own URN, and rows reported by a
-- predecessor URN are additionally emitted under the URN that
-- int_school_lineage lists as its successor (current_urn).
-- source_urn always holds the URN the figures were originally reported under.
with own_results as (
    -- A school's own KS2 rows: current_urn and source_urn are the same URN.
    select
        stg.urn as current_urn,
        stg.urn as source_urn,
        stg.year,
        stg.total_pupils,
        stg.rwm_expected_pct,
        stg.reading_expected_pct,
        stg.writing_expected_pct,
        stg.maths_expected_pct,
        stg.rwm_high_pct,
        stg.reading_high_pct,
        stg.writing_high_pct,
        stg.maths_high_pct,
        stg.reading_progress,
        stg.writing_progress,
        stg.maths_progress,
        stg.reading_avg_score,
        stg.maths_avg_score
    from {{ ref('stg_ees_ks2') }} as stg
),

inherited_results as (
    -- Predecessor rows re-keyed to the successor URN from the lineage model.
    select
        lin.current_urn,
        pred.urn as source_urn,
        pred.year,
        pred.total_pupils,
        pred.rwm_expected_pct,
        pred.reading_expected_pct,
        pred.writing_expected_pct,
        pred.maths_expected_pct,
        pred.rwm_high_pct,
        pred.reading_high_pct,
        pred.writing_high_pct,
        pred.maths_high_pct,
        pred.reading_progress,
        pred.writing_progress,
        pred.maths_progress,
        pred.reading_avg_score,
        pred.maths_avg_score
    from {{ ref('stg_ees_ks2') }} as pred
    inner join {{ ref('int_school_lineage') }} as lin
        on pred.urn = lin.predecessor_urn
    -- Drop a predecessor row when the successor URN already has its own KS2 row
    -- for that same year, so inherited figures only fill years the successor lacks.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks2') }} as successor
        where successor.urn = lin.current_urn
            and successor.year = pred.year
    )
)

-- NOTE(review): a successor with several predecessors reporting in the same year
-- gets one row per predecessor; source_urn tells them apart.
select * from own_results
union all
select * from inherited_results

View File

@@ -0,0 +1,50 @@
-- Intermediate model: KS4 results chained across academy conversions.
-- Every staging row is emitted under its own URN, and rows reported by a
-- predecessor URN are additionally emitted under the URN that
-- int_school_lineage lists as its successor (current_urn).
-- source_urn always holds the URN the figures were originally reported under.
with own_results as (
    -- A school's own KS4 rows: current_urn and source_urn are the same URN.
    select
        stg.urn as current_urn,
        stg.urn as source_urn,
        stg.year,
        stg.total_pupils,
        stg.progress_8_score,
        stg.attainment_8_score,
        stg.ebacc_entry_pct,
        stg.ebacc_achievement_pct,
        stg.english_strong_pass_pct,
        stg.maths_strong_pass_pct,
        stg.english_maths_strong_pass_pct,
        stg.staying_in_education_pct
    from {{ ref('stg_ees_ks4') }} as stg
),

inherited_results as (
    -- Predecessor rows re-keyed to the successor URN from the lineage model.
    select
        lin.current_urn,
        pred.urn as source_urn,
        pred.year,
        pred.total_pupils,
        pred.progress_8_score,
        pred.attainment_8_score,
        pred.ebacc_entry_pct,
        pred.ebacc_achievement_pct,
        pred.english_strong_pass_pct,
        pred.maths_strong_pass_pct,
        pred.english_maths_strong_pass_pct,
        pred.staying_in_education_pct
    from {{ ref('stg_ees_ks4') }} as pred
    inner join {{ ref('int_school_lineage') }} as lin
        on pred.urn = lin.predecessor_urn
    -- Drop a predecessor row when the successor URN already has its own KS4 row
    -- for that same year, so inherited figures only fill years the successor lacks.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks4') }} as successor
        where successor.urn = lin.current_urn
            and successor.year = pred.year
    )
)

-- NOTE(review): a successor with several predecessors reporting in the same year
-- gets one row per predecessor; source_urn tells them apart.
select * from own_results
union all
select * from inherited_results

View File

@@ -0,0 +1,37 @@
-- Intermediate model: latest Ofsted inspection per URN.
-- Ranks each school's inspections newest-first and keeps only the top-ranked row,
-- so the output has at most one row per urn.
with ranked as (
    select
        *,
        row_number() over (
            partition by urn
            -- PostgreSQL sorts NULLs FIRST under DESC by default. Without
            -- NULLS LAST a row with a missing inspection_date would outrank
            -- every dated inspection and be reported as the "latest" one.
            -- An undated row is now chosen only when the school has no dated
            -- inspection at all.
            -- NOTE(review): two inspections sharing the same date are still
            -- ranked arbitrarily; add a tie-breaker if staging exposes a
            -- unique inspection identifier.
            order by inspection_date desc nulls last
        ) as rn
    from {{ ref('stg_ofsted_inspections') }}
)
select
    urn,
    inspection_date,
    inspection_type,
    framework,
    overall_effectiveness,
    quality_of_education,
    behaviour_attitudes,
    personal_development,
    leadership_management,
    early_years_provision,
    sixth_form_provision,
    rc_safeguarding_met,
    rc_inclusion,
    rc_curriculum_teaching,
    rc_achievement,
    rc_attendance_behaviour,
    rc_personal_development,
    rc_leadership_governance,
    rc_early_years,
    rc_sixth_form,
    report_url
from ranked
-- rn = 1 is the newest inspection within each urn partition.
where rn = 1

View File

@@ -0,0 +1,18 @@
-- Intermediate model: pupil characteristics taken from the census staging model.
-- Pure pass-through projection: one output row per staging row, with no joins,
-- filters or aggregation applied at this layer.
with census as (
    select * from {{ ref('stg_ees_census') }}
)

select
    census.urn,
    census.year,
    census.fsm_pct,
    census.sen_support_pct,
    census.sen_ehcp_pct,
    census.eal_pct,
    census.disadvantaged_pct,
    census.ethnicity_white_pct,
    census.ethnicity_asian_pct,
    census.ethnicity_black_pct,
    census.ethnicity_mixed_pct,
    census.ethnicity_other_pct,
    census.class_size_avg,
    census.stability_pct
from census

View File

@@ -0,0 +1,48 @@
-- Intermediate model: recursive predecessor mapping.
-- Walks GIAS predecessor links so each URN is paired with its direct
-- predecessor (depth 1) and with earlier predecessors further up the chain
-- (depth 2 and above), letting historical data be attributed to a later URN.
-- NOTE(review): every URN that has a predecessor link is emitted as current_urn;
-- nothing here restricts the output to URNs that are still open.
with recursive lineage as (
    -- Anchor: direct predecessor links (academy conversion, amalgamation, etc.).
    select
        direct.urn,
        direct.linked_urn as predecessor_urn,
        direct.link_type,
        direct.link_date,
        1 as depth
    from {{ ref('stg_gias_links') }} as direct
    where direct.link_type in (
        'Predecessor',
        'Predecessor - Loss of academy converter',
        'Predecessor - amalgamated',
        'Predecessor - Fresh Start'
    )

    union all

    -- Recursive step: from the predecessor reached so far, hop to its own
    -- predecessor while keeping the URN the chain started from. link_type and
    -- link_date describe the hop just taken, not the first link in the chain.
    select
        chain.urn,
        hop.linked_urn as predecessor_urn,
        hop.link_type,
        hop.link_date,
        chain.depth + 1 as depth
    from lineage as chain
    inner join {{ ref('stg_gias_links') }} as hop
        on chain.predecessor_urn = hop.urn
    where chain.depth < 5  -- safety limit: no chain is followed past depth 5
        and hop.link_type in (
            'Predecessor',
            'Predecessor - Loss of academy converter',
            'Predecessor - amalgamated',
            'Predecessor - Fresh Start'
        )
)

select
    lineage.urn as current_urn,
    lineage.predecessor_urn,
    lineage.link_type,
    lineage.link_date,
    lineage.depth
from lineage