feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces the hand-rolled integrator with a production-grade ELT pipeline using Meltano (Singer taps), dbt Core (medallion architecture), and Apache Airflow (orchestration). Adds Typesense for search and PostGIS for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
-- Intermediate model: KS2 results carried across academy conversions.
--
-- Every output row is a KS2 staging row tagged with two URNs:
--   current_urn  the URN the row is reported under
--   source_urn   the URN the row was read from in staging
-- For a school's own rows the two are equal; for inherited rows current_urn
-- comes from the school lineage model and source_urn is the predecessor.
--
-- NOTE(review): nothing here enforces one row per (current_urn, year). If
-- several predecessors of the same school report the same year, each one
-- contributes a row -- confirm downstream models expect that.

with own_results as (

    -- Staging rows as-is, reported under their own URN.
    select
        urn as current_urn,
        urn as source_urn,
        year,
        total_pupils,
        rwm_expected_pct,
        reading_expected_pct,
        writing_expected_pct,
        maths_expected_pct,
        rwm_high_pct,
        reading_high_pct,
        writing_high_pct,
        maths_high_pct,
        reading_progress,
        writing_progress,
        maths_progress,
        reading_avg_score,
        maths_avg_score
    from {{ ref('stg_ees_ks2') }}

),

inherited_results as (

    -- Predecessor rows re-keyed to the successor URN.
    select
        lineage.current_urn,
        pred.urn as source_urn,
        pred.year,
        pred.total_pupils,
        pred.rwm_expected_pct,
        pred.reading_expected_pct,
        pred.writing_expected_pct,
        pred.maths_expected_pct,
        pred.rwm_high_pct,
        pred.reading_high_pct,
        pred.writing_high_pct,
        pred.maths_high_pct,
        pred.reading_progress,
        pred.writing_progress,
        pred.maths_progress,
        pred.reading_avg_score,
        pred.maths_avg_score
    from {{ ref('int_school_lineage') }} as lineage
    inner join {{ ref('stg_ees_ks2') }} as pred
        on lineage.predecessor_urn = pred.urn
    -- Drop a predecessor's year when the successor URN already has its own
    -- row for that same year, so the school's own figures take precedence.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks2') }} as own
        where own.urn = lineage.current_urn
            and own.year = pred.year
    )

)

select * from own_results
union all
select * from inherited_results
|
||||
@@ -0,0 +1,50 @@
|
||||
-- Intermediate model: KS4 results carried across academy conversions.
--
-- Every output row is a KS4 staging row tagged with two URNs:
--   current_urn  the URN the row is reported under
--   source_urn   the URN the row was read from in staging
-- For a school's own rows the two are equal; for inherited rows current_urn
-- comes from the school lineage model and source_urn is the predecessor.
--
-- NOTE(review): nothing here enforces one row per (current_urn, year). If
-- several predecessors of the same school report the same year, each one
-- contributes a row -- confirm downstream models expect that.

with own_results as (

    -- Staging rows as-is, reported under their own URN.
    select
        urn as current_urn,
        urn as source_urn,
        year,
        total_pupils,
        progress_8_score,
        attainment_8_score,
        ebacc_entry_pct,
        ebacc_achievement_pct,
        english_strong_pass_pct,
        maths_strong_pass_pct,
        english_maths_strong_pass_pct,
        staying_in_education_pct
    from {{ ref('stg_ees_ks4') }}

),

inherited_results as (

    -- Predecessor rows re-keyed to the successor URN.
    select
        lineage.current_urn,
        pred.urn as source_urn,
        pred.year,
        pred.total_pupils,
        pred.progress_8_score,
        pred.attainment_8_score,
        pred.ebacc_entry_pct,
        pred.ebacc_achievement_pct,
        pred.english_strong_pass_pct,
        pred.maths_strong_pass_pct,
        pred.english_maths_strong_pass_pct,
        pred.staying_in_education_pct
    from {{ ref('int_school_lineage') }} as lineage
    inner join {{ ref('stg_ees_ks4') }} as pred
        on lineage.predecessor_urn = pred.urn
    -- Drop a predecessor's year when the successor URN already has its own
    -- row for that same year, so the school's own figures take precedence.
    where not exists (
        select 1
        from {{ ref('stg_ees_ks4') }} as own
        where own.urn = lineage.current_urn
            and own.year = pred.year
    )

)

select * from own_results
union all
select * from inherited_results
|
||||
37
pipeline/transform/models/intermediate/int_ofsted_latest.sql
Normal file
37
pipeline/transform/models/intermediate/int_ofsted_latest.sql
Normal file
@@ -0,0 +1,37 @@
|
||||
-- Intermediate model: latest Ofsted inspection per URN.
--
-- Ranks each school's inspections from newest to oldest and keeps only the
-- top-ranked row, so the output has at most one row per urn.

with ranked as (

    select
        *,
        row_number() over (
            partition by urn
            order by
                -- PostgreSQL places NULLs first under DESC by default, which
                -- would let an undated row win as "latest"; push them last.
                inspection_date desc nulls last,
                -- Arbitrary but stable tie-breakers so that inspections
                -- sharing a date resolve the same way on every run.
                -- NOTE(review): swap in the staging model's unique key if it
                -- has one.
                inspection_type,
                report_url
        ) as rn
    from {{ ref('stg_ofsted_inspections') }}

)

select
    urn,
    inspection_date,
    inspection_type,
    framework,
    overall_effectiveness,
    quality_of_education,
    behaviour_attitudes,
    personal_development,
    leadership_management,
    early_years_provision,
    sixth_form_provision,
    rc_safeguarding_met,
    rc_inclusion,
    rc_curriculum_teaching,
    rc_achievement,
    rc_attendance_behaviour,
    rc_personal_development,
    rc_leadership_governance,
    rc_early_years,
    rc_sixth_form,
    report_url
from ranked
where rn = 1
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Intermediate model: pupil characteristics per school and year.
--
-- A straight projection of the census staging model: no joins, filters or
-- derived columns, only the subset of columns listed below.

select
    census.urn,
    census.year,
    census.fsm_pct,
    census.sen_support_pct,
    census.sen_ehcp_pct,
    census.eal_pct,
    census.disadvantaged_pct,
    census.ethnicity_white_pct,
    census.ethnicity_asian_pct,
    census.ethnicity_black_pct,
    census.ethnicity_mixed_pct,
    census.ethnicity_other_pct,
    census.class_size_avg,
    census.stability_pct
from {{ ref('stg_ees_census') }} as census
|
||||
@@ -0,0 +1,48 @@
|
||||
-- Intermediate model: recursive predecessor mapping.
--
-- Walks predecessor links so that every URN is paired with each of its
-- direct and indirect predecessors, letting historical data be attributed
-- to a later URN. One output row per link followed:
--   current_urn      URN the walk started from
--   predecessor_urn  URN reached at this step
--   link_type/date   taken from the link row used for this step
--   depth            1 for a direct predecessor, +1 per further hop

with recursive predecessor_links as (

    -- Only the link types that this model treats as "linked_urn came before
    -- urn"; all other link rows are ignored.
    select
        urn,
        linked_urn,
        link_type,
        link_date
    from {{ ref('stg_gias_links') }}
    where link_type in (
        'Predecessor',
        'Predecessor - Loss of academy converter',
        'Predecessor - amalgamated',
        'Predecessor - Fresh Start'
    )

),

lineage as (

    -- Anchor: direct (one-hop) predecessors.
    select
        urn,
        linked_urn as predecessor_urn,
        link_type,
        link_date,
        1 as depth
    from predecessor_links

    union all

    -- Recursive step: extend each chain by the predecessor's own predecessors,
    -- keeping the URN the chain started from.
    select
        chain.urn,
        hop.linked_urn as predecessor_urn,
        hop.link_type,
        hop.link_date,
        chain.depth + 1 as depth
    from lineage as chain
    inner join predecessor_links as hop
        on hop.urn = chain.predecessor_urn
    -- Safety limit: chains stop growing once they reach depth 5, which also
    -- bounds the recursion if the link data ever contains a cycle.
    where chain.depth < 5

)

select
    urn as current_urn,
    predecessor_urn,
    link_type,
    link_date,
    depth
from lineage
|
||||
Reference in New Issue
Block a user