feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces the hand-rolled integrator with a production-grade ELT pipeline using Meltano (Singer taps), dbt Core (medallion architecture), and Apache Airflow (orchestration). Adds Typesense for search and PostGIS for geospatial queries. - 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI) - dbt project: 12 staging, 5 intermediate, 12 mart models - 3 Airflow DAGs (daily/monthly/annual schedules) - Typesense sync + batch geocoding scripts - docker-compose: add Airflow, Typesense; upgrade to PostGIS - Portainer stack definition matching live deployment topology Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
110
pipeline/transform/models/marts/_marts_schema.yml
Normal file
110
pipeline/transform/models/marts/_marts_schema.yml
Normal file
@@ -0,0 +1,110 @@
|
||||
# dbt schema file for the marts layer: model descriptions plus generic data tests.
version: 2

models:
  # ---------------------------------------------------------------- dimensions
  - name: dim_school
    description: Canonical school dimension — one row per active URN
    columns:
      - name: urn
        tests:
          - not_null
          - unique
      - name: school_name
        tests:
          - not_null
      - name: phase
        tests:
          - not_null
      - name: status
        tests:
          # Only the single value "Open" is accepted for this column.
          - accepted_values:
              values:
                - "Open"

  - name: dim_location
    description: School location dimension with PostGIS geometry
    columns:
      - name: urn
        tests:
          - not_null
          - unique
          # Every located school must also exist in dim_school.
          - relationships:
              to: ref('dim_school')
              field: urn
      - name: postcode
        tests:
          - not_null

  # ---------------------------------------------------------------------- maps
  - name: map_school_lineage
    description: Predecessor/successor lineage map
    columns:
      - name: urn
        tests:
          - not_null
          - relationships:
              to: ref('dim_school')
              field: urn

  # --------------------------------------------------------------------- facts
  - name: fact_ks2_performance
    description: KS2 attainment — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null
    # Model-level compound-key check: the concatenated urn/year expression is
    # handed to the generic unique test as its column_name.
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ks4_performance
    description: KS4 attainment — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null
    # Same compound-key check as fact_ks2_performance.
    tests:
      - unique:
          column_name: "urn || '-' || year"

  - name: fact_ofsted_inspection
    description: Full Ofsted inspection history
    columns:
      - name: urn
        tests:
          - not_null
      - name: inspection_date
        tests:
          - not_null

  - name: fact_pupil_characteristics
    description: Pupil demographics — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null

  - name: fact_admissions
    description: School admissions — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null

  - name: fact_finance
    description: School financial data — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null

  - name: fact_phonics
    description: Phonics screening results — one row per URN per year
    columns:
      - name: urn
        tests:
          - not_null
      - name: year
        tests:
          - not_null

  - name: fact_parent_view
    description: Parent View survey responses
    columns:
      - name: urn
        tests:
          - not_null

  # No column tests are declared for this model.
  - name: fact_deprivation
    description: IDACI deprivation index
|
||||
19
pipeline/transform/models/marts/dim_location.sql
Normal file
19
pipeline/transform/models/marts/dim_location.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- Mart: school location dimension (PostGIS-enabled); intended grain is one row per URN.
-- This query does not select a geom column: per the original design note it is
-- populated separately, by a post-hook or by the geocode script.
-- Only establishments with status 'Open' and a non-null postcode are kept.

with establishments as (

    select * from {{ ref('stg_gias_establishments') }}

)

select
    establishments.urn,
    establishments.address_line1,
    establishments.address_line2,
    establishments.town,
    establishments.county,
    establishments.postcode,
    establishments.local_authority_code,
    establishments.local_authority_name,
    establishments.parliamentary_constituency,
    establishments.urban_rural,
    establishments.easting,
    establishments.northing
from establishments
where establishments.postcode is not null
    and establishments.status = 'Open'
|
||||
40
pipeline/transform/models/marts/dim_school.sql
Normal file
40
pipeline/transform/models/marts/dim_school.sql
Normal file
@@ -0,0 +1,40 @@
|
||||
-- Mart: Canonical school dimension — one row per active URN
--
-- Source rows come from stg_gias_establishments, restricted to status = 'Open'.
-- Each school is left-joined to int_ofsted_latest so schools without an
-- inspection still appear (their ofsted_* columns are NULL).
-- NOTE(review): the one-row-per-URN grain relies on int_ofsted_latest holding
-- at most one row per URN — confirm against that model.

with schools as (

    select * from {{ ref('stg_gias_establishments') }}

),

latest_ofsted as (

    select * from {{ ref('int_ofsted_latest') }}

)

select
    s.urn,

    -- LAESTAB is the 3-digit local authority code followed by the 4-digit
    -- establishment number (7 digits in total), so the LA code has to be
    -- shifted by four decimal places. A multiplier of 1000 lets the two parts
    -- overlap, producing wrong values that can collide across schools.
    -- NOTE(review): assumes both inputs are numeric, as the arithmetic implies.
    s.local_authority_code * 10000 + s.establishment_number as laestab,

    s.school_name,
    s.phase,
    s.school_type,
    s.academy_trust_name,
    s.academy_trust_uid,
    s.religious_character,
    s.gender,

    -- '<low>-<high>'; evaluates to NULL when either bound is NULL.
    s.statutory_low_age || '-' || s.statutory_high_age as age_range,

    s.capacity,
    s.total_pupils,

    -- concat_ws skips NULL parts, so a missing title or name leaves no gap.
    concat_ws(' ', s.head_title, s.head_first_name, s.head_last_name) as headteacher_name,

    s.website,
    s.telephone,
    s.open_date,
    s.close_date,
    s.status,
    s.nursery_provision,
    s.admissions_policy,

    -- Latest Ofsted
    o.overall_effectiveness as ofsted_grade,
    o.inspection_date as ofsted_date,
    o.framework as ofsted_framework

from schools as s
left join latest_ofsted as o
    on s.urn = o.urn
where s.status = 'Open'
|
||||
10
pipeline/transform/models/marts/fact_admissions.sql
Normal file
10
pipeline/transform/models/marts/fact_admissions.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
-- Mart: school admissions, described as one row per URN per year.
-- Straight column projection of stg_ees_admissions; no filtering or
-- deduplication happens in this model.

with admissions as (

    select * from {{ ref('stg_ees_admissions') }}

)

select
    admissions.urn,
    admissions.year,
    admissions.published_admission_number,
    admissions.total_applications,
    admissions.first_preference_offers_pct,
    admissions.oversubscribed
from admissions
|
||||
22
pipeline/transform/models/marts/fact_deprivation.sql
Normal file
22
pipeline/transform/models/marts/fact_deprivation.sql
Normal file
@@ -0,0 +1,22 @@
|
||||
-- Mart: IDACI deprivation index (placeholder)
--
-- Current grain: one row per stg_idaci row, identified by lsoa_code.
-- This model does NOT yet produce one row per URN and exposes no urn column.
--
-- Target design: school postcode → LSOA → IDACI score, one row per URN.
-- That join needs a postcode-to-LSOA lookup table, to be populated by the
-- geocode script or a seed, or supplied by the IDACI tap.
--
-- TODO: once the lookup exists, start from the open establishments in
-- stg_gias_establishments that have a non-null postcode (urn, postcode),
-- map postcode → lsoa_code, and join to the scores below.

select
    i.lsoa_code,
    i.idaci_score,
    i.idaci_decile
from {{ ref('stg_idaci') }} as i
|
||||
11
pipeline/transform/models/marts/fact_finance.sql
Normal file
11
pipeline/transform/models/marts/fact_finance.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- Mart: school financial data, described as one row per URN per year.
-- Straight column projection of stg_fbit_finance; no filtering or
-- deduplication happens in this model.

with finance as (

    select * from {{ ref('stg_fbit_finance') }}

)

select
    finance.urn,
    finance.year,
    finance.per_pupil_spend,
    finance.staff_cost_pct,
    finance.teacher_cost_pct,
    finance.support_staff_cost_pct,
    finance.premises_cost_pct
from finance
|
||||
22
pipeline/transform/models/marts/fact_ks2_performance.sql
Normal file
22
pipeline/transform/models/marts/fact_ks2_performance.sql
Normal file
@@ -0,0 +1,22 @@
|
||||
-- Mart: KS2 performance facts, described as one row per URN per year.
-- Reads int_ks2_with_lineage, which per the original note includes predecessor
-- data via lineage resolution. current_urn is published as urn and source_urn
-- is kept alongside it.
-- NOTE(review): source_urn presumably identifies the URN the figures were
-- originally reported under — confirm against int_ks2_with_lineage.

with ks2 as (

    select * from {{ ref('int_ks2_with_lineage') }}

)

select
    ks2.current_urn as urn,
    ks2.source_urn,
    ks2.year,
    ks2.total_pupils,

    -- share of pupils at the expected standard
    ks2.rwm_expected_pct,
    ks2.reading_expected_pct,
    ks2.writing_expected_pct,
    ks2.maths_expected_pct,

    -- share of pupils at the higher standard
    ks2.rwm_high_pct,
    ks2.reading_high_pct,
    ks2.writing_high_pct,
    ks2.maths_high_pct,

    -- progress measures
    ks2.reading_progress,
    ks2.writing_progress,
    ks2.maths_progress,

    -- average scores
    ks2.reading_avg_score,
    ks2.maths_avg_score
from ks2
|
||||
16
pipeline/transform/models/marts/fact_ks4_performance.sql
Normal file
16
pipeline/transform/models/marts/fact_ks4_performance.sql
Normal file
@@ -0,0 +1,16 @@
|
||||
-- Mart: KS4 performance facts, described as one row per URN per year.
-- Reads int_ks4_with_lineage; current_urn is published as urn and source_urn
-- is kept alongside it. No filtering or deduplication happens in this model.

with ks4 as (

    select * from {{ ref('int_ks4_with_lineage') }}

)

select
    ks4.current_urn as urn,
    ks4.source_urn,
    ks4.year,
    ks4.total_pupils,
    ks4.progress_8_score,
    ks4.attainment_8_score,
    ks4.ebacc_entry_pct,
    ks4.ebacc_achievement_pct,
    ks4.english_strong_pass_pct,
    ks4.maths_strong_pass_pct,
    ks4.english_maths_strong_pass_pct,
    ks4.staying_in_education_pct
from ks4
|
||||
25
pipeline/transform/models/marts/fact_ofsted_inspection.sql
Normal file
25
pipeline/transform/models/marts/fact_ofsted_inspection.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- Mart: full Ofsted inspection history, described as one row per inspection.
-- Straight column projection of stg_ofsted_inspections; no filtering or
-- deduplication happens in this model.

with inspections as (

    select * from {{ ref('stg_ofsted_inspections') }}

)

select
    inspections.urn,
    inspections.inspection_date,
    inspections.inspection_type,
    inspections.framework,

    -- judgement columns
    inspections.overall_effectiveness,
    inspections.quality_of_education,
    inspections.behaviour_attitudes,
    inspections.personal_development,
    inspections.leadership_management,
    inspections.early_years_provision,
    inspections.sixth_form_provision,

    -- rc_* columns
    -- NOTE(review): presumably report-card areas from a newer framework — confirm in staging.
    inspections.rc_safeguarding_met,
    inspections.rc_inclusion,
    inspections.rc_curriculum_teaching,
    inspections.rc_achievement,
    inspections.rc_attendance_behaviour,
    inspections.rc_personal_development,
    inspections.rc_leadership_governance,
    inspections.rc_early_years,
    inspections.rc_sixth_form,

    inspections.report_url
from inspections
|
||||
15
pipeline/transform/models/marts/fact_parent_view.sql
Normal file
15
pipeline/transform/models/marts/fact_parent_view.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- Mart: Parent View survey responses, described as one row per URN (latest survey).
-- NOTE(review): this model applies no filter or deduplication, so the
-- "latest survey" grain must already hold in stg_parent_view — confirm there.

with parent_view as (

    select * from {{ ref('stg_parent_view') }}

)

select
    parent_view.urn,
    parent_view.survey_date,
    parent_view.total_responses,
    parent_view.q_happy_pct,
    parent_view.q_safe_pct,
    parent_view.q_progress_pct,
    parent_view.q_well_taught_pct,
    parent_view.q_well_led_pct,
    parent_view.q_behaviour_pct,
    parent_view.q_bullying_pct,
    parent_view.q_recommend_pct
from parent_view
|
||||
8
pipeline/transform/models/marts/fact_phonics.sql
Normal file
8
pipeline/transform/models/marts/fact_phonics.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
-- Mart: phonics screening results, described as one row per URN per year.
-- Straight column projection of stg_ees_phonics; no filtering or
-- deduplication happens in this model.

with phonics as (

    select * from {{ ref('stg_ees_phonics') }}

)

select
    phonics.urn,
    phonics.year,
    phonics.year1_phonics_pct,
    phonics.year2_phonics_pct
from phonics
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Mart: pupil characteristics, described as one row per URN per year.
-- Straight column projection of int_pupil_chars_merged; no filtering or
-- deduplication happens in this model.

with pupil_chars as (

    select * from {{ ref('int_pupil_chars_merged') }}

)

select
    pupil_chars.urn,
    pupil_chars.year,
    pupil_chars.fsm_pct,
    pupil_chars.sen_support_pct,
    pupil_chars.sen_ehcp_pct,
    pupil_chars.eal_pct,
    pupil_chars.disadvantaged_pct,

    -- ethnicity breakdown
    pupil_chars.ethnicity_white_pct,
    pupil_chars.ethnicity_asian_pct,
    pupil_chars.ethnicity_black_pct,
    pupil_chars.ethnicity_mixed_pct,
    pupil_chars.ethnicity_other_pct,

    pupil_chars.class_size_avg,
    pupil_chars.stability_pct
from pupil_chars
|
||||
9
pipeline/transform/models/marts/map_school_lineage.sql
Normal file
9
pipeline/transform/models/marts/map_school_lineage.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
-- Mart: school predecessor/successor lineage map.
-- Projection of int_school_lineage with current_urn published as urn;
-- no filtering or deduplication happens in this model.

with lineage as (

    select * from {{ ref('int_school_lineage') }}

)

select
    lineage.current_urn as urn,
    lineage.predecessor_urn,
    lineage.link_type,
    lineage.link_date,
    lineage.depth
from lineage
|
||||
Reference in New Issue
Block a user