feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces the hand-rolled integrator with a production-grade ELT pipeline using Meltano (Singer taps), dbt Core (medallion architecture), and Apache Airflow (orchestration). Adds Typesense for search and PostGIS for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
36
pipeline/transform/macros/chain_lineage.sql
Normal file
36
pipeline/transform/macros/chain_lineage.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- Macro: chain_lineage
--
-- Renders a complete query (a WITH clause plus a final SELECT, not a bare CTE)
-- that stacks two row sets read from source_ref with UNION ALL:
--   1. every row of source_ref, keyed by its own URN;
--   2. rows whose URN appears as predecessor_urn in int_school_lineage,
--      re-keyed to that lineage row's current_urn, and kept only when
--      source_ref has no row for that current_urn with the same year value.
--
-- Arguments:
--   source_ref  relation to read; rendered verbatim after FROM
--   urn_col     name of the URN column in source_ref (default 'urn')
--   year_col    name of the year column in source_ref (default 'year')
--
-- Output columns: current_urn, source_urn, then every column of source_ref.
-- No de-duplication is performed on the stacked result.

{% macro chain_lineage(source_ref, urn_col='urn', year_col='year') %}

with own_rows as (
    {# Rows as they stand: each row is both its own current and source URN. #}
    select
        {{ urn_col }} as current_urn,
        {{ urn_col }} as source_urn,
        *
    from {{ source_ref }}
),

inherited_rows as (
    {# Predecessor rows carried forward under the lineage row's current URN. #}
    select
        lineage.current_urn,
        pred.{{ urn_col }} as source_urn,
        pred.*
    from {{ ref('int_school_lineage') }} as lineage
    inner join {{ source_ref }} as pred
        on pred.{{ urn_col }} = lineage.predecessor_urn
    {# Skip any year for which the current URN already has its own row. #}
    where not exists (
        select 1
        from {{ source_ref }} as existing_row
        where existing_row.{{ urn_col }} = lineage.current_urn
            and existing_row.{{ year_col }} = pred.{{ year_col }}
    )
),

stacked as (
    select * from own_rows
    union all
    select * from inherited_rows
)

{# Final select stays a single-relation query so trailing clauses added by a caller apply to the whole result. #}
select * from stacked

{% endmacro %}
|
||||
13
pipeline/transform/macros/parse_ofsted_grade.sql
Normal file
13
pipeline/transform/macros/parse_ofsted_grade.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
-- Macro: parse_ofsted_grade
--
-- Renders a CASE expression that maps an Ofsted grade, given either as a
-- numeric code or as a text label, to an integer 1-4. Anything unrecognised
-- (including NULL) yields NULL.
--
-- Argument:
--   column  column name or SQL expression holding the raw grade. It is
--           wrapped in parentheses before the cast so that the cast applies
--           to the whole expression, not just its last operand.
--
-- Matching is case-insensitive and ignores leading/trailing spaces: the value
-- is cast to text, trimmed and lower-cased once, then compared against
-- lower-case literals. Every value the earlier exact-match version accepted
-- still maps to the same grade.

{% macro parse_ofsted_grade(column) %}
{# Build the normalised operand once so every branch compares the same expression. #}
{% set grade_text = "lower(trim((" ~ column ~ ")::text))" %}
case
    when {{ grade_text }} in ('1', 'outstanding') then 1
    when {{ grade_text }} in ('2', 'good') then 2
    when {{ grade_text }} in ('3', 'requires improvement', 'satisfactory') then 3
    when {{ grade_text }} in ('4', 'inadequate') then 4
    {# NOTE(review): '9' is graded 4 together with the serious-weaknesses labels; confirm against the source code list that 9 is not a "no judgement" placeholder. #}
    when {{ grade_text }} in ('9', 'swk', 'serious weaknesses') then 4
    when {{ grade_text }} in ('sm', 'special measures') then 4
    else null
end
{% endmacro %}
|
||||
8
pipeline/transform/macros/validate_uk_coordinates.sql
Normal file
8
pipeline/transform/macros/validate_uk_coordinates.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
-- Macro: validate_uk_coordinates
--
-- Renders a parenthesised boolean expression that is true when lat_col lies
-- in [49.0, 61.0] and lng_col lies in [-8.0, 2.0], both ends inclusive
-- (the bounding box this project uses for the UK).
--
-- Arguments:
--   lat_col  column name or expression holding the latitude
--   lng_col  column name or expression holding the longitude
--
-- A NULL coordinate never produces true: the expression evaluates to NULL or
-- false, following SQL three-valued logic.

{% macro validate_uk_coordinates(lat_col, lng_col) %}
(
    {{ lat_col }} >= 49.0 and {{ lat_col }} <= 61.0
    and {{ lng_col }} >= -8.0 and {{ lng_col }} <= 2.0
)
{% endmacro %}
|
||||
Reference in New Issue
Block a user