feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces the hand-rolled integrator with a production-grade ELT pipeline using Meltano (Singer taps), dbt Core (medallion architecture), and Apache Airflow (orchestration). Adds Typesense for search and PostGIS for geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# PostgreSQL Database
|
||||
# PostgreSQL Database with PostGIS
|
||||
db:
|
||||
image: postgres:16-alpine
|
||||
image: postgis/postgis:16-3.4-alpine
|
||||
container_name: schoolcompare_db
|
||||
environment:
|
||||
POSTGRES_USER: schoolcompare
|
||||
@@ -33,6 +33,8 @@ services:
|
||||
DATABASE_URL: postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare
|
||||
PYTHONUNBUFFERED: 1
|
||||
ADMIN_API_KEY: ${ADMIN_API_KEY:-changeme}
|
||||
TYPESENSE_URL: http://typesense:8108
|
||||
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
|
||||
volumes:
|
||||
- ./data:/app/data:ro
|
||||
depends_on:
|
||||
@@ -58,6 +60,8 @@ services:
|
||||
NODE_ENV: production
|
||||
NEXT_PUBLIC_API_URL: http://localhost:8000/api
|
||||
FASTAPI_URL: http://backend:80/api
|
||||
TYPESENSE_URL: http://typesense:8108
|
||||
TYPESENSE_API_KEY: ${TYPESENSE_SEARCH_KEY:-changeme}
|
||||
depends_on:
|
||||
backend:
|
||||
condition: service_healthy
|
||||
@@ -71,32 +75,49 @@ services:
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
|
||||
# Kestra — workflow orchestrator (UI at http://localhost:8080)
|
||||
kestra:
|
||||
image: kestra/kestra:latest
|
||||
container_name: schoolcompare_kestra
|
||||
command: server standalone
|
||||
# Typesense search engine (API published on host port 8108)
typesense:
  container_name: schoolcompare_typesense
  image: typesense/typesense:27.1
  restart: unless-stopped
  networks:
    - schoolcompare-network
  ports:
    - "8108:8108"
  volumes:
    # Named volume mounted at the directory TYPESENSE_DATA_DIR points to.
    - typesense_data:/data
  environment:
    TYPESENSE_DATA_DIR: /data
    # Falls back to "changeme" when TYPESENSE_API_KEY is unset or empty.
    TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
  healthcheck:
    # Exec-form probe of the /health endpoint from inside the container.
    test:
      - CMD
      - curl
      - -sf
      - http://localhost:8108/health
    interval: 15s
    timeout: 5s
    retries: 5
    start_period: 10s
|
||||
|
||||
# Apache Airflow — workflow orchestrator (UI at http://localhost:8080)
|
||||
airflow-webserver:
|
||||
image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
|
||||
container_name: schoolcompare_airflow_webserver
|
||||
command: airflow webserver --port 8080
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment: &airflow-env
|
||||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://schoolcompare:schoolcompare@db:5432/schoolcompare
|
||||
AIRFLOW__CORE__DAGS_FOLDER: /opt/pipeline/dags
|
||||
AIRFLOW__CORE__LOAD_EXAMPLES: "false"
|
||||
AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "false"
|
||||
PG_HOST: db
|
||||
PG_PORT: "5432"
|
||||
PG_USER: schoolcompare
|
||||
PG_PASSWORD: schoolcompare
|
||||
PG_DATABASE: schoolcompare
|
||||
TYPESENSE_URL: http://typesense:8108
|
||||
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
|
||||
volumes:
|
||||
- kestra_storage:/app/storage
|
||||
environment:
|
||||
KESTRA_CONFIGURATION: |
|
||||
datasources:
|
||||
postgres:
|
||||
url: jdbc:postgresql://db:5432/kestra
|
||||
driverClassName: org.postgresql.Driver
|
||||
username: schoolcompare
|
||||
password: schoolcompare
|
||||
kestra:
|
||||
repository:
|
||||
type: postgres
|
||||
queue:
|
||||
type: postgres
|
||||
storage:
|
||||
type: local
|
||||
local:
|
||||
base-path: /app/storage
|
||||
- ./pipeline/dags:/opt/pipeline/dags:ro
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
@@ -104,53 +125,42 @@ services:
|
||||
- schoolcompare-network
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:8081/health | grep -q '\"status\":\"UP\"'"]
|
||||
interval: 15s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 60s
|
||||
|
||||
# One-shot container: imports flow YAMLs into Kestra after it's healthy
|
||||
kestra-init:
|
||||
image: privaterepo.sitaru.org/tudor/school_compare-kestra-init:latest
|
||||
container_name: schoolcompare_kestra_init
|
||||
environment:
|
||||
KESTRA_URL: http://kestra:8080
|
||||
KESTRA_USER: ${KESTRA_USER:-}
|
||||
KESTRA_PASSWORD: ${KESTRA_PASSWORD:-}
|
||||
depends_on:
|
||||
kestra:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- schoolcompare-network
|
||||
restart: no
|
||||
|
||||
# Data integrator — Python microservice called by Kestra
|
||||
integrator:
|
||||
image: privaterepo.sitaru.org/tudor/school_compare-integrator:latest
|
||||
container_name: schoolcompare_integrator
|
||||
ports:
|
||||
- "8001:8001"
|
||||
environment:
|
||||
DATABASE_URL: postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare
|
||||
DATA_DIR: /data
|
||||
BACKEND_URL: http://backend:80
|
||||
ADMIN_API_KEY: ${ADMIN_API_KEY:-changeme}
|
||||
PYTHONUNBUFFERED: 1
|
||||
volumes:
|
||||
- supplementary_data:/data
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- schoolcompare-network
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 15s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
|
||||
# Airflow scheduler — same image and shared environment (*airflow-env) as the
# Airflow webserver service.
airflow-scheduler:
  image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
  container_name: schoolcompare_airflow_scheduler
  command: airflow scheduler
  environment: *airflow-env
  volumes:
    # DAG definitions are mounted read-only from the repository checkout.
    - ./pipeline/dags:/opt/pipeline/dags:ro
  depends_on:
    db:
      condition: service_healthy
    # Wait for the one-shot airflow-init service to exit successfully so the
    # scheduler never starts against an unmigrated metadata database.
    airflow-init:
      condition: service_completed_successfully
  networks:
    - schoolcompare-network
  restart: unless-stopped
|
||||
|
||||
# One-shot: initialise Airflow metadata DB and create the admin UI user
airflow-init:
  image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
  container_name: schoolcompare_airflow_init
  # Exec-form command: the whole script reaches bash as a single argument.
  # User creation is grouped with its own "|| true" so that only that
  # best-effort step is tolerated; because "&&" and "||" have equal precedence
  # in bash, an ungrouped "migrate && create || true" would also swallow a
  # failed migration and let the container exit 0.
  # NOTE(review): admin/admin is a hard-coded login — change it outside local
  # development.
  command:
    - bash
    - -c
    - >-
      airflow db migrate &&
      (airflow users create
      --username admin --password admin
      --firstname Admin --lastname User
      --role Admin --email admin@localhost
      || true)
  environment: *airflow-env
  depends_on:
    db:
      condition: service_healthy
  networks:
    - schoolcompare-network
  # Quoted so YAML 1.1 parsers do not read the value as boolean false.
  restart: "no"
|
||||
|
||||
networks:
|
||||
schoolcompare-network:
|
||||
@@ -158,5 +168,4 @@ networks:
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
kestra_storage:
|
||||
supplementary_data:
|
||||
typesense_data:
|
||||
|
||||
Reference in New Issue
Block a user