feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 35s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m9s
Build and Push Docker Images / Build Integrator (push) Successful in 56s
Build and Push Docker Images / Build Kestra Init (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces the hand-rolled integrator with a production-grade ELT pipeline
using Meltano (Singer taps), dbt Core (medallion architecture), and Apache
Airflow (orchestration). Adds Typesense for search and PostGIS for
geospatial queries.

- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
289
docker-compose.portainer.yml
Normal file
289
docker-compose.portainer.yml
Normal file
@@ -0,0 +1,289 @@
|
||||
# Portainer Stack Definition for School Compare
#
# Portainer environment variables (set in Portainer UI -> Stack -> Environment):
#   DB_USERNAME            — PostgreSQL username (no default)
#   DB_PASSWORD            — PostgreSQL password (no default)
#   DB_DATABASE_NAME       — PostgreSQL database name (no default)
#   ADMIN_API_KEY          — Backend admin API key (falls back to "changeme")
#   TYPESENSE_API_KEY      — Typesense admin API key (falls back to "changeme")
#   TYPESENSE_SEARCH_KEY   — Typesense search-only key, exposed to the frontend
#                            (falls back to "changeme")
#   AIRFLOW_ADMIN_PASSWORD — Airflow web UI admin password (falls back to "admin")
#   KESTRA_USER            — Kestra UI username (optional)
#   KESTRA_PASSWORD        — Kestra UI password (optional)
#
# The fallback values are placeholders; set real secrets before deploying.
|
||||
|
||||
services:

  # ── PostgreSQL (PostGIS image) ────────────────────────────────────────
  # Single database server shared by the backend, the legacy integrator,
  # Kestra and Airflow. Reachable on the internal `backend` network and at
  # a fixed address on the external `macvlan` network.
  sc_database:
    image: postgis/postgis:18-3.6-alpine
    container_name: sc_postgres
    environment:
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_USER: ${DB_USERNAME}
      POSTGRES_DB: ${DB_DATABASE_NAME}
    volumes:
      - postgres_data:/var/lib/postgresql
    shm_size: 128mb
    networks:
      backend: {}
      macvlan:
        ipv4_address: 10.0.1.189
    healthcheck:
      # Probe with the role/database this container was initialised with
      # instead of a hard-coded `postgres` role, which does not exist unless
      # DB_USERNAME happens to be `postgres`. `$$` defers expansion to the
      # container shell, where POSTGRES_USER / POSTGRES_DB are set above.
      test:
        - CMD-SHELL
        - 'pg_isready -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB}"'
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped

  # ── FastAPI Backend ───────────────────────────────────────────────────
  # Serves the API on container port 80; not published on the host.
  backend:
    image: privaterepo.sitaru.org/tudor/school_compare-backend:latest
    container_name: schoolcompare_backend
    environment:
      DATABASE_URL: postgresql://${DB_USERNAME}:${DB_PASSWORD}@sc_database:5432/${DB_DATABASE_NAME}
      PYTHONUNBUFFERED: "1"
      ADMIN_API_KEY: ${ADMIN_API_KEY:-changeme}
      TYPESENSE_URL: http://typesense:8108
      TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
    depends_on:
      sc_database:
        condition: service_healthy
    networks:
      - backend
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:80/api/data-info"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # ── Next.js Frontend ──────────────────────────────────────────────────
  # Starts only once the backend reports healthy. Receives the search-only
  # Typesense key (TYPESENSE_SEARCH_KEY), never the admin key.
  frontend:
    image: privaterepo.sitaru.org/tudor/school_compare-frontend:latest
    container_name: schoolcompare_nextjs
    environment:
      - NODE_ENV=production
      # NOTE(review): no service in this stack publishes port 8000 —
      # confirm this URL is what browsers should actually call.
      - NEXT_PUBLIC_API_URL=http://localhost:8000/api
      - FASTAPI_URL=http://backend:80/api
      - TYPESENSE_URL=http://typesense:8108
      - TYPESENSE_API_KEY=${TYPESENSE_SEARCH_KEY:-changeme}
    depends_on:
      backend:
        condition: service_healthy
    networks:
      backend: {}
      macvlan:
        ipv4_address: 10.0.1.150
    restart: unless-stopped
    healthcheck:
      # Healthy only when GET / on port 3000 answers with HTTP 200.
      test:
        - CMD
        - node
        - "-e"
        - "require('http').get('http://localhost:3000/', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # ── Typesense Search Engine ───────────────────────────────────────────
  typesense:
    image: typesense/typesense:27.1
    container_name: schoolcompare_typesense
    environment:
      TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
      TYPESENSE_DATA_DIR: /data
    volumes:
      - typesense_data:/data
    networks:
      - backend
    restart: unless-stopped
    healthcheck:
      # NOTE(review): confirm this image ships `curl`; if it does not, the
      # container will be reported unhealthy even while serving requests.
      test: ["CMD", "curl", "-sf", "http://localhost:8108/health"]
      interval: 15s
      timeout: 5s
      retries: 5
      start_period: 10s

  # ── Kestra — workflow orchestrator (legacy, kept during migration) ────
  # UI is published on host port 8090 (container port 8080).
  kestra:
    # NOTE(review): `latest` is unpinned; consider pinning a version so
    # redeploys are reproducible.
    image: kestra/kestra:latest
    container_name: schoolcompare_kestra
    command: server standalone
    ports:
      - "8090:8080"
    volumes:
      - kestra_storage:/app/storage
    environment:
      # NOTE(review): the JDBC URL targets a database named `kestra`. This
      # stack only creates ${DB_DATABASE_NAME}; confirm `kestra` exists.
      KESTRA_CONFIGURATION: |
        datasources:
          postgres:
            url: jdbc:postgresql://sc_database:5432/kestra
            driverClassName: org.postgresql.Driver
            username: ${DB_USERNAME}
            password: ${DB_PASSWORD}
        kestra:
          repository:
            type: postgres
          queue:
            type: postgres
          storage:
            type: local
            local:
              base-path: /app/storage
    depends_on:
      sc_database:
        condition: service_healthy
    networks:
      - backend
    restart: unless-stopped
    healthcheck:
      # Polls port 8081 and requires "status":"UP" in the response body.
      test: ["CMD-SHELL", "curl -sf http://localhost:8081/health | grep -q '\"status\":\"UP\"'"]
      interval: 15s
      timeout: 10s
      retries: 10
      start_period: 60s

  # ── Kestra init (legacy, kept during migration) ──────────────────────
  # One-shot container: runs after Kestra is healthy and is not restarted.
  kestra-init:
    image: privaterepo.sitaru.org/tudor/school_compare-kestra-init:latest
    container_name: schoolcompare_kestra_init
    environment:
      KESTRA_URL: http://kestra:8080
      KESTRA_USER: ${KESTRA_USER:-}
      KESTRA_PASSWORD: ${KESTRA_PASSWORD:-}
    depends_on:
      kestra:
        condition: service_healthy
    networks:
      - backend
    restart: "no"

  # ── Data integrator (legacy, kept during migration) ──────────────────
  # Published on host port 8001.
  integrator:
    image: privaterepo.sitaru.org/tudor/school_compare-integrator:latest
    container_name: schoolcompare_integrator
    ports:
      - "8001:8001"
    environment:
      DATABASE_URL: postgresql://${DB_USERNAME}:${DB_PASSWORD}@sc_database:5432/${DB_DATABASE_NAME}
      DATA_DIR: /data
      BACKEND_URL: http://backend:80
      ADMIN_API_KEY: ${ADMIN_API_KEY:-changeme}
      PYTHONUNBUFFERED: "1"
    volumes:
      - supplementary_data:/data
    depends_on:
      sc_database:
        condition: service_healthy
    networks:
      - backend
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s

  # ── Airflow Webserver (UI at :8080) ──────────────────────────────────
  # Airflow keeps its metadata in the same PostgreSQL database as the
  # application (${DB_DATABASE_NAME}).
  airflow-webserver:
    image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
    container_name: schoolcompare_airflow_webserver
    command: airflow webserver --port 8080
    ports:
      - "8080:8080"
    environment:
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${DB_USERNAME}:${DB_PASSWORD}@sc_database:5432/${DB_DATABASE_NAME}
      AIRFLOW__CORE__DAGS_FOLDER: /opt/pipeline/dags
      AIRFLOW__CORE__LOAD_EXAMPLES: "false"
      AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "false"
      PG_HOST: sc_database
      PG_PORT: "5432"
      PG_USER: ${DB_USERNAME}
      PG_PASSWORD: ${DB_PASSWORD}
      PG_DATABASE: ${DB_DATABASE_NAME}
      TYPESENSE_URL: http://typesense:8108
      TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
    volumes:
      # NOTE(review): named volume mounted read-only over the DAGs folder —
      # confirm how DAG files are expected to get into it.
      - airflow_dags:/opt/pipeline/dags:ro
    depends_on:
      sc_database:
        condition: service_healthy
      # Do not start against an un-migrated metadata database.
      airflow-init:
        condition: service_completed_successfully
    networks:
      - backend
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s

  # ── Airflow Scheduler ────────────────────────────────────────────────
  # Same image and configuration as the webserver; runs the scheduler.
  airflow-scheduler:
    image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
    container_name: schoolcompare_airflow_scheduler
    command: airflow scheduler
    environment:
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${DB_USERNAME}:${DB_PASSWORD}@sc_database:5432/${DB_DATABASE_NAME}
      AIRFLOW__CORE__DAGS_FOLDER: /opt/pipeline/dags
      AIRFLOW__CORE__LOAD_EXAMPLES: "false"
      AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "false"
      PG_HOST: sc_database
      PG_PORT: "5432"
      PG_USER: ${DB_USERNAME}
      PG_PASSWORD: ${DB_PASSWORD}
      PG_DATABASE: ${DB_DATABASE_NAME}
      TYPESENSE_URL: http://typesense:8108
      TYPESENSE_API_KEY: ${TYPESENSE_API_KEY:-changeme}
    volumes:
      - airflow_dags:/opt/pipeline/dags:ro
    depends_on:
      sc_database:
        condition: service_healthy
      # Do not start against an un-migrated metadata database.
      airflow-init:
        condition: service_completed_successfully
    networks:
      - backend
    restart: unless-stopped

  # ── Airflow DB Init (one-shot) ───────────────────────────────────────
  # Migrates the Airflow metadata schema, then creates the `admin` UI user.
  airflow-init:
    image: privaterepo.sitaru.org/tudor/school_compare-pipeline:latest
    container_name: schoolcompare_airflow_init
    # `set -e` makes a failed migration fail the container. Only the
    # user-creation step is allowed to fail (`|| true`), e.g. on re-runs
    # when the user already exists. `$$` defers expansion to the container
    # shell; the quotes keep passwords containing spaces intact.
    command:
      - bash
      - "-c"
      - |
        set -e
        airflow db migrate
        airflow users create \
          --username admin \
          --password "$${AIRFLOW_ADMIN_PASSWORD:-admin}" \
          --firstname Admin \
          --lastname User \
          --role Admin \
          --email admin@localhost || true
    environment:
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${DB_USERNAME}:${DB_PASSWORD}@sc_database:5432/${DB_DATABASE_NAME}
      AIRFLOW__CORE__DAGS_FOLDER: /opt/pipeline/dags
      AIRFLOW__CORE__LOAD_EXAMPLES: "false"
      AIRFLOW_ADMIN_PASSWORD: ${AIRFLOW_ADMIN_PASSWORD:-admin}
    depends_on:
      sc_database:
        condition: service_healthy
    networks:
      - backend
    restart: "no"
|
||||
|
||||
# Networks used by the stack.
networks:
  # Internal bridge network shared by every service.
  backend:
    driver: bridge
  # Pre-existing network named `macvlan`, created outside this stack;
  # sc_database and frontend attach to it with fixed IPv4 addresses.
  # `external: true` + `name:` replaces the deprecated `external: {name: …}`.
  macvlan:
    external: true
    name: macvlan
|
||||
|
||||
# Named volumes, all with default driver settings.
volumes:
  # PostgreSQL data (sc_database → /var/lib/postgresql)
  postgres_data: {}
  # Kestra local storage (kestra → /app/storage)
  kestra_storage: {}
  # Integrator working data (integrator → /data)
  supplementary_data: {}
  # Typesense index data (typesense → /data)
  typesense_data: {}
  # Airflow DAG files (airflow-webserver / airflow-scheduler → /opt/pipeline/dags, read-only)
  airflow_dags: {}
|
||||
Reference in New Issue
Block a user