2026-03-26 09:18:11 +00:00
|
|
|
# Base image: the slim variant of the official Python 3.13 image.
FROM python:3.13-slim
|
feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold
Replaces the hand-rolled integrator with a production-grade ELT pipeline
using Meltano (Singer taps), dbt Core (medallion architecture), and
Apache Airflow (orchestration). Adds Typesense for search and PostGIS
for geospatial queries.
- 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI)
- dbt project: 12 staging, 5 intermediate, 12 mart models
- 3 Airflow DAGs (daily/monthly/annual schedules)
- Typesense sync + batch geocoding scripts
- docker-compose: add Airflow, Typesense; upgrade to PostGIS
- Portainer stack definition matching live deployment topology
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 08:37:53 +00:00
|
|
|
|
|
|
|
|
# All relative paths in the following COPY/RUN instructions resolve against
# this directory; it is also the working directory of the running container.
WORKDIR /opt/pipeline
|
|
|
|
|
|
|
|
|
|
# OS-level packages: a C compiler (gcc) and the PostgreSQL client headers
# (libpq-dev) -- presumably required to build Python packages with native
# extensions. The apt package index is removed in the same layer so it does
# not add to the image size.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends gcc libpq-dev; \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
# Third-party Python packages. Only the requirements manifest is copied
# before installing, so this layer is rebuilt only when requirements.txt
# changes rather than on every source edit.
COPY requirements.txt ./requirements.txt
RUN python -m pip install --no-cache-dir --requirement requirements.txt
|
|
|
|
|
|
|
|
|
|
# Custom Singer taps: copy the plugin sources into the image and install all
# six extractors from their local directories in a single pip invocation.
COPY plugins/ ./plugins/
RUN python -m pip install --no-cache-dir \
        ./plugins/extractors/tap-uk-ees \
        ./plugins/extractors/tap-uk-fbit \
        ./plugins/extractors/tap-uk-gias \
        ./plugins/extractors/tap-uk-idaci \
        ./plugins/extractors/tap-uk-ofsted \
        ./plugins/extractors/tap-uk-parent-view
|
|
|
|
|
|
|
|
|
|
# Pipeline sources: the Meltano project file plus the transform, scripts and
# DAG directories, each copied to the same relative path under the WORKDIR.
COPY meltano.yml ./meltano.yml
COPY transform/ ./transform/
COPY scripts/ ./scripts/
COPY dags/ ./dags/
|
|
|
|
|
|
|
|
|
|
# Run `dbt deps` for the project in transform/ at build time.
#
# This step stays best-effort: a failure does not abort the image build.
# Unlike before, dbt's stderr is no longer discarded and a warning is printed
# when the command fails, so a broken dependency fetch is visible in the
# build log instead of surfacing later at runtime.
#
# WORKDIR is used instead of `RUN cd ...` and is restored afterwards so that
# later instructions and the running container keep /opt/pipeline as their
# working directory.
WORKDIR /opt/pipeline/transform
RUN dbt deps --profiles-dir . \
    || echo "WARNING: 'dbt deps' failed; dbt packages were not installed at build time" >&2
WORKDIR /opt/pipeline
|
|
|
|
|
|
|
|
|
|
# Runtime environment: Airflow's home directory, and the pipeline root added
# to Python's module search path.
ENV AIRFLOW_HOME=/opt/airflow \
    PYTHONPATH=/opt/pipeline
|
|
|
|
|
|
|
|
|
|
# Default command: start the Airflow webserver. Exec (JSON-array) form is
# used, so the process is launched directly rather than through a shell.
# NOTE(review): no USER instruction is visible in this file, so the process
# presumably runs as the base image's default user -- confirm whether a
# non-root user should be configured.
CMD ["airflow", "webserver"]
|