Files
school_compare/pipeline/meltano.yml
T
Tudor Sitaru ae33bfe04b
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 13s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 47s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m18s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
refactor(pipeline): unify KS2 and KS4 legacy sources to same annual ZIPs
LegacyKS2Stream now auto-detects ZIP vs bare CSV — if the download is a ZIP,
it extracts england_ks2final.csv; if it is a plain CSV file, it reads the file directly.
This keeps backwards compatibility while allowing both streams to share the
same DfE annual archive URLs.

legacy_ks2_urls updated to point at the same 4 ZIPs as legacy_ks4_urls so
only one set of archives needs to be maintained going forward.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 10:41:01 +01:00

115 lines
3.6 KiB
YAML

# Identifier of this Meltano project.
project_id: school-compare-pipeline
# Environment selected when none is named explicitly; "production" is one of
# the entries defined under `environments` in this file.
default_environment: production
# Plugin definitions: custom extractors installed from local paths under
# ./plugins/extractors, the Postgres loader, and the dbt utility.
plugins:
  extractors:
  - name: tap-uk-gias
    namespace: uk_gias
    pip_url: ./plugins/extractors/tap-uk-gias
    executable: tap-uk-gias
    settings:
    # No value is set in this file.
    # NOTE(review): presumably supplied via TAP_UK_GIAS_DOWNLOAD_URL or the
    # tap's own default — confirm.
    - name: download_url
      kind: string
      description: GIAS bulk CSV download URL
  - name: tap-uk-ees
    namespace: uk_ees
    pip_url: ./plugins/extractors/tap-uk-ees
    executable: tap-uk-ees
    settings:
    - name: base_url
      kind: string
      value: https://content.explore-education-statistics.service.gov.uk/api/v1
    - name: datasets
      kind: array
      description: List of EES dataset configs to extract
    - name: legacy_ks2_urls
      kind: object
      description: "Year code → URL mapping for legacy KS2 ZIPs (england_ks2final.csv inside) or bare CSVs"
    - name: legacy_ks4_urls
      kind: object
      description: "Year code → URL mapping for legacy KS4 ZIPs (england_ks4final.csv inside)"
    config:
      # Both mappings deliberately list the same four annual archives, keyed by
      # academic-year code (quoted so the keys stay strings). Update them together.
      legacy_ks2_urls:
        "201516": "http://10.0.1.224:8081/filebrowser/api/public/dl/iaoSkg1v?inline=true"
        "201617": "http://10.0.1.224:8081/filebrowser/api/public/dl/bqCMUcIH?inline=true"
        "201718": "http://10.0.1.224:8081/filebrowser/api/public/dl/0L61fE_a?inline=true"
        "201819": "http://10.0.1.224:8081/filebrowser/api/public/dl/XJGJ5lG1?inline=true"
      legacy_ks4_urls:
        "201516": "http://10.0.1.224:8081/filebrowser/api/public/dl/iaoSkg1v?inline=true"
        "201617": "http://10.0.1.224:8081/filebrowser/api/public/dl/bqCMUcIH?inline=true"
        "201718": "http://10.0.1.224:8081/filebrowser/api/public/dl/0L61fE_a?inline=true"
        "201819": "http://10.0.1.224:8081/filebrowser/api/public/dl/XJGJ5lG1?inline=true"
  - name: tap-uk-ofsted
    namespace: uk_ofsted
    pip_url: ./plugins/extractors/tap-uk-ofsted
    executable: tap-uk-ofsted
    settings:
    - name: mi_url
      kind: string
      description: Ofsted Management Information download URL
  - name: tap-uk-parent-view
    namespace: uk_parent_view
    pip_url: ./plugins/extractors/tap-uk-parent-view
    executable: tap-uk-parent-view
  - name: tap-uk-fbit
    namespace: uk_fbit
    pip_url: ./plugins/extractors/tap-uk-fbit
    executable: tap-uk-fbit
    settings:
    - name: base_url
      kind: string
      value: https://financial-benchmarking-and-insights-tool.education.gov.uk/api
  - name: tap-uk-idaci
    namespace: uk_idaci
    pip_url: ./plugins/extractors/tap-uk-idaci
    executable: tap-uk-idaci
  loaders:
  - name: target-postgres
    variant: meltanolabs
    pip_url: meltanolabs-target-postgres
    config:
      # Connection details come from PG_* environment variables, so no
      # credentials are stored at the plugin level.
      host: $PG_HOST
      port: $PG_PORT
      user: $PG_USER
      password: $PG_PASSWORD
      database: $PG_DATABASE
      default_target_schema: raw
  utilities:
  - name: dbt-postgres
    variant: dbt-labs
    pip_url: dbt-postgres~=1.10
    config:
      # The dbt project and its profiles both live in <project root>/transform.
      project_dir: $MELTANO_PROJECT_ROOT/transform
      profiles_dir: $MELTANO_PROJECT_ROOT/transform
# Per-environment overrides for the target-postgres loader.
environments:
# dev: fixed connection settings pointing at localhost.
# NOTE(review): credentials are hard-coded here; presumably a throwaway local
# database only — confirm.
- name: dev
  config:
    plugins:
      loaders:
      - name: target-postgres
        config:
          host: localhost
          port: 5432
          user: postgres
          password: postgres
          database: school_compare
# production: every connection value is read from a PG_* environment variable.
- name: production
  config:
    plugins:
      loaders:
      - name: target-postgres
        config:
          host: ${PG_HOST}
          port: ${PG_PORT}
          user: ${PG_USER}
          password: ${PG_PASSWORD}
          database: ${PG_DATABASE}