From 54df58746e945b882435bec37177b0e0dc3f4ac2 Mon Sep 17 00:00:00 2001 From: Tudor Date: Thu, 26 Mar 2026 21:18:59 +0000 Subject: [PATCH] feat(pipeline): use GIAS easting/northing for all geocoding, drop postcode step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GIAS grid references are the actual school location — far more accurate than postcode centroids. Remove geocode_postcodes.py from the daily DAG and the postcode-not-null filter from dim_location. Co-Authored-By: Claude Opus 4.6 --- pipeline/dags/school_data_pipeline.py | 7 +------ pipeline/transform/models/marts/dim_location.sql | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/pipeline/dags/school_data_pipeline.py b/pipeline/dags/school_data_pipeline.py index 0718a97..60b6552 100644 --- a/pipeline/dags/school_data_pipeline.py +++ b/pipeline/dags/school_data_pipeline.py @@ -82,17 +82,12 @@ print(f'Validation passed: {{count}} GIAS rows') bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_gias_establishments+ stg_gias_links+ --exclude int_ks2_with_lineage+ int_ks4_with_lineage+", ) - geocode_new = BashOperator( - task_id="geocode_new", - bash_command=f"cd {PIPELINE_DIR} && python scripts/geocode_postcodes.py", - ) - sync_typesense = BashOperator( task_id="sync_typesense", bash_command=f"cd {PIPELINE_DIR} && python scripts/sync_typesense.py", ) - extract_group >> validate_raw >> dbt_build >> geocode_new >> sync_typesense + extract_group >> validate_raw >> dbt_build >> sync_typesense # ── Monthly DAG (Ofsted) ─────────────────────────────────────────────── diff --git a/pipeline/transform/models/marts/dim_location.sql b/pipeline/transform/models/marts/dim_location.sql index 3f7d4de..5993719 100644 --- a/pipeline/transform/models/marts/dim_location.sql +++ b/pipeline/transform/models/marts/dim_location.sql @@ -22,4 +22,3 @@ select end as geom from {{ ref('stg_gias_establishments') }} s where s.status = 'Open' - and s.postcode is not null