From 7f82088d53b80688d52785c60a67b70af8644944 Mon Sep 17 00:00:00 2001 From: Tudor Date: Thu, 26 Mar 2026 20:51:40 +0000 Subject: [PATCH] fix(pipeline): use to_date for DD-MM-YYYY GIAS dates, exclude EES models from daily DAG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GIAS CSV dates are DD-MM-YYYY format — use to_date() instead of cast(). Exclude int_ks2_with_lineage+ and int_ks4_with_lineage+ from daily DAG selector since they depend on EES data not yet loaded. Co-Authored-By: Claude Opus 4.6 --- pipeline/dags/school_data_pipeline.py | 2 +- pipeline/transform/models/staging/stg_gias_establishments.sql | 4 ++-- pipeline/transform/models/staging/stg_gias_links.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipeline/dags/school_data_pipeline.py b/pipeline/dags/school_data_pipeline.py index 8b7a1ac..0718a97 100644 --- a/pipeline/dags/school_data_pipeline.py +++ b/pipeline/dags/school_data_pipeline.py @@ -79,7 +79,7 @@ print(f'Validation passed: {{count}} GIAS rows') dbt_build = BashOperator( task_id="dbt_build", - bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_gias_establishments+ stg_gias_links+", + bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_gias_establishments+ stg_gias_links+ --exclude int_ks2_with_lineage+ int_ks4_with_lineage+", ) geocode_new = BashOperator( diff --git a/pipeline/transform/models/staging/stg_gias_establishments.sql b/pipeline/transform/models/staging/stg_gias_establishments.sql index 1b4af2e..3cea609 100644 --- a/pipeline/transform/models/staging/stg_gias_establishments.sql +++ b/pipeline/transform/models/staging/stg_gias_establishments.sql @@ -30,8 +30,8 @@ renamed as ( "County (name)" as county, "Postcode" as postcode, "EstablishmentStatus (name)" as status, - case when "OpenDate" = '' then null else cast("OpenDate" as date) end as open_date, - case when "CloseDate" = '' then null else cast("CloseDate" as date) end as close_date, + case when "OpenDate" = '' then null else to_date("OpenDate", 'DD-MM-YYYY') end as open_date, + case when "CloseDate" = '' then null else to_date("CloseDate", 'DD-MM-YYYY') end as close_date, "Trusts (name)" as academy_trust_name, cast(nullif("Trusts (code)", '') as integer) as academy_trust_uid, "UrbanRural (name)" as urban_rural, diff --git a/pipeline/transform/models/staging/stg_gias_links.sql b/pipeline/transform/models/staging/stg_gias_links.sql index 1e3ccf6..520bf68 100644 --- a/pipeline/transform/models/staging/stg_gias_links.sql +++ b/pipeline/transform/models/staging/stg_gias_links.sql @@ -9,7 +9,7 @@ renamed as ( cast("URN" as integer) as urn, cast("LinkURN" as integer) as linked_urn, "LinkType" as link_type, - case when "LinkEstablishedDate" = '' then null else cast("LinkEstablishedDate" as date) end as link_date + case when "LinkEstablishedDate" = '' then null else to_date("LinkEstablishedDate", 'DD-MM-YYYY') end as link_date from source where "URN" is not null and "LinkURN" is not null