diff --git a/pipeline/dags/school_data_pipeline.py b/pipeline/dags/school_data_pipeline.py index 8b7a1ac..0718a97 100644 --- a/pipeline/dags/school_data_pipeline.py +++ b/pipeline/dags/school_data_pipeline.py @@ -79,7 +79,7 @@ print(f'Validation passed: {{count}} GIAS rows') dbt_build = BashOperator( task_id="dbt_build", - bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_gias_establishments+ stg_gias_links+", + bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_gias_establishments+ stg_gias_links+ --exclude int_ks2_with_lineage+ int_ks4_with_lineage+", ) geocode_new = BashOperator( diff --git a/pipeline/transform/models/staging/stg_gias_establishments.sql b/pipeline/transform/models/staging/stg_gias_establishments.sql index 1b4af2e..3cea609 100644 --- a/pipeline/transform/models/staging/stg_gias_establishments.sql +++ b/pipeline/transform/models/staging/stg_gias_establishments.sql @@ -30,8 +30,8 @@ renamed as ( "County (name)" as county, "Postcode" as postcode, "EstablishmentStatus (name)" as status, - case when "OpenDate" = '' then null else cast("OpenDate" as date) end as open_date, - case when "CloseDate" = '' then null else cast("CloseDate" as date) end as close_date, + case when "OpenDate" = '' then null else to_date("OpenDate", 'DD-MM-YYYY') end as open_date, + case when "CloseDate" = '' then null else to_date("CloseDate", 'DD-MM-YYYY') end as close_date, "Trusts (name)" as academy_trust_name, cast(nullif("Trusts (code)", '') as integer) as academy_trust_uid, "UrbanRural (name)" as urban_rural, diff --git a/pipeline/transform/models/staging/stg_gias_links.sql b/pipeline/transform/models/staging/stg_gias_links.sql index 1e3ccf6..520bf68 100644 --- a/pipeline/transform/models/staging/stg_gias_links.sql +++ b/pipeline/transform/models/staging/stg_gias_links.sql @@ -9,7 +9,7 @@ renamed as ( cast("URN" as integer) as urn, cast("LinkURN" as integer) as linked_urn, "LinkType" as link_type, - case when "LinkEstablishedDate" = '' then null else cast("LinkEstablishedDate" as date) end as link_date + case when "LinkEstablishedDate" = '' then null else to_date("LinkEstablishedDate", 'DD-MM-YYYY') end as link_date from source where "URN" is not null and "LinkURN" is not null