fix(pipeline): expand GIAS schema, handle empty strings, scope DAG selectors
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m8s
Build and Push Docker Images / Build Integrator (push) Successful in 57s
Build and Push Docker Images / Build Kestra Init (push) Successful in 34s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m8s
Build and Push Docker Images / Build Integrator (push) Successful in 57s
Build and Push Docker Images / Build Kestra Init (push) Successful in 34s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
- Declare all 34 columns needed by dbt in the GIAS tap schema (target-postgres only persists columns present in the Singer schema message)
- Convert empty strings to NULL (via nullif() or a CASE expression) before integer/date casts in staging models
- Scope the daily DAG's dbt build to GIAS models only (stg_gias_establishments+ stg_gias_links+) to avoid errors on unloaded sources
- Scope the annual EES DAG similarly; remove redundant dbt test steps
- Make dim_school gracefully handle a missing int_ofsted_latest table

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,12 +2,14 @@
|
||||
|
||||
with schools as (
|
||||
select * from {{ ref('stg_gias_establishments') }}
|
||||
),
|
||||
|
||||
latest_ofsted as (
|
||||
select * from {{ ref('int_ofsted_latest') }}
|
||||
)
|
||||
|
||||
{% set ofsted_relation = adapter.get_relation(
|
||||
database=target.database,
|
||||
schema=target.schema,
|
||||
identifier='int_ofsted_latest'
|
||||
) %}
|
||||
|
||||
select
|
||||
s.urn,
|
||||
s.local_authority_code * 1000 + s.establishment_number as laestab,
|
||||
@@ -30,11 +32,19 @@ select
|
||||
s.nursery_provision,
|
||||
s.admissions_policy,
|
||||
|
||||
-- Latest Ofsted
|
||||
-- Latest Ofsted (populated after monthly Ofsted pipeline runs)
|
||||
{% if ofsted_relation is not none %}
|
||||
o.overall_effectiveness as ofsted_grade,
|
||||
o.inspection_date as ofsted_date,
|
||||
o.framework as ofsted_framework
|
||||
{% else %}
|
||||
null::text as ofsted_grade,
|
||||
null::date as ofsted_date,
|
||||
null::text as ofsted_framework
|
||||
{% endif %}
|
||||
|
||||
from schools s
|
||||
left join latest_ofsted o on s.urn = o.urn
|
||||
{% if ofsted_relation is not none %}
|
||||
left join {{ ref('int_ofsted_latest') }} o on s.urn = o.urn
|
||||
{% endif %}
|
||||
where s.status = 'Open'
|
||||
|
||||
@@ -8,9 +8,9 @@ with source as (
|
||||
renamed as (
|
||||
select
|
||||
cast("URN" as integer) as urn,
|
||||
cast("LA (code)" as integer) as local_authority_code,
|
||||
cast(nullif("LA (code)", '') as integer) as local_authority_code,
|
||||
"LA (name)" as local_authority_name,
|
||||
cast("EstablishmentNumber" as integer) as establishment_number,
|
||||
cast(nullif("EstablishmentNumber", '') as integer) as establishment_number,
|
||||
"EstablishmentName" as school_name,
|
||||
"TypeOfEstablishment (name)" as school_type,
|
||||
"PhaseOfEducation (name)" as phase,
|
||||
@@ -18,7 +18,7 @@ renamed as (
|
||||
"ReligiousCharacter (name)" as religious_character,
|
||||
"AdmissionsPolicy (name)" as admissions_policy,
|
||||
"SchoolCapacity" as capacity,
|
||||
cast("NumberOfPupils" as integer) as total_pupils,
|
||||
cast(nullif("NumberOfPupils", '') as integer) as total_pupils,
|
||||
"HeadTitle (name)" as head_title,
|
||||
"HeadFirstName" as head_first_name,
|
||||
"HeadLastName" as head_last_name,
|
||||
@@ -30,18 +30,17 @@ renamed as (
|
||||
"County (name)" as county,
|
||||
"Postcode" as postcode,
|
||||
"EstablishmentStatus (name)" as status,
|
||||
cast("OpenDate" as date) as open_date,
|
||||
cast("CloseDate" as date) as close_date,
|
||||
case when "OpenDate" = '' then null else cast("OpenDate" as date) end as open_date,
|
||||
case when "CloseDate" = '' then null else cast("CloseDate" as date) end as close_date,
|
||||
"Trusts (name)" as academy_trust_name,
|
||||
cast("Trusts (code)" as integer) as academy_trust_uid,
|
||||
cast(nullif("Trusts (code)", '') as integer) as academy_trust_uid,
|
||||
"UrbanRural (name)" as urban_rural,
|
||||
"ParliamentaryConstituency (name)" as parliamentary_constituency,
|
||||
"NurseryProvision (name)" as nursery_provision,
|
||||
cast("Easting" as integer) as easting,
|
||||
cast("Northing" as integer) as northing,
|
||||
-- Age range
|
||||
cast("StatutoryLowAge" as integer) as statutory_low_age,
|
||||
cast("StatutoryHighAge" as integer) as statutory_high_age
|
||||
cast(nullif("Easting", '') as integer) as easting,
|
||||
cast(nullif("Northing", '') as integer) as northing,
|
||||
cast(nullif("StatutoryLowAge", '') as integer) as statutory_low_age,
|
||||
cast(nullif("StatutoryHighAge", '') as integer) as statutory_high_age
|
||||
from source
|
||||
where "URN" is not null
|
||||
)
|
||||
|
||||
@@ -9,7 +9,7 @@ renamed as (
|
||||
cast("URN" as integer) as urn,
|
||||
cast("LinkURN" as integer) as linked_urn,
|
||||
"LinkType" as link_type,
|
||||
cast("LinkEstablishedDate" as date) as link_date
|
||||
case when "LinkEstablishedDate" = '' then null else cast("LinkEstablishedDate" as date) end as link_date
|
||||
from source
|
||||
where "URN" is not null
|
||||
and "LinkURN" is not null
|
||||
|
||||
Reference in New Issue
Block a user