fix(pipeline): expand GIAS schema, handle empty strings, scope DAG selectors
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 32s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m8s
Build and Push Docker Images / Build Integrator (push) Successful in 57s
Build and Push Docker Images / Build Kestra Init (push) Successful in 34s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m39s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s

- Declare all 34 columns needed by dbt in GIAS tap schema (target-postgres
  only persists columns present in the Singer schema message)
- Use nullif() for empty-string-to-integer casts and a CASE guard for
  empty-string-to-date casts in staging models
- Scope daily DAG dbt build to GIAS models only (stg_gias_establishments+
  stg_gias_links+) to avoid errors on unloaded sources
- Scope annual EES DAG similarly; remove redundant dbt test steps
- Make dim_school gracefully handle missing int_ofsted_latest table
  (emit NULL Ofsted columns and skip the join when the relation is absent)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-26 20:43:24 +00:00
parent 24cfb83144
commit e7b1ab9f37
5 changed files with 59 additions and 35 deletions

View File

@@ -2,12 +2,14 @@
with schools as (
select * from {{ ref('stg_gias_establishments') }}
),
latest_ofsted as (
select * from {{ ref('int_ofsted_latest') }}
)
{% set ofsted_relation = adapter.get_relation(
database=target.database,
schema=target.schema,
identifier='int_ofsted_latest'
) %}
select
s.urn,
s.local_authority_code * 1000 + s.establishment_number as laestab,
@@ -30,11 +32,19 @@ select
s.nursery_provision,
s.admissions_policy,
-- Latest Ofsted
-- Latest Ofsted (populated after monthly Ofsted pipeline runs)
{% if ofsted_relation is not none %}
o.overall_effectiveness as ofsted_grade,
o.inspection_date as ofsted_date,
o.framework as ofsted_framework
{% else %}
null::text as ofsted_grade,
null::date as ofsted_date,
null::text as ofsted_framework
{% endif %}
from schools s
left join latest_ofsted o on s.urn = o.urn
{% if ofsted_relation is not none %}
left join {{ ref('int_ofsted_latest') }} o on s.urn = o.urn
{% endif %}
where s.status = 'Open'

View File

@@ -8,9 +8,9 @@ with source as (
renamed as (
select
cast("URN" as integer) as urn,
cast("LA (code)" as integer) as local_authority_code,
cast(nullif("LA (code)", '') as integer) as local_authority_code,
"LA (name)" as local_authority_name,
cast("EstablishmentNumber" as integer) as establishment_number,
cast(nullif("EstablishmentNumber", '') as integer) as establishment_number,
"EstablishmentName" as school_name,
"TypeOfEstablishment (name)" as school_type,
"PhaseOfEducation (name)" as phase,
@@ -18,7 +18,7 @@ renamed as (
"ReligiousCharacter (name)" as religious_character,
"AdmissionsPolicy (name)" as admissions_policy,
"SchoolCapacity" as capacity,
cast("NumberOfPupils" as integer) as total_pupils,
cast(nullif("NumberOfPupils", '') as integer) as total_pupils,
"HeadTitle (name)" as head_title,
"HeadFirstName" as head_first_name,
"HeadLastName" as head_last_name,
@@ -30,18 +30,17 @@ renamed as (
"County (name)" as county,
"Postcode" as postcode,
"EstablishmentStatus (name)" as status,
cast("OpenDate" as date) as open_date,
cast("CloseDate" as date) as close_date,
case when "OpenDate" = '' then null else cast("OpenDate" as date) end as open_date,
case when "CloseDate" = '' then null else cast("CloseDate" as date) end as close_date,
"Trusts (name)" as academy_trust_name,
cast("Trusts (code)" as integer) as academy_trust_uid,
cast(nullif("Trusts (code)", '') as integer) as academy_trust_uid,
"UrbanRural (name)" as urban_rural,
"ParliamentaryConstituency (name)" as parliamentary_constituency,
"NurseryProvision (name)" as nursery_provision,
cast("Easting" as integer) as easting,
cast("Northing" as integer) as northing,
-- Age range
cast("StatutoryLowAge" as integer) as statutory_low_age,
cast("StatutoryHighAge" as integer) as statutory_high_age
cast(nullif("Easting", '') as integer) as easting,
cast(nullif("Northing", '') as integer) as northing,
cast(nullif("StatutoryLowAge", '') as integer) as statutory_low_age,
cast(nullif("StatutoryHighAge", '') as integer) as statutory_high_age
from source
where "URN" is not null
)

View File

@@ -9,7 +9,7 @@ renamed as (
cast("URN" as integer) as urn,
cast("LinkURN" as integer) as linked_urn,
"LinkType" as link_type,
cast("LinkEstablishedDate" as date) as link_date
case when "LinkEstablishedDate" = '' then null else cast("LinkEstablishedDate" as date) end as link_date
from source
where "URN" is not null
and "LinkURN" is not null