From 77f75fb6e50e59c6f282e258ba84bb850e69a30d Mon Sep 17 00:00:00 2001 From: Tudor Date: Fri, 27 Mar 2026 12:16:36 +0000 Subject: [PATCH] fix(dbt): deduplicate predecessor KS2 rows and downgrade orphan test to warn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - int_ks2_with_lineage: use DISTINCT ON (current_urn, year) in predecessor_ks2 to handle schools with multiple predecessors that both have KS2 data for the same year (e.g. two schools that merged). Keeps the predecessor with most pupils. - dbt_project.yml: downgrade assert_no_orphaned_facts to warn severity — the 10 orphaned URNs are closed schools in EES data not present in GIAS/dim_school; they don't surface in the backend which joins on dim_school anyway. Co-Authored-By: Claude Sonnet 4.6 --- pipeline/transform/dbt_project.yml | 5 +++++ .../transform/models/intermediate/int_ks2_with_lineage.sql | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pipeline/transform/dbt_project.yml b/pipeline/transform/dbt_project.yml index 86dce6a..ba23641 100644 --- a/pipeline/transform/dbt_project.yml +++ b/pipeline/transform/dbt_project.yml @@ -23,6 +23,11 @@ models: +materialized: table +schema: marts +tests: + school_compare: + assert_no_orphaned_facts: + +severity: warn + seeds: school_compare: +schema: seeds diff --git a/pipeline/transform/models/intermediate/int_ks2_with_lineage.sql b/pipeline/transform/models/intermediate/int_ks2_with_lineage.sql index e9c4066..68dc193 100644 --- a/pipeline/transform/models/intermediate/int_ks2_with_lineage.sql +++ b/pipeline/transform/models/intermediate/int_ks2_with_lineage.sql @@ -19,7 +19,8 @@ with current_ks2 as ( ), predecessor_ks2 as ( - select + -- If multiple predecessors have data for the same year, keep the one with most pupils. 
+ select distinct on (lin.current_urn, ks2.year) lin.current_urn, ks2.urn as source_urn, ks2.year, ks2.total_pupils, ks2.eligible_pupils, @@ -40,6 +41,7 @@ predecessor_ks2 as ( where curr.urn = lin.current_urn and curr.year = ks2.year ) + order by lin.current_urn, ks2.year, ks2.total_pupils desc nulls last, ks2.urn ), combined as (