From f3a8ebdb4b1d9268046870bb5bb097837e6e7397 Mon Sep 17 00:00:00 2001 From: Tudor Date: Sat, 28 Mar 2026 18:58:50 +0000 Subject: [PATCH] fix(dbt): deduplicate int_ks4_with_lineage predecessor rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple predecessor URNs exist for the same current school and year, use DISTINCT ON to keep the one with the most pupils — matching the same logic already in int_ks2_with_lineage. Co-Authored-By: Claude Sonnet 4.6 --- .../transform/models/intermediate/int_ks4_with_lineage.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipeline/transform/models/intermediate/int_ks4_with_lineage.sql b/pipeline/transform/models/intermediate/int_ks4_with_lineage.sql index 124992f..533052a 100644 --- a/pipeline/transform/models/intermediate/int_ks4_with_lineage.sql +++ b/pipeline/transform/models/intermediate/int_ks4_with_lineage.sql @@ -16,7 +16,8 @@ with current_ks4 as ( ), predecessor_ks4 as ( - select + -- If multiple predecessors have data for the same year, keep the one with most pupils. + select distinct on (lin.current_urn, ks4.year) lin.current_urn, ks4.urn as source_urn, ks4.year, ks4.total_pupils, ks4.eligible_pupils, ks4.prior_attainment_avg, @@ -35,6 +36,7 @@ predecessor_ks4 as ( where curr.urn = lin.current_urn and curr.year = ks4.year ) + order by lin.current_urn, ks4.year, ks4.total_pupils desc nulls last ), combined as (