From 8e8d1bd8c57c79784fa8606629b44f61d6d8945d Mon Sep 17 00:00:00 2001
From: Tudor
Date: Fri, 27 Mar 2026 10:13:17 +0000
Subject: [PATCH] fix(ees-tap): filter out rows with null URN before emitting

The admissions school-level file contains some rows with null school_urn
(LA/category aggregates that survive the geographic_level filter). These
cause a not-null constraint violation at target-postgres. Drop any row
where the URN column is null or empty before yielding records.

Co-Authored-By: Claude Sonnet 4.6
---
 pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py
index 6e07728..5d00c05 100644
--- a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py
+++ b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py
@@ -95,6 +95,11 @@ class EESDatasetStream(Stream):
         if "geographic_level" in df.columns:
             df = df[df["geographic_level"] == "School"]
 
+        # Drop rows with no URN (LA/category aggregates that slip through the level filter)
+        urn_col = self._urn_column
+        if urn_col in df.columns:
+            df = df[df[urn_col].notna() & (df[urn_col] != "")]
+
         self.logger.info("Emitting %d school-level rows", len(df))
 
         for _, row in df.iterrows():