diff --git a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py index 7e71b97..2097d17 100644 --- a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py +++ b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py @@ -129,12 +129,18 @@ class EESDatasetStream(Stream): cols[0] = cols[0].lstrip("\ufeff").lstrip("") df.columns = cols + # Normalise URN column name — older files use 'URN' instead of 'urn' + urn_col = self._urn_column + col_lower = {c.lower(): c for c in df.columns} + actual_urn = col_lower.get(urn_col.lower()) + if actual_urn and actual_urn != urn_col: + df.rename(columns={actual_urn: urn_col}, inplace=True) + # Filter to school-level data if the column exists if "geographic_level" in df.columns: df = df[df["geographic_level"] == "School"] # Drop rows with no URN (LA/category aggregates that slip through the level filter) - urn_col = self._urn_column if urn_col in df.columns: df = df[df[urn_col].notna() & (df[urn_col] != "")]