fix(tap-uk-ees): case-insensitive URN column matching for older census files

Older census CSVs use 'URN' (uppercase) while the stream expects 'urn'. Normalise the column name before filtering and emitting records.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 22:36:16 +01:00
parent 752abd69a5
commit fc011c6547
1 changed file with 7 additions and 1 deletion
@@ -129,12 +129,18 @@ class EESDatasetStream(Stream):
                cols[0] = cols[0].lstrip("\ufeff").lstrip("ï»¿")
                df.columns = cols

+            # Normalise URN column name — older files use 'URN' instead of 'urn'
+            urn_col = self._urn_column
+            col_lower = {c.lower(): c for c in df.columns}
+            actual_urn = col_lower.get(urn_col.lower())
+            if actual_urn and actual_urn != urn_col:
+                df.rename(columns={actual_urn: urn_col}, inplace=True)
+
            # Filter to school-level data if the column exists
            if "geographic_level" in df.columns:
                df = df[df["geographic_level"] == "School"]

            # Drop rows with no URN (LA/category aggregates that slip through the level filter)
-            urn_col = self._urn_column
            if urn_col in df.columns:
                df = df[df[urn_col].notna() & (df[urn_col] != "")]