diff --git a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py index 9f550c2..fa6031f 100644 --- a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py +++ b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py @@ -83,6 +83,9 @@ class EESDatasetStream(Stream): with zf.open(target) as f: df = pd.read_csv(f, dtype=str, keep_default_na=False, encoding=self._encoding) + # Strip UTF-8 BOM from column names (some DfE files have a BOM on the first column) + df.columns = df.columns.str.lstrip("\ufeff") + # Filter to school-level data if the column exists if "geographic_level" in df.columns: df = df[df["geographic_level"] == "School"]