From 2b757e556d7a4a228434d5a327b6c91359228c64 Mon Sep 17 00:00:00 2001 From: Tudor Sitaru Date: Wed, 1 Apr 2026 13:07:51 +0100 Subject: [PATCH] fix(legacy-ks2): strip % suffix from percentage values Old DfE CSVs encode percentages as "57%" not "57". The safe_numeric macro rejects non-numeric strings, so strip the suffix before emitting. Co-Authored-By: Claude Opus 4.6 --- pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py index 7b81fac..cbf26d3 100644 --- a/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py +++ b/pipeline/plugins/extractors/tap-uk-ees/tap_uk_ees/tap.py @@ -592,7 +592,11 @@ class LegacyKS2Stream(Stream): for _, row in df.iterrows(): record = {"year": year_code} for old_col, new_col in _LEGACY_KS2_COLUMN_MAP.items(): - record[new_col] = row.get(old_col, "") + val = row.get(old_col, "") + # Strip % suffix — old DfE CSVs use "57%" not "57" + if isinstance(val, str) and val.endswith("%"): + val = val[:-1] + record[new_col] = val yield record