diff --git a/pipeline/plugins/extractors/tap-uk-ofsted/tap_uk_ofsted/tap.py b/pipeline/plugins/extractors/tap-uk-ofsted/tap_uk_ofsted/tap.py index f1ba1f9..764bff5 100644 --- a/pipeline/plugins/extractors/tap-uk-ofsted/tap_uk_ofsted/tap.py +++ b/pipeline/plugins/extractors/tap-uk-ofsted/tap_uk_ofsted/tap.py @@ -137,7 +137,9 @@ class OfstedInspectionsStream(Stream): lines = text.split("\n") header_idx = 0 for i, line in enumerate(lines[:20]): - if "URN" in line or "urn" in line.lower(): + # Match lines where URN appears as a CSV field (start or after comma), + # not as a substring of words like "turn" or "return". + if re.search(r'(?:^|,)\s*URN\s*(?:,|$)', line): header_idx = i break df = pd.read_csv(