"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""

# Column name mappings from DfE CSV to API field names.
# Keys are the raw CSV column headers; values are the API field names.
# One entry per line so that the section comments below cannot swallow
# any of the mapping entries.
COLUMN_MAPPINGS = {
    "URN": "urn",
    "SCHNAME": "school_name",
    "ADDRESS1": "address1",
    "ADDRESS2": "address2",
    "TOWN": "town",
    "PCODE": "postcode",
    "NFTYPE": "school_type_code",
    "RELDENOM": "religious_denomination",
    "AGERANGE": "age_range",
    "TOTPUPS": "total_pupils",
    "TELIG": "eligible_pupils",
    # Core KS2 metrics
    "PTRWM_EXP": "rwm_expected_pct",
    "PTRWM_HIGH": "rwm_high_pct",
    "READPROG": "reading_progress",
    "WRITPROG": "writing_progress",
    "MATPROG": "maths_progress",
    "PTREAD_EXP": "reading_expected_pct",
    "PTWRITTA_EXP": "writing_expected_pct",
    "PTMAT_EXP": "maths_expected_pct",
    "READ_AVERAGE": "reading_avg_score",
    "MAT_AVERAGE": "maths_avg_score",
    "PTREAD_HIGH": "reading_high_pct",
    "PTWRITTA_HIGH": "writing_high_pct",
    "PTMAT_HIGH": "maths_high_pct",
    # GPS (Grammar, Punctuation & Spelling)
    "PTGPS_EXP": "gps_expected_pct",
    "PTGPS_HIGH": "gps_high_pct",
    "GPS_AVERAGE": "gps_avg_score",
    # Science
    "PTSCITA_EXP": "science_expected_pct",
    # School context
    "PTFSM6CLA1A": "disadvantaged_pct",
    "PTEALGRP2": "eal_pct",
    "PSENELK": "sen_support_pct",
    "PSENELE": "sen_ehcp_pct",
    "PTMOBN": "stability_pct",
    # Pupil absence from tests
    "PTREAD_AT": "reading_absence_pct",
    "PTGPS_AT": "gps_absence_pct",
    "PTMAT_AT": "maths_absence_pct",
    "PTWRITTA_AD": "writing_absence_pct",
    "PTSCITA_AD": "science_absence_pct",
    # Gender breakdown
    "PTRWM_EXP_B": "rwm_expected_boys_pct",
    "PTRWM_EXP_G": "rwm_expected_girls_pct",
    "PTRWM_HIGH_B": "rwm_high_boys_pct",
    "PTRWM_HIGH_G": "rwm_high_girls_pct",
    # Disadvantaged performance
    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
    "DIFFN_RWM_EXP": "disadvantaged_gap",
    # 3-year averages
    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
    "READ_AVERAGE_3YR": "reading_avg_3yr",
    "MAT_AVERAGE_3YR": "maths_avg_3yr",
}
# Numeric columns that need parsing
NUMERIC_COLUMNS = [
    # Core metrics
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    # GPS & Science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",
    # School context
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Pupil absence from tests
    "reading_absence_pct",
    "gps_absence_pct",
    "maths_absence_pct",
    "writing_absence_pct",
    "science_absence_pct",
    # Gender breakdown
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged performance
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]

# School type code to user-friendly name mapping
SCHOOL_TYPE_MAP = {
    # Academies
    "AC": "Academy",
    "ACC": "Academy",
    "ACCS": "Academy",
    "ACS": "Academy (Sponsor Led)",
    # Community Schools
    "CY": "Community",
    "CYS": "Community",
    # Voluntary Schools
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",
    # Foundation Schools
    "FD": "Foundation",
    "F": "Foundation",
    "FDS": "Foundation",
    # Free Schools
    "FS": "Free School",
}

# Special values to treat as null (the empty string is included)
NULL_VALUES = ["SUPP", "NE", "NA", "NP", "NEW", "LOW", ""]

# KS2 Metric definitions - single source of truth
# Used by both backend API and frontend
#
# Each entry is keyed by the metric's field name and carries five string
# fields: "name", "short_name", "description", "type" ("percentage" or
# "score") and "category".
#
# NOTE(review): "sen_ehcp_pct" is listed in NUMERIC_COLUMNS but has no entry
# here and is absent from RANKING_COLUMNS - confirm whether that is intended.
METRIC_DEFINITIONS = {
    # Expected Standard
    "rwm_expected_pct": {
        "name": "RWM Combined %",
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
        "category": "expected",
    },
    # Higher Standard
    "rwm_high_pct": {
        "name": "RWM Combined Higher %",
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
        "category": "higher",
    },
    # Progress Scores
    "reading_progress": {
        "name": "Reading Progress",
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    # Average Scores
    "reading_avg_score": {
        "name": "Reading Average Score",
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
        "category": "average",
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
        "name": "RWM Expected % (Boys)",
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
        "name": "RWM Expected % (Disadvantaged)",
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
        "category": "equity",
    },
    # School Context
    "disadvantaged_pct": {
        "name": "% Disadvantaged Pupils",
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
        "category": "context",
    },
    # Pupil Absence from Tests
    "reading_absence_pct": {
        "name": "Reading Test Absence %",
        "short_name": "Reading Absent",
        "description": "% of pupils absent from or unable to access the Reading test",
        "type": "percentage",
        "category": "absence",
    },
    "gps_absence_pct": {
        "name": "GPS Test Absence %",
        "short_name": "GPS Absent",
        "description": "% of pupils absent from or unable to access the GPS test",
        "type": "percentage",
        "category": "absence",
    },
    "maths_absence_pct": {
        "name": "Maths Test Absence %",
        "short_name": "Maths Absent",
        "description": "% of pupils absent from or unable to access the Maths test",
        "type": "percentage",
        "category": "absence",
    },
    "writing_absence_pct": {
        "name": "Writing Absence %",
        "short_name": "Writing Absent",
        "description": "% of pupils absent from or disapplied in Writing assessment",
        "type": "percentage",
        "category": "absence",
    },
    "science_absence_pct": {
        "name": "Science Absence %",
        "short_name": "Science Absent",
        "description": "% of pupils absent from or disapplied in Science assessment",
        "type": "percentage",
        "category": "absence",
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
        "name": "RWM Expected % (3-Year Avg)",
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
        "category": "trends",
    },
}

# Ranking columns to include in rankings response
RANKING_COLUMNS = [
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",
    # Core expected
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",
    # Core higher
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",
    # Progress & averages
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",
    # Gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Equity
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # Context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",
    # Absence
    "reading_absence_pct",
    "gps_absence_pct",
    "maths_absence_pct",
    "writing_absence_pct",
    "science_absence_pct",
    # 3-year
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]

# School listing columns
SCHOOL_COLUMNS = [
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "town",
    "postcode",
    "religious_denomination",
    "age_range",
    "latitude",
    "longitude",
]

# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source:
# https://www.gov.uk/government/publications/local-authority-codes
#
# Maps integer local-authority codes to display names.
#
# NOTE(review): this table was not cross-checked against the published list
# during review. Some names appear under two codes (North Yorkshire 815/941,
# Somerset 931/942, Buckinghamshire 825/943) - confirm each code against the
# source above before relying on this fallback.
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London",
    202: "Camden",
    203: "Greenwich",
    204: "Hackney",
    205: "Hammersmith and Fulham",
    206: "Islington",
    207: "Kensington and Chelsea",
    208: "Lambeth",
    209: "Lewisham",
    210: "Southwark",
    211: "Tower Hamlets",
    212: "Wandsworth",
    213: "Westminster",
    # Outer London
    301: "Barking and Dagenham",
    302: "Barnet",
    303: "Bexley",
    304: "Brent",
    305: "Bromley",
    306: "Croydon",
    307: "Ealing",
    308: "Enfield",
    309: "Haringey",
    310: "Harrow",
    311: "Havering",
    312: "Hillingdon",
    313: "Hounslow",
    314: "Kingston upon Thames",
    315: "Merton",
    316: "Newham",
    317: "Redbridge",
    318: "Richmond upon Thames",
    319: "Sutton",
    320: "Waltham Forest",
    # West Midlands
    330: "Birmingham",
    331: "Coventry",
    332: "Dudley",
    333: "Sandwell",
    334: "Solihull",
    335: "Walsall",
    336: "Wolverhampton",
    # Merseyside
    340: "Knowsley",
    341: "Liverpool",
    342: "St. Helens",
    343: "Sefton",
    344: "Wirral",
    # Greater Manchester
    350: "Bolton",
    351: "Bury",
    352: "Manchester",
    353: "Oldham",
    354: "Rochdale",
    355: "Salford",
    356: "Stockport",
    357: "Tameside",
    358: "Trafford",
    359: "Wigan",
    # South Yorkshire
    370: "Barnsley",
    371: "Doncaster",
    372: "Rotherham",
    373: "Sheffield",
    # West Yorkshire
    380: "Bradford",
    381: "Calderdale",
    382: "Kirklees",
    383: "Leeds",
    384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead",
    391: "Newcastle upon Tyne",
    392: "North Tyneside",
    393: "South Tyneside",
    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities (800+)
    800: "Bath and North East Somerset",
    801: "Bristol, City of",
    802: "North Somerset",
    803: "South Gloucestershire",
    805: "Hartlepool",
    806: "Middlesbrough",
    807: "Redcar and Cleveland",
    808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of",
    811: "East Riding of Yorkshire",
    812: "North East Lincolnshire",
    813: "North Lincolnshire",
    815: "North Yorkshire",
    816: "York",
    820: "Bedford",
    821: "Central Bedfordshire",
    822: "Luton",
    823: "West Northamptonshire",
    824: "North Northamptonshire",
    825: "Buckinghamshire",
    826: "Milton Keynes",
    830: "Derbyshire",
    831: "Derby",
    835: "Dorset",
    836: "Bournemouth, Christchurch and Poole",
    837: "Poole",  # Historic code (merged into 836)
    838: "Bournemouth",  # Historic code (merged into 836)
    839: "Durham",
    840: "Darlington",
    841: "East Sussex",
    845: "Brighton and Hove",
    846: "Hampshire",
    850: "Portsmouth",
    851: "Southampton",
    852: "Isle of Wight",
    855: "Leicestershire",
    856: "Leicester",
    857: "Rutland",
    860: "Staffordshire",
    861: "Stoke-on-Trent",
    865: "Wiltshire",
    866: "Swindon",
    867: "Bracknell Forest",
    868: "Windsor and Maidenhead",
    869: "West Berkshire",
    870: "Reading",
    871: "Slough",
    872: "Wokingham",
    873: "Cambridgeshire",
    874: "Peterborough",
    876: "Halton",
    877: "Warrington",
    878: "Devon",
    879: "Plymouth",
    880: "Torbay",
    881: "Essex",
    882: "Southend-on-Sea",
    883: "Thurrock",
    884: "Herefordshire",
    885: "Worcestershire",
    886: "Kent",
    887: "Medway",
    888: "Lancashire",
    889: "Blackburn with Darwen",
    890: "Blackpool",
    891: "Nottinghamshire",
    892: "Nottingham",
    893: "Shropshire",
    894: "Telford and Wrekin",
    895: "Cheshire East",
    896: "Cheshire West and Chester",
    # County councils (900+)
    908: "Cornwall",
    909: "Cumbria",
    916: "Gloucestershire",
    919: "Hertfordshire",
    921: "Norfolk",
    925: "Lincolnshire",
    926: "Northamptonshire",  # Historic (split into 823/824 in 2021)
    928: "Northumberland",
    929: "Oxfordshire",
    931: "Somerset",
    933: "Suffolk",
    935: "Surrey",
    936: "Warwickshire",
    937: "West Sussex",
    # New authorities (2023 reorganization)
    938: "Westmorland and Furness",
    940: "Cumberland",
    941: "North Yorkshire",  # New unitary
    942: "Somerset",  # New unitary (replaced 931)
    943: "Buckinghamshire",  # New unitary (2020, replacing 825 in some datasets)
}