""" Schema definitions: column mappings, metric definitions, school type mappings. Single source of truth for all data transformations. """ # Column name mappings from DfE CSV to API field names COLUMN_MAPPINGS = { 'URN': 'urn', 'SCHNAME': 'school_name', 'ADDRESS1': 'address1', 'ADDRESS2': 'address2', 'TOWN': 'town', 'PCODE': 'postcode', 'NFTYPE': 'school_type_code', 'RELDENOM': 'religious_denomination', 'AGERANGE': 'age_range', 'TOTPUPS': 'total_pupils', 'TELIG': 'eligible_pupils', # Core KS2 metrics 'PTRWM_EXP': 'rwm_expected_pct', 'PTRWM_HIGH': 'rwm_high_pct', 'READPROG': 'reading_progress', 'WRITPROG': 'writing_progress', 'MATPROG': 'maths_progress', 'PTREAD_EXP': 'reading_expected_pct', 'PTWRITTA_EXP': 'writing_expected_pct', 'PTMAT_EXP': 'maths_expected_pct', 'READ_AVERAGE': 'reading_avg_score', 'MAT_AVERAGE': 'maths_avg_score', 'PTREAD_HIGH': 'reading_high_pct', 'PTWRITTA_HIGH': 'writing_high_pct', 'PTMAT_HIGH': 'maths_high_pct', # GPS (Grammar, Punctuation & Spelling) 'PTGPS_EXP': 'gps_expected_pct', 'PTGPS_HIGH': 'gps_high_pct', 'GPS_AVERAGE': 'gps_avg_score', # Science 'PTSCITA_EXP': 'science_expected_pct', # School context 'PTFSM6CLA1A': 'disadvantaged_pct', 'PTEALGRP2': 'eal_pct', 'PSENELK': 'sen_support_pct', 'PSENELE': 'sen_ehcp_pct', 'PTMOBN': 'stability_pct', # Gender breakdown 'PTRWM_EXP_B': 'rwm_expected_boys_pct', 'PTRWM_EXP_G': 'rwm_expected_girls_pct', 'PTRWM_HIGH_B': 'rwm_high_boys_pct', 'PTRWM_HIGH_G': 'rwm_high_girls_pct', # Disadvantaged performance 'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct', 'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct', 'DIFFN_RWM_EXP': 'disadvantaged_gap', # 3-year averages 'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct', 'READ_AVERAGE_3YR': 'reading_avg_3yr', 'MAT_AVERAGE_3YR': 'maths_avg_3yr', } # Numeric columns that need parsing NUMERIC_COLUMNS = [ # Core metrics 'rwm_expected_pct', 'rwm_high_pct', 'reading_progress', 'writing_progress', 'maths_progress', 'reading_expected_pct', 'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score', 'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct', # GPS & Science 'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct', # School context 'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct', 'sen_support_pct', 'sen_ehcp_pct', 'stability_pct', # Gender breakdown 'rwm_expected_boys_pct', 'rwm_expected_girls_pct', 'rwm_high_boys_pct', 'rwm_high_girls_pct', # Disadvantaged performance 'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap', # 3-year averages 'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr', ] # School type code to name mapping SCHOOL_TYPE_MAP = { 'AC': 'Academy', 'ACC': 'Academy Converter', 'ACS': 'Academy Sponsor Led', 'CY': 'Community School', 'VA': 'Voluntary Aided', 'VC': 'Voluntary Controlled', 'FD': 'Foundation', 'F': 'Foundation', 'FS': 'Free School', } # Special values to treat as null NULL_VALUES = ['SUPP', 'NE', 'NA', 'NP', 'NEW', 'LOW', ''] # KS2 Metric definitions - single source of truth # Used by both backend API and frontend METRIC_DEFINITIONS = { # Expected Standard "rwm_expected_pct": { "name": "RWM Combined %", "short_name": "RWM %", "description": "% meeting expected standard in reading, writing and maths", "type": "percentage", "category": "expected" }, "reading_expected_pct": { "name": "Reading Expected %", "short_name": "Reading %", "description": "% meeting expected standard in reading", "type": "percentage", "category": "expected" }, "writing_expected_pct": { "name": "Writing Expected %", "short_name": "Writing %", "description": "% meeting expected standard in writing", "type": "percentage", "category": "expected" }, "maths_expected_pct": { "name": "Maths Expected %", "short_name": "Maths %", "description": "% meeting expected standard in maths", "type": "percentage", "category": "expected" }, "gps_expected_pct": { "name": "GPS Expected %", "short_name": "GPS %", "description": "% meeting expected standard in grammar, punctuation & spelling", "type": "percentage", "category": "expected" }, "science_expected_pct": { "name": "Science Expected %", "short_name": "Science %", "description": "% meeting expected standard in science", "type": "percentage", "category": "expected" }, # Higher Standard "rwm_high_pct": { "name": "RWM Combined Higher %", "short_name": "RWM Higher %", "description": "% achieving higher standard in RWM combined", "type": "percentage", "category": "higher" }, "reading_high_pct": { "name": "Reading Higher %", "short_name": "Reading Higher %", "description": "% achieving higher standard in reading", "type": "percentage", "category": "higher" }, "writing_high_pct": { "name": "Writing Higher %", "short_name": "Writing Higher %", "description": "% achieving greater depth in writing", "type": "percentage", "category": "higher" }, "maths_high_pct": { "name": "Maths Higher %", "short_name": "Maths Higher %", "description": "% achieving higher standard in maths", "type": "percentage", "category": "higher" }, "gps_high_pct": { "name": "GPS Higher %", "short_name": "GPS Higher %", "description": "% achieving higher standard in GPS", "type": "percentage", "category": "higher" }, # Progress Scores "reading_progress": { "name": "Reading Progress", "short_name": "Reading Progress", "description": "Progress in reading from KS1 to KS2", "type": "score", "category": "progress" }, "writing_progress": { "name": "Writing Progress", "short_name": "Writing Progress", "description": "Progress in writing from KS1 to KS2", "type": "score", "category": "progress" }, "maths_progress": { "name": "Maths Progress", "short_name": "Maths Progress", "description": "Progress in maths from KS1 to KS2", "type": "score", "category": "progress" }, # Average Scores "reading_avg_score": { "name": "Reading Average Score", "short_name": "Reading Avg", "description": "Average scaled score in reading", "type": "score", "category": "average" }, "maths_avg_score": { "name": "Maths Average Score", "short_name": "Maths Avg", "description": "Average scaled score in maths", "type": "score", "category": "average" }, "gps_avg_score": { "name": "GPS Average Score", "short_name": "GPS Avg", "description": "Average scaled score in GPS", "type": "score", "category": "average" }, # Gender Performance "rwm_expected_boys_pct": { "name": "RWM Expected % (Boys)", "short_name": "Boys RWM %", "description": "% of boys meeting expected standard", "type": "percentage", "category": "gender" }, "rwm_expected_girls_pct": { "name": "RWM Expected % (Girls)", "short_name": "Girls RWM %", "description": "% of girls meeting expected standard", "type": "percentage", "category": "gender" }, "rwm_high_boys_pct": { "name": "RWM Higher % (Boys)", "short_name": "Boys Higher %", "description": "% of boys at higher standard", "type": "percentage", "category": "gender" }, "rwm_high_girls_pct": { "name": "RWM Higher % (Girls)", "short_name": "Girls Higher %", "description": "% of girls at higher standard", "type": "percentage", "category": "gender" }, # Disadvantaged Performance "rwm_expected_disadvantaged_pct": { "name": "RWM Expected % (Disadvantaged)", "short_name": "Disadvantaged %", "description": "% of disadvantaged pupils meeting expected", "type": "percentage", "category": "equity" }, "rwm_expected_non_disadvantaged_pct": { "name": "RWM Expected % (Non-Disadvantaged)", "short_name": "Non-Disadv %", "description": "% of non-disadvantaged pupils meeting expected", "type": "percentage", "category": "equity" }, "disadvantaged_gap": { "name": "Disadvantaged Gap", "short_name": "Disadv Gap", "description": "Gap between disadvantaged and national non-disadvantaged", "type": "score", "category": "equity" }, # School Context "disadvantaged_pct": { "name": "% Disadvantaged Pupils", "short_name": "% Disadvantaged", "description": "% of pupils eligible for free school meals or looked after", "type": "percentage", "category": "context" }, "eal_pct": { "name": "% EAL Pupils", "short_name": "% EAL", "description": "% of pupils with English as additional language", "type": "percentage", "category": "context" }, "sen_support_pct": { "name": "% SEN Support", "short_name": "% SEN", "description": "% of pupils with SEN support", "type": "percentage", "category": "context" }, "stability_pct": { "name": "% Pupil Stability", "short_name": "% Stable", "description": "% of non-mobile pupils (stayed at school)", "type": "percentage", "category": "context" }, # 3-Year Averages "rwm_expected_3yr_pct": { "name": "RWM Expected % (3-Year Avg)", "short_name": "RWM 3yr %", "description": "3-year average % meeting expected", "type": "percentage", "category": "trends" }, "reading_avg_3yr": { "name": "Reading Score (3-Year Avg)", "short_name": "Reading 3yr", "description": "3-year average reading score", "type": "score", "category": "trends" }, "maths_avg_3yr": { "name": "Maths Score (3-Year Avg)", "short_name": "Maths 3yr", "description": "3-year average maths score", "type": "score", "category": "trends" }, } # Ranking columns to include in rankings response RANKING_COLUMNS = [ "urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils", # Core expected "rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct", "gps_expected_pct", "science_expected_pct", # Core higher "rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct", # Progress & averages "reading_progress", "writing_progress", "maths_progress", "reading_avg_score", "maths_avg_score", "gps_avg_score", # Gender "rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct", # Equity "rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap", # Context "disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct", # 3-year "rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr", ] # School listing columns SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"] # Local Authority code to name mapping (for fallback when LANAME column missing) # Source: https://www.gov.uk/government/publications/local-authority-codes LA_CODE_TO_NAME = { # Inner London 201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney", 205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea", 208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets", 212: "Wandsworth", 213: "Westminster", # Outer London 301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent", 305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey", 310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow", 314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge", 318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest", # West Midlands 330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell", 334: "Solihull", 335: "Walsall", 336: "Wolverhampton", # Merseyside 340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral", # Greater Manchester 350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale", 355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan", # South Yorkshire 370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield", # West Yorkshire 380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield", # Tyne and Wear 390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside", 393: "South Tyneside", 394: "Sunderland", # Isles of Scilly 420: "Isles of Scilly", # Unitary authorities (800+) 800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset", 803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough", 807: "Redcar and Cleveland", 808: "Stockton-on-Tees", 810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire", 812: "North East Lincolnshire", 813: "North Lincolnshire", 815: "North Yorkshire", 816: "York", 820: "Bedford", 821: "Central Bedfordshire", 822: "Luton", 823: "West Northamptonshire", 824: "North Northamptonshire", 825: "Buckinghamshire", 826: "Milton Keynes", 830: "Derbyshire", 831: "Derby", 835: "Dorset", 836: "Bournemouth, Christchurch and Poole", 837: "Poole", 838: "Bournemouth", # Historic codes (merged into 836) 839: "Durham", 840: "Darlington", 841: "East Sussex", 845: "Brighton and Hove", 846: "Hampshire", 850: "Portsmouth", 851: "Southampton", 852: "Isle of Wight", 855: "Leicestershire", 856: "Leicester", 857: "Rutland", 860: "Staffordshire", 861: "Stoke-on-Trent", 865: "Wiltshire", 866: "Swindon", 867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire", 870: "Reading", 871: "Slough", 872: "Wokingham", 873: "Cambridgeshire", 874: "Peterborough", 876: "Halton", 877: "Warrington", 878: "Devon", 879: "Plymouth", 880: "Torbay", 881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock", 884: "Herefordshire", 885: "Worcestershire", 886: "Kent", 887: "Medway", 888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool", 891: "Nottinghamshire", 892: "Nottingham", 893: "Shropshire", 894: "Telford and Wrekin", 895: "Cheshire East", 896: "Cheshire West and Chester", # County councils (900+) 908: "Cornwall", 909: "Cumbria", 916: "Gloucestershire", 919: "Hertfordshire", 921: "Norfolk", 925: "Lincolnshire", 926: "Northamptonshire", # Historic (split into 823/824 in 2021) 928: "Northumberland", 929: "Oxfordshire", 931: "Somerset", 933: "Suffolk", 935: "Surrey", 936: "Warwickshire", 937: "West Sussex", # New authorities (2023 reorganization) 938: "Westmorland and Furness", 940: "Cumberland", 941: "North Yorkshire", # New unitary 942: "Somerset", # New unitary (replaced 931) 943: "Buckinghamshire", # New unitary (2020, replacing 825 in some datasets) }