# school_compare/backend/schemas.py
"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""
# Translation table: DfE CSV column header -> field name used by the API.
COLUMN_MAPPINGS = {
    # School details
    "URN": "urn",
    "SCHNAME": "school_name",
    "ADDRESS1": "address1",
    "ADDRESS2": "address2",
    "TOWN": "town",
    "PCODE": "postcode",
    "NFTYPE": "school_type_code",
    "RELDENOM": "religious_denomination",
    "AGERANGE": "age_range",
    "TOTPUPS": "total_pupils",
    "TELIG": "eligible_pupils",

    # Core KS2 metrics
    "PTRWM_EXP": "rwm_expected_pct",
    "PTRWM_HIGH": "rwm_high_pct",
    "READPROG": "reading_progress",
    "WRITPROG": "writing_progress",
    "MATPROG": "maths_progress",
    "PTREAD_EXP": "reading_expected_pct",
    "PTWRITTA_EXP": "writing_expected_pct",
    "PTMAT_EXP": "maths_expected_pct",
    "READ_AVERAGE": "reading_avg_score",
    "MAT_AVERAGE": "maths_avg_score",
    "PTREAD_HIGH": "reading_high_pct",
    "PTWRITTA_HIGH": "writing_high_pct",
    "PTMAT_HIGH": "maths_high_pct",

    # GPS (Grammar, Punctuation & Spelling)
    "PTGPS_EXP": "gps_expected_pct",
    "PTGPS_HIGH": "gps_high_pct",
    "GPS_AVERAGE": "gps_avg_score",

    # Science
    "PTSCITA_EXP": "science_expected_pct",

    # School context
    "PTFSM6CLA1A": "disadvantaged_pct",
    "PTEALGRP2": "eal_pct",
    "PSENELK": "sen_support_pct",
    "PSENELE": "sen_ehcp_pct",
    "PTMOBN": "stability_pct",

    # Gender breakdown
    "PTRWM_EXP_B": "rwm_expected_boys_pct",
    "PTRWM_EXP_G": "rwm_expected_girls_pct",
    "PTRWM_HIGH_B": "rwm_high_boys_pct",
    "PTRWM_HIGH_G": "rwm_high_girls_pct",

    # Disadvantaged performance
    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
    "DIFFN_RWM_EXP": "disadvantaged_gap",

    # 3-year averages
    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
    "READ_AVERAGE_3YR": "reading_avg_3yr",
    "MAT_AVERAGE_3YR": "maths_avg_3yr",
}
# API field names whose values need parsing as numbers.
NUMERIC_COLUMNS = [
    # Core metrics
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",

    # GPS & Science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",

    # School context
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",

    # Gender breakdown
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",

    # Disadvantaged performance
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",

    # 3-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# Lookup from school type code to its display name.
# Both "FD" and "F" map to "Foundation".
SCHOOL_TYPE_MAP = {
    "AC": "Academy",
    "ACC": "Academy Converter",
    "ACS": "Academy Sponsor Led",
    "CY": "Community School",
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",
    "FD": "Foundation",
    "F": "Foundation",
    "FS": "Free School",
}
# Placeholder strings that are treated as null; the empty string is included.
NULL_VALUES = [
    "SUPP",
    "NE",
    "NA",
    "NP",
    "NEW",
    "LOW",
    "",
]
# KS2 metric catalogue - the single source of truth, shared by the backend
# API and the frontend. Each entry is keyed by API field name and carries
# the same five keys: name, short_name, description, type, category.
METRIC_DEFINITIONS = {
    # ---- Expected standard ----
    "rwm_expected_pct": {
        "name": "RWM Combined %",
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
        "category": "expected",
    },

    # ---- Higher standard ----
    "rwm_high_pct": {
        "name": "RWM Combined Higher %",
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
        "category": "higher",
    },

    # ---- Progress scores ----
    "reading_progress": {
        "name": "Reading Progress",
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },

    # ---- Average scores ----
    "reading_avg_score": {
        "name": "Reading Average Score",
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
        "category": "average",
    },

    # ---- Gender performance ----
    "rwm_expected_boys_pct": {
        "name": "RWM Expected % (Boys)",
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
        "category": "gender",
    },

    # ---- Disadvantaged performance ----
    "rwm_expected_disadvantaged_pct": {
        "name": "RWM Expected % (Disadvantaged)",
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
        "category": "equity",
    },

    # ---- School context ----
    "disadvantaged_pct": {
        "name": "% Disadvantaged Pupils",
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
        "category": "context",
    },

    # ---- 3-year averages ----
    "rwm_expected_3yr_pct": {
        "name": "RWM Expected % (3-Year Avg)",
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
        "category": "trends",
    },
}
# Columns included in the rankings response, in this order.
RANKING_COLUMNS = [
    # School details
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",

    # Core expected
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",

    # Core higher
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",

    # Progress & averages
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",

    # Gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",

    # Equity
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",

    # Context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",

    # 3-year
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# Columns used for the school listing, in this order.
SCHOOL_COLUMNS = [
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "town",
    "postcode",
]
# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
#
# Keys are integer LA codes; values are display names. Two codes may share a
# name (e.g. 815 and 941 are both "North Yorkshire").
#
# NOTE(review): several entries (for example 820-824, 835-846 and 921-943) do
# not appear to line up with the published DfE LA code list - verify against
# the source above before relying on this fallback. Data is left unchanged.
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
    205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
    208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
    212: "Wandsworth", 213: "Westminster",
    # Outer London
    301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent",
    305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey",
    310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow",
    314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge",
    318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
    # West Midlands
    330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell",
    334: "Solihull", 335: "Walsall", 336: "Wolverhampton",
    # Merseyside
    340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral",
    # Greater Manchester
    350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale",
    355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan",
    # South Yorkshire
    370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield",
    # West Yorkshire
    380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside",
    393: "South Tyneside", 394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities (800+)
    800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset",
    803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough",
    807: "Redcar and Cleveland", 808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
    812: "North East Lincolnshire", 813: "North Lincolnshire",
    815: "North Yorkshire", 816: "York",
    820: "Bedford", 821: "Central Bedfordshire", 822: "Luton",
    823: "West Northamptonshire", 824: "North Northamptonshire",
    825: "Buckinghamshire", 826: "Milton Keynes",
    830: "Derbyshire", 831: "Derby",
    835: "Dorset", 836: "Bournemouth, Christchurch and Poole",
    837: "Poole", 838: "Bournemouth",  # Historic codes (merged into 836)
    839: "Durham", 840: "Darlington",
    841: "East Sussex", 845: "Brighton and Hove",
    846: "Hampshire", 850: "Portsmouth", 851: "Southampton", 852: "Isle of Wight",
    855: "Leicestershire", 856: "Leicester", 857: "Rutland",
    860: "Staffordshire", 861: "Stoke-on-Trent",
    865: "Wiltshire", 866: "Swindon",
    867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
    870: "Reading", 871: "Slough", 872: "Wokingham",
    873: "Cambridgeshire", 874: "Peterborough",
    876: "Halton", 877: "Warrington",
    878: "Devon", 879: "Plymouth", 880: "Torbay",
    881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock",
    884: "Herefordshire", 885: "Worcestershire",
    886: "Kent", 887: "Medway",
    888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
    891: "Nottinghamshire", 892: "Nottingham",
    893: "Shropshire", 894: "Telford and Wrekin",
    895: "Cheshire East", 896: "Cheshire West and Chester",
    # County councils (900+)
    908: "Cornwall", 909: "Cumbria",
    916: "Gloucestershire", 919: "Hertfordshire",
    921: "Norfolk", 925: "Lincolnshire",
    926: "Northamptonshire",  # Historic (split into 823/824 in 2021)
    928: "Northumberland", 929: "Oxfordshire",
    931: "Somerset", 933: "Suffolk", 935: "Surrey",
    936: "Warwickshire", 937: "West Sussex",
    # New authorities (2023 reorganization)
    938: "Westmorland and Furness", 940: "Cumberland",
    941: "North Yorkshire",  # New unitary
    942: "Somerset",  # New unitary (replaced 931)
    943: "Buckinghamshire",  # New unitary (2020, replacing 825 in some datasets)
}