# school_compare/backend/schemas.py
"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""
# Column name mappings from DfE CSV to API field names.
# Keys are the raw DfE column headers (case-sensitive, exactly as they appear
# in the CSV); values are the snake_case field names that the other tables in
# this module (NUMERIC_COLUMNS, METRIC_DEFINITIONS, RANKING_COLUMNS) refer to.
COLUMN_MAPPINGS = {
    # School identity and address
    "URN": "urn",
    "SCHNAME": "school_name",
    "ADDRESS1": "address1",
    "ADDRESS2": "address2",
    "TOWN": "town",
    "PCODE": "postcode",
    "NFTYPE": "school_type_code",
    "RELDENOM": "religious_denomination",
    "AGERANGE": "age_range",
    "TOTPUPS": "total_pupils",
    "TELIG": "eligible_pupils",
    # Core KS2 metrics
    "PTRWM_EXP": "rwm_expected_pct",
    "PTRWM_HIGH": "rwm_high_pct",
    "READPROG": "reading_progress",
    "WRITPROG": "writing_progress",
    "MATPROG": "maths_progress",
    "PTREAD_EXP": "reading_expected_pct",
    "PTWRITTA_EXP": "writing_expected_pct",
    "PTMAT_EXP": "maths_expected_pct",
    "READ_AVERAGE": "reading_avg_score",
    "MAT_AVERAGE": "maths_avg_score",
    "PTREAD_HIGH": "reading_high_pct",
    "PTWRITTA_HIGH": "writing_high_pct",
    "PTMAT_HIGH": "maths_high_pct",
    # GPS (Grammar, Punctuation & Spelling)
    "PTGPS_EXP": "gps_expected_pct",
    "PTGPS_HIGH": "gps_high_pct",
    "GPS_AVERAGE": "gps_avg_score",
    # Science
    "PTSCITA_EXP": "science_expected_pct",
    # School context
    "PTFSM6CLA1A": "disadvantaged_pct",
    "PTEALGRP2": "eal_pct",
    "PSENELK": "sen_support_pct",
    "PSENELE": "sen_ehcp_pct",
    "PTMOBN": "stability_pct",
    # Gender breakdown
    "PTRWM_EXP_B": "rwm_expected_boys_pct",
    "PTRWM_EXP_G": "rwm_expected_girls_pct",
    "PTRWM_HIGH_B": "rwm_high_boys_pct",
    "PTRWM_HIGH_G": "rwm_high_girls_pct",
    # Disadvantaged performance
    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
    "DIFFN_RWM_EXP": "disadvantaged_gap",
    # 3-year averages
    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
    "READ_AVERAGE_3YR": "reading_avg_3yr",
    "MAT_AVERAGE_3YR": "maths_avg_3yr",
}
# Numeric columns that need parsing.
# Entries are API field names (the values of COLUMN_MAPPINGS), not raw DfE
# headers. Kept as a list so the declared order is preserved for callers.
NUMERIC_COLUMNS = [
    # Core metrics
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    # GPS & Science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",
    # School context
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Gender breakdown
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged performance
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# School type code to user-friendly name mapping.
# Keys are school type codes (the "school_type_code" field, i.e. the DfE
# NFTYPE column per COLUMN_MAPPINGS); several codes deliberately collapse to
# the same display name.
# NOTE(review): the code-to-label assignments below are reproduced as found;
# confirm them against the DfE NFTYPE metadata before relying on the
# distinction between e.g. "AC" and "ACS".
SCHOOL_TYPE_MAP = {
    # Academies
    "AC": "Academy",
    "ACC": "Academy",
    "ACCS": "Academy",
    "ACS": "Academy (Sponsor Led)",
    # Community Schools
    "CY": "Community",
    "CYS": "Community",
    # Voluntary Schools
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",
    # Foundation Schools
    "FD": "Foundation",
    "F": "Foundation",
    "FDS": "Foundation",
    # Free Schools
    "FS": "Free School",
}
# Special values to treat as null.
# Raw cell contents that stand for "no value" rather than data; the empty
# string is included so blank cells are handled the same way.
# NOTE(review): presumably these are the DfE suppression / not-applicable
# markers — confirm the full set against the DfE data metadata.
NULL_VALUES = ["SUPP", "NE", "NA", "NP", "NEW", "LOW", ""]
# KS2 Metric definitions - single source of truth
# Used by both backend API and frontend
#
# Keyed by API field name. Every entry carries the same five string fields:
#   name        - full display label
#   short_name  - compact label
#   description - one-line explanation of the metric
#   type        - "percentage" or "score"
#   category    - grouping key ("expected", "higher", "progress", "average",
#                 "gender", "equity", "context", "trends")
METRIC_DEFINITIONS = {
    # Expected Standard
    "rwm_expected_pct": {
        "name": "RWM Combined %",
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
        "category": "expected",
    },
    # Higher Standard
    "rwm_high_pct": {
        "name": "RWM Combined Higher %",
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
        "category": "higher",
    },
    # Progress Scores
    "reading_progress": {
        "name": "Reading Progress",
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    # Average Scores
    "reading_avg_score": {
        "name": "Reading Average Score",
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
        "category": "average",
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
        "name": "RWM Expected % (Boys)",
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
        "name": "RWM Expected % (Disadvantaged)",
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
        "category": "equity",
    },
    # School Context
    # NOTE(review): "sen_ehcp_pct" is mapped in COLUMN_MAPPINGS and parsed via
    # NUMERIC_COLUMNS but has no definition here — confirm that is intentional.
    "disadvantaged_pct": {
        "name": "% Disadvantaged Pupils",
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
        "category": "context",
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
        "name": "RWM Expected % (3-Year Avg)",
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
        "category": "trends",
    },
}
# Ranking columns to include in rankings response.
# Identity/descriptive fields come first, followed by metric field names
# grouped the same way as METRIC_DEFINITIONS. Order is preserved as declared.
RANKING_COLUMNS = [
    # School identity
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",
    # Core expected
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",
    # Core higher
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",
    # Progress & averages
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",
    # Gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Equity
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # Context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",
    # 3-year
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# School listing columns.
# Descriptive (non-metric) fields only; order is preserved as declared.
SCHOOL_COLUMNS = [
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "town",
    "postcode",
    "religious_denomination",
    "age_range",
    "latitude",
    "longitude",
]
# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
#
# Keys are integer three-digit DfE local authority codes; values are display
# names. Superseded codes are kept (marked "Historic") so that older data
# files still resolve to a name.
#
# NOTE(review): the 82x, 83x-85x and 92x-94x ranges were corrected in review to
# follow the published DfE numbering (previously many names were shifted onto
# neighbouring codes, e.g. Surrey on 935 instead of 936). Re-verify against the
# current DfE / GIAS code list whenever authorities are reorganised.
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London",
    202: "Camden",
    203: "Greenwich",
    204: "Hackney",
    205: "Hammersmith and Fulham",
    206: "Islington",
    207: "Kensington and Chelsea",
    208: "Lambeth",
    209: "Lewisham",
    210: "Southwark",
    211: "Tower Hamlets",
    212: "Wandsworth",
    213: "Westminster",
    # Outer London
    301: "Barking and Dagenham",
    302: "Barnet",
    303: "Bexley",
    304: "Brent",
    305: "Bromley",
    306: "Croydon",
    307: "Ealing",
    308: "Enfield",
    309: "Haringey",
    310: "Harrow",
    311: "Havering",
    312: "Hillingdon",
    313: "Hounslow",
    314: "Kingston upon Thames",
    315: "Merton",
    316: "Newham",
    317: "Redbridge",
    318: "Richmond upon Thames",
    319: "Sutton",
    320: "Waltham Forest",
    # West Midlands
    330: "Birmingham",
    331: "Coventry",
    332: "Dudley",
    333: "Sandwell",
    334: "Solihull",
    335: "Walsall",
    336: "Wolverhampton",
    # Merseyside
    340: "Knowsley",
    341: "Liverpool",
    342: "St. Helens",
    343: "Sefton",
    344: "Wirral",
    # Greater Manchester
    350: "Bolton",
    351: "Bury",
    352: "Manchester",
    353: "Oldham",
    354: "Rochdale",
    355: "Salford",
    356: "Stockport",
    357: "Tameside",
    358: "Trafford",
    359: "Wigan",
    # South Yorkshire
    370: "Barnsley",
    371: "Doncaster",
    372: "Rotherham",
    373: "Sheffield",
    # West Yorkshire
    380: "Bradford",
    381: "Calderdale",
    382: "Kirklees",
    383: "Leeds",
    384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead",
    391: "Newcastle upon Tyne",
    392: "North Tyneside",
    393: "South Tyneside",
    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities and shire counties (800+)
    800: "Bath and North East Somerset",
    801: "Bristol, City of",
    802: "North Somerset",
    803: "South Gloucestershire",
    805: "Hartlepool",
    806: "Middlesbrough",
    807: "Redcar and Cleveland",
    808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of",
    811: "East Riding of Yorkshire",
    812: "North East Lincolnshire",
    813: "North Lincolnshire",
    815: "North Yorkshire",
    816: "York",
    820: "Bedfordshire",  # Historic (split into 822/823 in 2009)
    821: "Luton",
    822: "Bedford",
    823: "Central Bedfordshire",
    # NOTE(review): 824 does not appear to be an assigned DfE code; the entry
    # is retained only so existing lookups keep resolving — confirm and remove.
    824: "North Northamptonshire",
    825: "Buckinghamshire",
    826: "Milton Keynes",
    830: "Derbyshire",
    831: "Derby",
    835: "Dorset",  # Historic (pre-2019 county; replaced by 838)
    836: "Poole",  # Historic (merged into 839 in 2019)
    837: "Bournemouth",  # Historic (merged into 839 in 2019)
    838: "Dorset",
    839: "Bournemouth, Christchurch and Poole",
    840: "Durham",
    841: "Darlington",
    845: "East Sussex",
    846: "Brighton and Hove",
    850: "Hampshire",
    851: "Portsmouth",
    852: "Southampton",
    855: "Leicestershire",
    856: "Leicester",
    857: "Rutland",
    860: "Staffordshire",
    861: "Stoke-on-Trent",
    865: "Wiltshire",
    866: "Swindon",
    867: "Bracknell Forest",
    868: "Windsor and Maidenhead",
    869: "West Berkshire",
    870: "Reading",
    871: "Slough",
    872: "Wokingham",
    873: "Cambridgeshire",
    874: "Peterborough",
    876: "Halton",
    877: "Warrington",
    878: "Devon",
    879: "Plymouth",
    880: "Torbay",
    881: "Essex",
    882: "Southend-on-Sea",
    883: "Thurrock",
    884: "Herefordshire",
    885: "Worcestershire",
    886: "Kent",
    887: "Medway",
    888: "Lancashire",
    889: "Blackburn with Darwen",
    890: "Blackpool",
    891: "Nottinghamshire",
    892: "Nottingham",
    893: "Shropshire",
    894: "Telford and Wrekin",
    895: "Cheshire East",
    896: "Cheshire West and Chester",
    # County councils (900+)
    908: "Cornwall",
    909: "Cumbria",  # Historic (split into 942/943 in 2023)
    916: "Gloucestershire",
    919: "Hertfordshire",
    921: "Isle of Wight",
    925: "Lincolnshire",
    926: "Norfolk",
    928: "Northamptonshire",  # Historic (split into 940/941 in 2021)
    929: "Northumberland",
    931: "Oxfordshire",
    933: "Somerset",
    935: "Suffolk",
    936: "Surrey",
    937: "Warwickshire",
    938: "West Sussex",
    # New authorities (2021 and 2023 reorganisations)
    940: "North Northamptonshire",
    941: "West Northamptonshire",
    942: "Cumberland",
    943: "Westmorland and Furness",
}