"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""

# Column name mappings from DfE CSV to API field names.
# Keys are the raw CSV column headers; values are the snake_case field names
# that the other constants in this module (NUMERIC_COLUMNS,
# METRIC_DEFINITIONS, RANKING_COLUMNS) refer to.
COLUMN_MAPPINGS = {
    # School identity and basic details
    'URN': 'urn',
    'SCHNAME': 'school_name',
    'ADDRESS1': 'address1',
    'ADDRESS2': 'address2',
    'TOWN': 'town',
    'PCODE': 'postcode',
    'NFTYPE': 'school_type_code',
    'RELDENOM': 'religious_denomination',
    'AGERANGE': 'age_range',
    'TOTPUPS': 'total_pupils',
    'TELIG': 'eligible_pupils',
    # Core KS2 metrics
    'PTRWM_EXP': 'rwm_expected_pct',
    'PTRWM_HIGH': 'rwm_high_pct',
    'READPROG': 'reading_progress',
    'WRITPROG': 'writing_progress',
    'MATPROG': 'maths_progress',
    'PTREAD_EXP': 'reading_expected_pct',
    'PTWRITTA_EXP': 'writing_expected_pct',
    'PTMAT_EXP': 'maths_expected_pct',
    'READ_AVERAGE': 'reading_avg_score',
    'MAT_AVERAGE': 'maths_avg_score',
    'PTREAD_HIGH': 'reading_high_pct',
    'PTWRITTA_HIGH': 'writing_high_pct',
    'PTMAT_HIGH': 'maths_high_pct',
    # GPS (Grammar, Punctuation & Spelling)
    'PTGPS_EXP': 'gps_expected_pct',
    'PTGPS_HIGH': 'gps_high_pct',
    'GPS_AVERAGE': 'gps_avg_score',
    # Science
    'PTSCITA_EXP': 'science_expected_pct',
    # School context
    'PTFSM6CLA1A': 'disadvantaged_pct',
    'PTEALGRP2': 'eal_pct',
    'PSENELK': 'sen_support_pct',
    'PSENELE': 'sen_ehcp_pct',
    'PTMOBN': 'stability_pct',
    # Gender breakdown
    'PTRWM_EXP_B': 'rwm_expected_boys_pct',
    'PTRWM_EXP_G': 'rwm_expected_girls_pct',
    'PTRWM_HIGH_B': 'rwm_high_boys_pct',
    'PTRWM_HIGH_G': 'rwm_high_girls_pct',
    # Disadvantaged performance
    'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
    'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
    'DIFFN_RWM_EXP': 'disadvantaged_gap',
    # 3-year averages
    'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
    'READ_AVERAGE_3YR': 'reading_avg_3yr',
    'MAT_AVERAGE_3YR': 'maths_avg_3yr',
}

# Numeric columns that need parsing.
# Entries are API field names (the values of COLUMN_MAPPINGS), not raw CSV
# headers.
NUMERIC_COLUMNS = [
    # Core metrics
    'rwm_expected_pct', 'rwm_high_pct', 'reading_progress',
    'writing_progress', 'maths_progress', 'reading_expected_pct',
    'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score',
    'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct',
    # GPS & Science
    'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct',
    # School context
    'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct',
    'sen_support_pct', 'sen_ehcp_pct', 'stability_pct',
    # Gender breakdown
    'rwm_expected_boys_pct', 'rwm_expected_girls_pct',
    'rwm_high_boys_pct', 'rwm_high_girls_pct',
    # Disadvantaged performance
    'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap',
    # 3-year averages
    'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr',
]

# School type code to name mapping.
# Keys are the short codes stored under 'school_type_code' (the NFTYPE CSV
# column in COLUMN_MAPPINGS); values are the display names.
# NOTE(review): 'FD' and 'F' both map to 'Foundation' - confirm that 'F' is
# not meant to be a separate school type in the DfE code list.
SCHOOL_TYPE_MAP = {
    'AC': 'Academy',
    'ACC': 'Academy Converter',
    'ACS': 'Academy Sponsor Led',
    'CY': 'Community School',
    'VA': 'Voluntary Aided',
    'VC': 'Voluntary Controlled',
    'FD': 'Foundation',
    'F': 'Foundation',
    'FS': 'Free School',
}

# Special values to treat as null.
# Placeholder strings (plus the empty string) that stand in for a missing
# value in the source data instead of a number.
NULL_VALUES = ['SUPP', 'NE', 'NA', 'NP', 'NEW', 'LOW', '']

# KS2 Metric definitions - single source of truth
# Used by both backend API and frontend
#
# Each key is an API field name; each value describes how to present it:
#   name        - full display label
#   short_name  - compact label
#   description - one-line explanation of the metric
#   type        - "percentage" or "score"
#   category    - grouping key ("expected", "higher", "progress", "average",
#                 "gender", "equity", "context" or "trends")
#
# NOTE(review): COLUMN_MAPPINGS and NUMERIC_COLUMNS also carry
# 'sen_ehcp_pct', which has no definition here - confirm the omission is
# intentional.
METRIC_DEFINITIONS = {
    # Expected Standard
    "rwm_expected_pct": {
        "name": "RWM Combined %",
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
        "category": "expected"
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
        "category": "expected"
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
        "category": "expected"
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
        "category": "expected"
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
        "category": "expected"
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
        "category": "expected"
    },
    # Higher Standard
    "rwm_high_pct": {
        "name": "RWM Combined Higher %",
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
        "category": "higher"
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
        "category": "higher"
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
        "category": "higher"
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
        "category": "higher"
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
        "category": "higher"
    },
    # Progress Scores
    "reading_progress": {
        "name": "Reading Progress",
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
        "category": "progress"
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
        "category": "progress"
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
        "category": "progress"
    },
    # Average Scores
    "reading_avg_score": {
        "name": "Reading Average Score",
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
        "category": "average"
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
        "category": "average"
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
        "category": "average"
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
        "name": "RWM Expected % (Boys)",
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
        "category": "gender"
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
        "category": "gender"
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
        "category": "gender"
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
        "category": "gender"
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
        "name": "RWM Expected % (Disadvantaged)",
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity"
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity"
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
        "category": "equity"
    },
    # School Context
    "disadvantaged_pct": {
        "name": "% Disadvantaged Pupils",
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
        "category": "context"
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
        "category": "context"
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
        "category": "context"
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
        "category": "context"
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
        "name": "RWM Expected % (3-Year Avg)",
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
        "category": "trends"
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
        "category": "trends"
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
        "category": "trends"
    },
}

# Ranking columns to include in rankings response.
# Starts with the identifying/descriptive fields, followed by every metric
# keyed in METRIC_DEFINITIONS, grouped in the same order.
RANKING_COLUMNS = [
    "urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
    # Core expected
    "rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
    "gps_expected_pct", "science_expected_pct",
    # Core higher
    "rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
    # Progress & averages
    "reading_progress", "writing_progress", "maths_progress",
    "reading_avg_score", "maths_avg_score", "gps_avg_score",
    # Gender
    "rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
    # Equity
    "rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
    # Context
    "disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
    # 3-year
    "rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
]

# School listing columns: the identifying and address fields, with no
# performance metrics.
SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]

# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
#
# Keys are the three-digit DfE local-authority numbers as ints.
# Codes of abolished authorities are kept so that older datasets still
# resolve to a name.
# NOTE(review): several 8xx/9xx entries were previously keyed to the wrong
# authority (e.g. 850 resolved to Portsmouth instead of Hampshire); they have
# been re-keyed to the DfE numbering. Re-verify against the published list
# whenever authorities are reorganised.
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
    205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
    208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
    212: "Wandsworth", 213: "Westminster",
    # Outer London
    301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent",
    305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey",
    310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow",
    314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge",
    318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
    # West Midlands
    330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell",
    334: "Solihull", 335: "Walsall", 336: "Wolverhampton",
    # Merseyside
    340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral",
    # Greater Manchester
    350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale",
    355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan",
    # South Yorkshire
    370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield",
    # West Yorkshire
    380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside",
    393: "South Tyneside", 394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # 800-series codes
    800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset",
    803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough",
    807: "Redcar and Cleveland", 808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
    812: "North East Lincolnshire", 813: "North Lincolnshire",
    815: "North Yorkshire", 816: "York",
    820: "Bedfordshire",  # Historic (split into 822/823 in 2009)
    821: "Luton", 822: "Bedford", 823: "Central Bedfordshire",
    825: "Buckinghamshire", 826: "Milton Keynes",
    830: "Derbyshire", 831: "Derby",
    835: "Dorset", 836: "Poole", 837: "Bournemouth",  # Historic codes (pre-2019)
    838: "Dorset", 839: "Bournemouth, Christchurch and Poole",
    840: "Durham", 841: "Darlington",
    845: "East Sussex", 846: "Brighton and Hove",
    850: "Hampshire", 851: "Portsmouth", 852: "Southampton",
    855: "Leicestershire", 856: "Leicester", 857: "Rutland",
    860: "Staffordshire", 861: "Stoke-on-Trent",
    865: "Wiltshire", 866: "Swindon",
    867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
    870: "Reading", 871: "Slough", 872: "Wokingham",
    873: "Cambridgeshire", 874: "Peterborough",
    876: "Halton", 877: "Warrington",
    878: "Devon", 879: "Plymouth", 880: "Torbay",
    881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock",
    884: "Herefordshire", 885: "Worcestershire",
    886: "Kent", 887: "Medway",
    888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
    891: "Nottinghamshire", 892: "Nottingham",
    893: "Shropshire", 894: "Telford and Wrekin",
    895: "Cheshire East", 896: "Cheshire West and Chester",
    # 900-series codes
    908: "Cornwall",
    909: "Cumbria",  # Historic (split into 942/943 in 2023)
    916: "Gloucestershire", 919: "Hertfordshire",
    921: "Isle of Wight", 925: "Lincolnshire", 926: "Norfolk",
    928: "Northamptonshire",  # Historic (split into 940/941 in 2021)
    929: "Northumberland", 931: "Oxfordshire",
    933: "Somerset", 935: "Suffolk", 936: "Surrey",
    937: "Warwickshire", 938: "West Sussex",
    # Authorities created by the 2021 and 2023 reorganisations
    940: "North Northamptonshire", 941: "West Northamptonshire",
    942: "Cumberland", 943: "Westmorland and Furness",
}
