Refactoring and bug fixes
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m7s

This commit is contained in:
Tudor Sitaru
2026-01-06 16:30:32 +00:00
parent 0ea4720ac1
commit 54e4bc2e77
11 changed files with 1246 additions and 534 deletions

398
backend/schemas.py Normal file
View File

@@ -0,0 +1,398 @@
"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""
# Column name mappings from DfE CSV to API field names
COLUMN_MAPPINGS = {
'URN': 'urn',
'SCHNAME': 'school_name',
'ADDRESS1': 'address1',
'ADDRESS2': 'address2',
'TOWN': 'town',
'PCODE': 'postcode',
'NFTYPE': 'school_type_code',
'RELDENOM': 'religious_denomination',
'AGERANGE': 'age_range',
'TOTPUPS': 'total_pupils',
'TELIG': 'eligible_pupils',
# Core KS2 metrics
'PTRWM_EXP': 'rwm_expected_pct',
'PTRWM_HIGH': 'rwm_high_pct',
'READPROG': 'reading_progress',
'WRITPROG': 'writing_progress',
'MATPROG': 'maths_progress',
'PTREAD_EXP': 'reading_expected_pct',
'PTWRITTA_EXP': 'writing_expected_pct',
'PTMAT_EXP': 'maths_expected_pct',
'READ_AVERAGE': 'reading_avg_score',
'MAT_AVERAGE': 'maths_avg_score',
'PTREAD_HIGH': 'reading_high_pct',
'PTWRITTA_HIGH': 'writing_high_pct',
'PTMAT_HIGH': 'maths_high_pct',
# GPS (Grammar, Punctuation & Spelling)
'PTGPS_EXP': 'gps_expected_pct',
'PTGPS_HIGH': 'gps_high_pct',
'GPS_AVERAGE': 'gps_avg_score',
# Science
'PTSCITA_EXP': 'science_expected_pct',
# School context
'PTFSM6CLA1A': 'disadvantaged_pct',
'PTEALGRP2': 'eal_pct',
'PSENELK': 'sen_support_pct',
'PSENELE': 'sen_ehcp_pct',
'PTMOBN': 'stability_pct',
# Gender breakdown
'PTRWM_EXP_B': 'rwm_expected_boys_pct',
'PTRWM_EXP_G': 'rwm_expected_girls_pct',
'PTRWM_HIGH_B': 'rwm_high_boys_pct',
'PTRWM_HIGH_G': 'rwm_high_girls_pct',
# Disadvantaged performance
'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
'DIFFN_RWM_EXP': 'disadvantaged_gap',
# 3-year averages
'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
'READ_AVERAGE_3YR': 'reading_avg_3yr',
'MAT_AVERAGE_3YR': 'maths_avg_3yr',
}
# Numeric columns that need parsing
NUMERIC_COLUMNS = [
# Core metrics
'rwm_expected_pct', 'rwm_high_pct', 'reading_progress',
'writing_progress', 'maths_progress', 'reading_expected_pct',
'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score',
'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct',
# GPS & Science
'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct',
# School context
'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct',
'sen_support_pct', 'sen_ehcp_pct', 'stability_pct',
# Gender breakdown
'rwm_expected_boys_pct', 'rwm_expected_girls_pct',
'rwm_high_boys_pct', 'rwm_high_girls_pct',
# Disadvantaged performance
'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap',
# 3-year averages
'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr',
]
# School type code to name mapping
SCHOOL_TYPE_MAP = {
'AC': 'Academy',
'ACC': 'Academy Converter',
'ACS': 'Academy Sponsor Led',
'CY': 'Community School',
'VA': 'Voluntary Aided',
'VC': 'Voluntary Controlled',
'FD': 'Foundation',
'F': 'Foundation',
'FS': 'Free School',
}
# Special values to treat as null
NULL_VALUES = ['SUPP', 'NE', 'NA', 'NP', 'NEW', 'LOW', '']
# KS2 Metric definitions - single source of truth
# Used by both backend API and frontend
METRIC_DEFINITIONS = {
# Expected Standard
"rwm_expected_pct": {
"name": "RWM Combined %",
"short_name": "RWM %",
"description": "% meeting expected standard in reading, writing and maths",
"type": "percentage",
"category": "expected"
},
"reading_expected_pct": {
"name": "Reading Expected %",
"short_name": "Reading %",
"description": "% meeting expected standard in reading",
"type": "percentage",
"category": "expected"
},
"writing_expected_pct": {
"name": "Writing Expected %",
"short_name": "Writing %",
"description": "% meeting expected standard in writing",
"type": "percentage",
"category": "expected"
},
"maths_expected_pct": {
"name": "Maths Expected %",
"short_name": "Maths %",
"description": "% meeting expected standard in maths",
"type": "percentage",
"category": "expected"
},
"gps_expected_pct": {
"name": "GPS Expected %",
"short_name": "GPS %",
"description": "% meeting expected standard in grammar, punctuation & spelling",
"type": "percentage",
"category": "expected"
},
"science_expected_pct": {
"name": "Science Expected %",
"short_name": "Science %",
"description": "% meeting expected standard in science",
"type": "percentage",
"category": "expected"
},
# Higher Standard
"rwm_high_pct": {
"name": "RWM Combined Higher %",
"short_name": "RWM Higher %",
"description": "% achieving higher standard in RWM combined",
"type": "percentage",
"category": "higher"
},
"reading_high_pct": {
"name": "Reading Higher %",
"short_name": "Reading Higher %",
"description": "% achieving higher standard in reading",
"type": "percentage",
"category": "higher"
},
"writing_high_pct": {
"name": "Writing Higher %",
"short_name": "Writing Higher %",
"description": "% achieving greater depth in writing",
"type": "percentage",
"category": "higher"
},
"maths_high_pct": {
"name": "Maths Higher %",
"short_name": "Maths Higher %",
"description": "% achieving higher standard in maths",
"type": "percentage",
"category": "higher"
},
"gps_high_pct": {
"name": "GPS Higher %",
"short_name": "GPS Higher %",
"description": "% achieving higher standard in GPS",
"type": "percentage",
"category": "higher"
},
# Progress Scores
"reading_progress": {
"name": "Reading Progress",
"short_name": "Reading Progress",
"description": "Progress in reading from KS1 to KS2",
"type": "score",
"category": "progress"
},
"writing_progress": {
"name": "Writing Progress",
"short_name": "Writing Progress",
"description": "Progress in writing from KS1 to KS2",
"type": "score",
"category": "progress"
},
"maths_progress": {
"name": "Maths Progress",
"short_name": "Maths Progress",
"description": "Progress in maths from KS1 to KS2",
"type": "score",
"category": "progress"
},
# Average Scores
"reading_avg_score": {
"name": "Reading Average Score",
"short_name": "Reading Avg",
"description": "Average scaled score in reading",
"type": "score",
"category": "average"
},
"maths_avg_score": {
"name": "Maths Average Score",
"short_name": "Maths Avg",
"description": "Average scaled score in maths",
"type": "score",
"category": "average"
},
"gps_avg_score": {
"name": "GPS Average Score",
"short_name": "GPS Avg",
"description": "Average scaled score in GPS",
"type": "score",
"category": "average"
},
# Gender Performance
"rwm_expected_boys_pct": {
"name": "RWM Expected % (Boys)",
"short_name": "Boys RWM %",
"description": "% of boys meeting expected standard",
"type": "percentage",
"category": "gender"
},
"rwm_expected_girls_pct": {
"name": "RWM Expected % (Girls)",
"short_name": "Girls RWM %",
"description": "% of girls meeting expected standard",
"type": "percentage",
"category": "gender"
},
"rwm_high_boys_pct": {
"name": "RWM Higher % (Boys)",
"short_name": "Boys Higher %",
"description": "% of boys at higher standard",
"type": "percentage",
"category": "gender"
},
"rwm_high_girls_pct": {
"name": "RWM Higher % (Girls)",
"short_name": "Girls Higher %",
"description": "% of girls at higher standard",
"type": "percentage",
"category": "gender"
},
# Disadvantaged Performance
"rwm_expected_disadvantaged_pct": {
"name": "RWM Expected % (Disadvantaged)",
"short_name": "Disadvantaged %",
"description": "% of disadvantaged pupils meeting expected",
"type": "percentage",
"category": "equity"
},
"rwm_expected_non_disadvantaged_pct": {
"name": "RWM Expected % (Non-Disadvantaged)",
"short_name": "Non-Disadv %",
"description": "% of non-disadvantaged pupils meeting expected",
"type": "percentage",
"category": "equity"
},
"disadvantaged_gap": {
"name": "Disadvantaged Gap",
"short_name": "Disadv Gap",
"description": "Gap between disadvantaged and national non-disadvantaged",
"type": "score",
"category": "equity"
},
# School Context
"disadvantaged_pct": {
"name": "% Disadvantaged Pupils",
"short_name": "% Disadvantaged",
"description": "% of pupils eligible for free school meals or looked after",
"type": "percentage",
"category": "context"
},
"eal_pct": {
"name": "% EAL Pupils",
"short_name": "% EAL",
"description": "% of pupils with English as additional language",
"type": "percentage",
"category": "context"
},
"sen_support_pct": {
"name": "% SEN Support",
"short_name": "% SEN",
"description": "% of pupils with SEN support",
"type": "percentage",
"category": "context"
},
"stability_pct": {
"name": "% Pupil Stability",
"short_name": "% Stable",
"description": "% of non-mobile pupils (stayed at school)",
"type": "percentage",
"category": "context"
},
# 3-Year Averages
"rwm_expected_3yr_pct": {
"name": "RWM Expected % (3-Year Avg)",
"short_name": "RWM 3yr %",
"description": "3-year average % meeting expected",
"type": "percentage",
"category": "trends"
},
"reading_avg_3yr": {
"name": "Reading Score (3-Year Avg)",
"short_name": "Reading 3yr",
"description": "3-year average reading score",
"type": "score",
"category": "trends"
},
"maths_avg_3yr": {
"name": "Maths Score (3-Year Avg)",
"short_name": "Maths 3yr",
"description": "3-year average maths score",
"type": "score",
"category": "trends"
},
}
# Ranking columns to include in rankings response
RANKING_COLUMNS = [
"urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
# Core expected
"rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
"gps_expected_pct", "science_expected_pct",
# Core higher
"rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
# Progress & averages
"reading_progress", "writing_progress", "maths_progress",
"reading_avg_score", "maths_avg_score", "gps_avg_score",
# Gender
"rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
# Equity
"rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
# Context
"disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
# 3-year
"rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
]
# School listing columns
SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]
# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
LA_CODE_TO_NAME = {
201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
212: "Wandsworth", 213: "Westminster", 301: "Barking and Dagenham", 302: "Barnet",
303: "Bexley", 304: "Brent", 305: "Bromley", 306: "Croydon", 307: "Ealing",
308: "Enfield", 309: "Haringey", 310: "Harrow", 311: "Havering", 312: "Hillingdon",
313: "Hounslow", 314: "Kingston upon Thames", 315: "Merton", 316: "Newham",
317: "Redbridge", 318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell", 334: "Solihull",
335: "Walsall", 336: "Wolverhampton", 340: "Knowsley", 341: "Liverpool",
342: "St. Helens", 343: "Sefton", 344: "Wirral", 350: "Bolton", 351: "Bury",
352: "Manchester", 353: "Oldham", 354: "Rochdale", 355: "Salford", 356: "Stockport",
357: "Tameside", 358: "Trafford", 359: "Wigan", 370: "Barnsley", 371: "Doncaster",
372: "Rotherham", 373: "Sheffield", 380: "Bradford", 381: "Calderdale",
382: "Kirklees", 383: "Leeds", 384: "Wakefield", 390: "Gateshead",
391: "Newcastle upon Tyne", 392: "North Tyneside", 393: "South Tyneside",
394: "Sunderland", 420: "Isles of Scilly", 800: "Bath and North East Somerset",
801: "Bristol, City of", 802: "North Somerset", 803: "South Gloucestershire",
805: "Hartlepool", 806: "Middlesbrough", 807: "Redcar and Cleveland",
808: "Stockton-on-Tees", 810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
812: "North East Lincolnshire", 813: "North Lincolnshire", 815: "North Yorkshire",
816: "York", 820: "Bedford", 821: "Central Bedfordshire", 822: "Luton",
825: "Buckinghamshire", 826: "Milton Keynes", 830: "Derbyshire", 831: "Derby",
835: "Dorset", 836: "Bournemouth, Christchurch and Poole", 837: "Poole",
838: "Bournemouth", 839: "Durham", 840: "Darlington", 841: "East Sussex",
845: "Brighton and Hove", 846: "Hampshire", 850: "Portsmouth", 851: "Southampton",
852: "Isle of Wight", 855: "Leicestershire", 856: "Leicester", 857: "Rutland",
860: "Staffordshire", 861: "Stoke-on-Trent", 865: "Wiltshire", 866: "Swindon",
867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
870: "Reading", 871: "Slough", 872: "Wokingham", 873: "Cambridgeshire",
874: "Peterborough", 876: "Halton", 877: "Warrington", 878: "Devon",
879: "Plymouth", 880: "Torbay", 881: "Essex", 882: "Southend-on-Sea",
883: "Thurrock", 884: "Herefordshire", 885: "Worcestershire", 886: "Kent",
887: "Medway", 888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
891: "Nottinghamshire", 892: "Nottingham", 893: "Shropshire", 894: "Telford and Wrekin",
895: "Cheshire East", 896: "Cheshire West and Chester", 908: "Cornwall",
909: "Cumbria", 916: "Gloucestershire", 919: "Hertfordshire", 921: "Norfolk",
925: "Lincolnshire", 926: "Northamptonshire", 928: "Northumberland",
929: "Oxfordshire", 931: "Somerset", 933: "Suffolk", 935: "Surrey",
936: "Warwickshire", 937: "West Sussex", 938: "Westmorland and Furness",
940: "Cumberland",
# Additional codes
420: "Isles of Scilly",
}