Files
school_compare/backend/schemas.py
Tudor e8175561d5
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 46s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m15s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 32s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
updates for secondary schools
2026-03-28 22:36:00 +00:00

730 lines
21 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Schema definitions: column mappings, metric definitions, school type mappings.
Single source of truth for all data transformations.
"""
# Translation table: DfE CSV header -> field name exposed by the API.
# Keys are the raw column headers; values are the snake_case names used
# everywhere downstream.
COLUMN_MAPPINGS = {
    # --- School identity and basic details ---
    "URN": "urn",
    "SCHNAME": "school_name",
    "ADDRESS1": "address1",
    "ADDRESS2": "address2",
    "TOWN": "town",
    "PCODE": "postcode",
    "NFTYPE": "school_type_code",
    "RELDENOM": "religious_denomination",
    "AGERANGE": "age_range",
    "TOTPUPS": "total_pupils",
    "TELIG": "eligible_pupils",

    # --- Headline KS2 results ---
    "PTRWM_EXP": "rwm_expected_pct",
    "PTRWM_HIGH": "rwm_high_pct",
    "READPROG": "reading_progress",
    "WRITPROG": "writing_progress",
    "MATPROG": "maths_progress",
    "PTREAD_EXP": "reading_expected_pct",
    "PTWRITTA_EXP": "writing_expected_pct",
    "PTMAT_EXP": "maths_expected_pct",
    "READ_AVERAGE": "reading_avg_score",
    "MAT_AVERAGE": "maths_avg_score",
    "PTREAD_HIGH": "reading_high_pct",
    "PTWRITTA_HIGH": "writing_high_pct",
    "PTMAT_HIGH": "maths_high_pct",

    # --- Grammar, punctuation and spelling (GPS) ---
    "PTGPS_EXP": "gps_expected_pct",
    "PTGPS_HIGH": "gps_high_pct",
    "GPS_AVERAGE": "gps_avg_score",

    # --- Science ---
    "PTSCITA_EXP": "science_expected_pct",

    # --- Contextual information about the school ---
    "PTFSM6CLA1A": "disadvantaged_pct",
    "PTEALGRP2": "eal_pct",
    "PSENELK": "sen_support_pct",
    "PSENELE": "sen_ehcp_pct",
    "PTMOBN": "stability_pct",

    # --- Pupils absent from the tests/assessments ---
    "PTREAD_AT": "reading_absence_pct",
    "PTGPS_AT": "gps_absence_pct",
    "PTMAT_AT": "maths_absence_pct",
    "PTWRITTA_AD": "writing_absence_pct",
    "PTSCITA_AD": "science_absence_pct",

    # --- Results split by gender ---
    "PTRWM_EXP_B": "rwm_expected_boys_pct",
    "PTRWM_EXP_G": "rwm_expected_girls_pct",
    "PTRWM_HIGH_B": "rwm_high_boys_pct",
    "PTRWM_HIGH_G": "rwm_high_girls_pct",

    # --- Results for disadvantaged pupils ---
    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
    "DIFFN_RWM_EXP": "disadvantaged_gap",

    # --- Three-year averages ---
    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
    "READ_AVERAGE_3YR": "reading_avg_3yr",
    "MAT_AVERAGE_3YR": "maths_avg_3yr",
}
# API field names whose raw values must be parsed as numbers.
NUMERIC_COLUMNS = [
    # Headline results
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",

    # GPS and science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",

    # Contextual information about the school
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",

    # Pupils absent from the tests/assessments
    "reading_absence_pct",
    "gps_absence_pct",
    "maths_absence_pct",
    "writing_absence_pct",
    "science_absence_pct",

    # Results split by gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",

    # Results for disadvantaged pupils
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",

    # Three-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# Display label for each school type code.
# Several codes intentionally collapse onto the same label.
SCHOOL_TYPE_MAP = {
    # Academy variants
    "AC": "Academy",
    "ACC": "Academy",
    "ACCS": "Academy",
    "ACS": "Academy (Sponsor Led)",

    # Community
    "CY": "Community",
    "CYS": "Community",

    # Voluntary
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",

    # Foundation
    "FD": "Foundation",
    "F": "Foundation",
    "FDS": "Foundation",

    # Free schools
    "FS": "Free School",
}
# Placeholder tokens (plus the empty string) that must be read as null
# rather than as real values.
NULL_VALUES = [
    "SUPP",
    "NE",
    "NA",
    "NP",
    "NEW",
    "LOW",
    "",
]
# Metric definitions (KS2 and KS4) - single source of truth
# Used by both backend API and frontend
#
# Each entry is keyed by the API field name and carries:
#   name        - full display label
#   short_name  - compact label
#   description - one-sentence explanation of the metric
#   type        - "percentage" or "score"
#   category    - grouping key ("expected", "higher", "progress", "average",
#                 "gender", "equity", "context", "absence", "trends", "gcse")
METRIC_DEFINITIONS = {
    # Expected Standard
    "rwm_expected_pct": {
        "name": "RWM Combined %",
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
        "category": "expected",
    },
    # Higher Standard
    "rwm_high_pct": {
        "name": "RWM Combined Higher %",
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
        "category": "higher",
    },
    # Progress Scores
    "reading_progress": {
        "name": "Reading Progress",
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
        "category": "progress",
    },
    # Average Scores
    "reading_avg_score": {
        "name": "Reading Average Score",
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
        "category": "average",
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
        "name": "RWM Expected % (Boys)",
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
        "category": "gender",
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
        "name": "RWM Expected % (Disadvantaged)",
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
        "category": "equity",
    },
    # School Context
    "disadvantaged_pct": {
        "name": "% Disadvantaged Pupils",
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
        "category": "context",
    },
    # Pupil Absence from Tests
    "reading_absence_pct": {
        "name": "Reading Test Absence %",
        "short_name": "Reading Absent",
        "description": "% of pupils absent from or unable to access the Reading test",
        "type": "percentage",
        "category": "absence",
    },
    "gps_absence_pct": {
        "name": "GPS Test Absence %",
        "short_name": "GPS Absent",
        "description": "% of pupils absent from or unable to access the GPS test",
        "type": "percentage",
        "category": "absence",
    },
    "maths_absence_pct": {
        "name": "Maths Test Absence %",
        "short_name": "Maths Absent",
        "description": "% of pupils absent from or unable to access the Maths test",
        "type": "percentage",
        "category": "absence",
    },
    "writing_absence_pct": {
        "name": "Writing Absence %",
        "short_name": "Writing Absent",
        "description": "% of pupils absent from or disapplied in Writing assessment",
        "type": "percentage",
        "category": "absence",
    },
    "science_absence_pct": {
        "name": "Science Absence %",
        "short_name": "Science Absent",
        "description": "% of pupils absent from or disapplied in Science assessment",
        "type": "percentage",
        "category": "absence",
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
        "name": "RWM Expected % (3-Year Avg)",
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
        "category": "trends",
    },
    # GCSE Performance (KS4)
    "attainment_8_score": {
        "name": "Attainment 8",
        "short_name": "Att 8",
        "description": "Average grade across a pupil's best 8 GCSEs including English and Maths",
        "type": "score",
        "category": "gcse",
    },
    "progress_8_score": {
        "name": "Progress 8",
        "short_name": "P8",
        "description": "Progress from KS2 baseline to GCSE relative to similar pupils nationally (0 = national average)",
        "type": "score",
        "category": "gcse",
    },
    "english_maths_standard_pass_pct": {
        "name": "English & Maths Grade 4+",
        "short_name": "E&M 4+",
        "description": "% of pupils achieving grade 4 (standard pass) or above in both English and Maths",
        "type": "percentage",
        "category": "gcse",
    },
    "english_maths_strong_pass_pct": {
        "name": "English & Maths Grade 5+",
        "short_name": "E&M 5+",
        "description": "% of pupils achieving grade 5 (strong pass) or above in both English and Maths",
        "type": "percentage",
        "category": "gcse",
    },
    "ebacc_entry_pct": {
        "name": "EBacc Entry %",
        "short_name": "EBacc Entry",
        "description": "% of pupils entered for the English Baccalaureate (English, Maths, Sciences, Languages, Humanities)",
        "type": "percentage",
        "category": "gcse",
    },
    "ebacc_standard_pass_pct": {
        "name": "EBacc Grade 4+",
        "short_name": "EBacc 4+",
        "description": "% of pupils achieving grade 4+ across all EBacc subjects",
        "type": "percentage",
        "category": "gcse",
    },
    "ebacc_strong_pass_pct": {
        "name": "EBacc Grade 5+",
        "short_name": "EBacc 5+",
        "description": "% of pupils achieving grade 5+ across all EBacc subjects",
        "type": "percentage",
        "category": "gcse",
    },
    "ebacc_avg_score": {
        "name": "EBacc Average Score",
        "short_name": "EBacc Avg",
        "description": "Average points score across EBacc subjects",
        "type": "score",
        "category": "gcse",
    },
    "gcse_grade_91_pct": {
        # Labels spell the grade range as "9-1" with an ASCII hyphen so the
        # range separator cannot be lost or confused with another character.
        "name": "GCSE Grade 9-1 %",
        "short_name": "GCSE 9-1",
        "description": "% of GCSE entries achieving a grade 9 to 1",
        "type": "percentage",
        "category": "gcse",
    },
}
# Fields returned in the rankings response, in output order.
RANKING_COLUMNS = [
    # Identity and basic details
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",

    # Expected standard
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",

    # Higher standard
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",

    # Progress scores and average scores
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",

    # Split by gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",

    # Disadvantaged pupils
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",

    # School context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",

    # Test/assessment absence
    "reading_absence_pct",
    "gps_absence_pct",
    "maths_absence_pct",
    "writing_absence_pct",
    "science_absence_pct",

    # Three-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",

    # KS4 / GCSE
    "attainment_8_score",
    "progress_8_score",
    "english_maths_standard_pass_pct",
    "english_maths_strong_pass_pct",
    "ebacc_entry_pct",
    "ebacc_standard_pass_pct",
    "ebacc_strong_pass_pct",
    "ebacc_avg_score",
    "gcse_grade_91_pct",
]
# Fields returned for each school in the school listing, in output order.
SCHOOL_COLUMNS = [
    # Identity
    "urn",
    "school_name",
    "local_authority",
    "school_type",

    # Location (postal)
    "address",
    "town",
    "postcode",

    # Characteristics
    "religious_denomination",
    "age_range",
    "gender",
    "admissions_policy",

    # Ofsted
    "ofsted_grade",
    "ofsted_date",

    # Location (coordinates)
    "latitude",
    "longitude",
]
# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
#
# Keys are the three-digit DfE local authority numbers as ints; values are
# the authority names. Codes of authorities that no longer exist are kept
# (marked "historic") so older data still resolves to a name.
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London",
    202: "Camden",
    203: "Greenwich",
    204: "Hackney",
    205: "Hammersmith and Fulham",
    206: "Islington",
    207: "Kensington and Chelsea",
    208: "Lambeth",
    209: "Lewisham",
    210: "Southwark",
    211: "Tower Hamlets",
    212: "Wandsworth",
    213: "Westminster",
    # Outer London
    301: "Barking and Dagenham",
    302: "Barnet",
    303: "Bexley",
    304: "Brent",
    305: "Bromley",
    306: "Croydon",
    307: "Ealing",
    308: "Enfield",
    309: "Haringey",
    310: "Harrow",
    311: "Havering",
    312: "Hillingdon",
    313: "Hounslow",
    314: "Kingston upon Thames",
    315: "Merton",
    316: "Newham",
    317: "Redbridge",
    318: "Richmond upon Thames",
    319: "Sutton",
    320: "Waltham Forest",
    # West Midlands
    330: "Birmingham",
    331: "Coventry",
    332: "Dudley",
    333: "Sandwell",
    334: "Solihull",
    335: "Walsall",
    336: "Wolverhampton",
    # Merseyside
    340: "Knowsley",
    341: "Liverpool",
    342: "St. Helens",
    343: "Sefton",
    344: "Wirral",
    # Greater Manchester
    350: "Bolton",
    351: "Bury",
    352: "Manchester",
    353: "Oldham",
    354: "Rochdale",
    355: "Salford",
    356: "Stockport",
    357: "Tameside",
    358: "Trafford",
    359: "Wigan",
    # South Yorkshire
    370: "Barnsley",
    371: "Doncaster",
    372: "Rotherham",
    373: "Sheffield",
    # West Yorkshire
    380: "Bradford",
    381: "Calderdale",
    382: "Kirklees",
    383: "Leeds",
    384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead",
    391: "Newcastle upon Tyne",
    392: "North Tyneside",
    393: "South Tyneside",
    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Shire counties and unitary authorities (800-896)
    800: "Bath and North East Somerset",
    801: "Bristol, City of",
    802: "North Somerset",
    803: "South Gloucestershire",
    805: "Hartlepool",
    806: "Middlesbrough",
    807: "Redcar and Cleveland",
    808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of",
    811: "East Riding of Yorkshire",
    812: "North East Lincolnshire",
    813: "North Lincolnshire",
    815: "North Yorkshire",
    816: "York",
    820: "Bedfordshire",  # Historic (split into 822/823)
    821: "Luton",
    822: "Bedford",
    823: "Central Bedfordshire",
    825: "Buckinghamshire",
    826: "Milton Keynes",
    830: "Derbyshire",
    831: "Derby",
    835: "Dorset",  # Historic (replaced by 838)
    836: "Poole",  # Historic (merged into 839)
    837: "Bournemouth",  # Historic (merged into 839)
    838: "Dorset",
    839: "Bournemouth, Christchurch and Poole",
    840: "County Durham",
    841: "Darlington",
    845: "East Sussex",
    846: "Brighton and Hove",
    850: "Hampshire",
    851: "Portsmouth",
    852: "Southampton",
    855: "Leicestershire",
    856: "Leicester",
    857: "Rutland",
    860: "Staffordshire",
    861: "Stoke-on-Trent",
    865: "Wiltshire",
    866: "Swindon",
    867: "Bracknell Forest",
    868: "Windsor and Maidenhead",
    869: "West Berkshire",
    870: "Reading",
    871: "Slough",
    872: "Wokingham",
    873: "Cambridgeshire",
    874: "Peterborough",
    876: "Halton",
    877: "Warrington",
    878: "Devon",
    879: "Plymouth",
    880: "Torbay",
    881: "Essex",
    882: "Southend-on-Sea",
    883: "Thurrock",
    884: "Herefordshire",
    885: "Worcestershire",
    886: "Kent",
    887: "Medway",
    888: "Lancashire",
    889: "Blackburn with Darwen",
    890: "Blackpool",
    891: "Nottinghamshire",
    892: "Nottingham",
    893: "Shropshire",
    894: "Telford and Wrekin",
    895: "Cheshire East",
    896: "Cheshire West and Chester",
    # Counties and unitaries (900+)
    908: "Cornwall",
    909: "Cumbria",  # Historic (split into 942/943 in 2023)
    916: "Gloucestershire",
    919: "Hertfordshire",
    921: "Isle of Wight",
    925: "Lincolnshire",
    926: "Norfolk",
    928: "Northamptonshire",  # Historic (split into 940/941 in 2021)
    929: "Northumberland",
    931: "Oxfordshire",
    933: "Somerset",
    935: "Suffolk",
    936: "Surrey",
    937: "Warwickshire",
    938: "West Sussex",
    # Authorities created by the 2021 and 2023 reorganisations
    940: "North Northamptonshire",
    941: "West Northamptonshire",
    942: "Cumberland",
    943: "Westmorland and Furness",
}