fixing data load
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 57s

This commit is contained in:
Tudor Sitaru
2026-01-06 22:06:59 +00:00
parent e601c499b6
commit 1a8ec670b9
3 changed files with 465 additions and 261 deletions

View File

@@ -5,20 +5,25 @@ Uses real data from UK Government Compare School Performance downloads.
"""
from contextlib import asynccontextmanager
import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional
import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from .config import settings
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
from .data_loader import (
load_school_data, clear_cache, geocode_single_postcode,
geocode_postcodes_bulk, haversine_distance, get_data_info as get_db_info
clear_cache,
geocode_postcodes_bulk,
geocode_single_postcode,
haversine_distance,
load_school_data,
)
from .data_loader import get_data_info as get_db_info
from .database import init_db
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
from .utils import clean_for_json
@@ -28,16 +33,16 @@ async def lifespan(app: FastAPI):
# Startup: initialize database and pre-load data
print("Starting up: Initializing database...")
init_db() # Ensure tables exist
print("Loading school data from database...")
df = load_school_data()
if df.empty:
print("Warning: No data in database. Run the migration script to import data.")
else:
print("Data loaded successfully.")
yield # Application runs here
# Shutdown: cleanup if needed
print("Shutting down...")
@@ -80,7 +85,9 @@ async def serve_rankings():
@app.get("/api/schools")
async def get_schools(
search: Optional[str] = Query(None, description="Search by school name"),
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
local_authority: Optional[str] = Query(
None, description="Filter by local authority"
),
school_type: Optional[str] = Query(None, description="Filter by school type"),
postcode: Optional[str] = Query(None, description="Search near postcode"),
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
@@ -89,28 +96,40 @@ async def get_schools(
):
"""
Get list of unique primary schools with pagination.
Returns paginated results with total count for efficient loading.
Supports location-based search using postcode.
"""
df = load_school_data()
if df.empty:
return {"schools": [], "total": 0, "page": page, "page_size": 0}
# Use configured default if not specified
if page_size is None:
page_size = settings.default_page_size
# Get unique schools (latest year data for each)
latest_year = df.groupby('urn')['year'].max().reset_index()
df_latest = df.merge(latest_year, on=['urn', 'year'])
# Include lat/long in columns for location search
location_cols = ['latitude', 'longitude']
available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
latest_year = df.groupby("urn")["year"].max().reset_index()
df_latest = df.merge(latest_year, on=["urn", "year"])
# Include key result metrics for display on cards
location_cols = ["latitude", "longitude"]
result_cols = [
"year",
"rwm_expected_pct",
"reading_expected_pct",
"writing_expected_pct",
"maths_expected_pct",
"total_pupils",
]
available_cols = [
c
for c in SCHOOL_COLUMNS + location_cols + result_cols
if c in df_latest.columns
]
schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])
# Location-based search
search_coords = None
if postcode:
@@ -118,65 +137,81 @@ async def get_schools(
if coords:
search_coords = coords
schools_df = schools_df.copy()
# Geocode school postcodes on-demand if not already cached
if 'postcode' in schools_df.columns:
unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
if "postcode" in schools_df.columns:
unique_postcodes = schools_df["postcode"].dropna().unique().tolist()
geocoded = geocode_postcodes_bulk(unique_postcodes)
# Add lat/long from geocoded data
schools_df['latitude'] = schools_df['postcode'].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
schools_df["latitude"] = schools_df["postcode"].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0]
if pd.notna(pc)
else None
)
schools_df['longitude'] = schools_df['postcode'].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
schools_df["longitude"] = schools_df["postcode"].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1]
if pd.notna(pc)
else None
)
# Filter by distance
def calc_distance(row):
if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
return float('inf')
if pd.isna(row.get("latitude")) or pd.isna(row.get("longitude")):
return float("inf")
return haversine_distance(
search_coords[0], search_coords[1],
row['latitude'], row['longitude']
search_coords[0],
search_coords[1],
row["latitude"],
row["longitude"],
)
schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
schools_df = schools_df[schools_df['distance'] <= radius]
schools_df = schools_df.sort_values('distance')
schools_df["distance"] = schools_df.apply(calc_distance, axis=1)
schools_df = schools_df[schools_df["distance"] <= radius]
schools_df = schools_df.sort_values("distance")
# Apply filters
if search:
search_lower = search.lower()
mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
mask = (
schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
)
if "address" in schools_df.columns:
mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
mask = mask | schools_df["address"].str.lower().str.contains(
search_lower, na=False
)
schools_df = schools_df[mask]
if local_authority:
schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()]
schools_df = schools_df[
schools_df["local_authority"].str.lower() == local_authority.lower()
]
if school_type:
schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()]
schools_df = schools_df[
schools_df["school_type"].str.lower() == school_type.lower()
]
# Pagination
total = len(schools_df)
start_idx = (page - 1) * page_size
end_idx = start_idx + page_size
schools_df = schools_df.iloc[start_idx:end_idx]
# Remove internal columns before sending
output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
if 'distance' in schools_df.columns:
output_cols.append('distance')
output_cols = [c for c in schools_df.columns if c not in ["latitude", "longitude"]]
if "distance" in schools_df.columns:
output_cols.append("distance")
return {
"schools": clean_for_json(schools_df[output_cols]),
"total": total,
"page": page,
"page_size": page_size,
"total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
"search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
"search_location": {"postcode": postcode, "radius": radius}
if search_coords
else None,
}
@@ -184,21 +219,21 @@ async def get_schools(
async def get_school_details(urn: int):
"""Get detailed KS2 data for a specific primary school across all years."""
df = load_school_data()
if df.empty:
raise HTTPException(status_code=404, detail="No data available")
school_data = df[df["urn"] == urn]
if school_data.empty:
raise HTTPException(status_code=404, detail="School not found")
# Sort by year
school_data = school_data.sort_values("year")
# Get latest info for the school
latest = school_data.iloc[-1]
return {
"school_info": {
"urn": urn,
@@ -208,7 +243,7 @@ async def get_school_details(urn: int):
"address": latest.get("address", ""),
"phase": "Primary",
},
"yearly_data": clean_for_json(school_data)
"yearly_data": clean_for_json(school_data),
}
@@ -216,20 +251,20 @@ async def get_school_details(urn: int):
async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")):
"""Compare multiple primary schools side by side."""
df = load_school_data()
if df.empty:
raise HTTPException(status_code=404, detail="No data available")
try:
urn_list = [int(u.strip()) for u in urns.split(",")]
except ValueError:
raise HTTPException(status_code=400, detail="Invalid URN format")
comparison_data = df[df["urn"].isin(urn_list)]
if comparison_data.empty:
raise HTTPException(status_code=404, detail="No schools found")
result = {}
for urn in urn_list:
school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year")
@@ -242,9 +277,9 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
"local_authority": latest.get("local_authority", ""),
"address": latest.get("address", ""),
},
"yearly_data": clean_for_json(school_data)
"yearly_data": clean_for_json(school_data),
}
return {"comparison": result}
@@ -252,14 +287,14 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
async def get_filter_options():
"""Get available filter options (local authorities, school types, years)."""
df = load_school_data()
if df.empty:
return {
"local_authorities": [],
"school_types": [],
"years": [],
}
return {
"local_authorities": sorted(df["local_authority"].dropna().unique().tolist()),
"school_types": sorted(df["school_type"].dropna().unique().tolist()),
@@ -271,36 +306,40 @@ async def get_filter_options():
async def get_available_metrics():
    """
    Return the KS2 performance metrics that can be offered to clients.

    Each entry is a metric's definition from METRIC_DEFINITIONS with its
    identifier added under "key". The frontend is expected to consume this
    list rather than keep its own copy of the definitions.
    """
    frame = load_school_data()
    # With no data loaded there are no columns to check against, so every
    # defined metric is advertised; otherwise only metrics backed by a column.
    advertise_all = frame.empty
    metrics = [
        {"key": name, **definition}
        for name, definition in METRIC_DEFINITIONS.items()
        if advertise_all or name in frame.columns
    ]
    return {"metrics": metrics}
@app.get("/api/rankings")
async def get_rankings(
metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
year: Optional[int] = Query(
None, description="Specific year (defaults to most recent)"
),
limit: int = Query(20, ge=1, le=100, description="Number of schools to return"),
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
local_authority: Optional[str] = Query(
None, description="Filter by local authority"
),
):
"""Get primary school rankings by a specific KS2 metric."""
df = load_school_data()
if df.empty:
return {"metric": metric, "year": None, "rankings": [], "total": 0}
if metric not in df.columns:
raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available")
# Filter by year
if year:
df = df[df["year"] == year]
@@ -308,22 +347,22 @@ async def get_rankings(
# Use most recent year
max_year = df["year"].max()
df = df[df["year"] == max_year]
# Filter by local authority if specified
if local_authority:
df = df[df["local_authority"].str.lower() == local_authority.lower()]
# Sort and rank (exclude rows with no data for this metric)
df = df.dropna(subset=[metric])
total = len(df)
# For progress scores, higher is better. For percentages, higher is also better.
df = df.sort_values(metric, ascending=False).head(limit)
# Return only relevant fields for rankings
available_cols = [c for c in RANKING_COLUMNS if c in df.columns]
df = df[available_cols]
return {
"metric": metric,
"year": int(df["year"].iloc[0]) if not df.empty else None,
@@ -337,28 +376,34 @@ async def get_data_info():
"""Get information about loaded data."""
# Get info directly from database
db_info = get_db_info()
if db_info["total_schools"] == 0:
return {
"status": "no_data",
"message": "No data in database. Run the migration script: python scripts/migrate_csv_to_db.py",
"data_source": "PostgreSQL",
}
# Also get DataFrame-based stats for backwards compatibility
df = load_school_data()
if df.empty:
return {
"status": "no_data",
"message": "No data available",
"data_source": "PostgreSQL",
}
years = [int(y) for y in sorted(df["year"].unique())]
schools_per_year = {str(int(k)): int(v) for k, v in df.groupby("year")["urn"].nunique().to_dict().items()}
la_counts = {str(k): int(v) for k, v in df["local_authority"].value_counts().to_dict().items()}
schools_per_year = {
str(int(k)): int(v)
for k, v in df.groupby("year")["urn"].nunique().to_dict().items()
}
la_counts = {
str(k): int(v)
for k, v in df["local_authority"].value_counts().to_dict().items()
}
return {
"status": "loaded",
"data_source": "PostgreSQL",
@@ -385,4 +430,5 @@ if settings.frontend_dir.exists():
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn, taking the bind
    # address and port from the application settings.
    import uvicorn

    uvicorn.run(app, port=settings.port, host=settings.host)

View File

@@ -5,94 +5,117 @@ Single source of truth for all data transformations.
# Column name mappings from DfE CSV to API field names
COLUMN_MAPPINGS = {
    # School identifiers, address fields and pupil counts
    "URN": "urn",
    "SCHNAME": "school_name",
    "ADDRESS1": "address1",
    "ADDRESS2": "address2",
    "TOWN": "town",
    "PCODE": "postcode",
    "NFTYPE": "school_type_code",
    "RELDENOM": "religious_denomination",
    "AGERANGE": "age_range",
    "TOTPUPS": "total_pupils",
    "TELIG": "eligible_pupils",
    # Core KS2 metrics
    "PTRWM_EXP": "rwm_expected_pct",
    "PTRWM_HIGH": "rwm_high_pct",
    "READPROG": "reading_progress",
    "WRITPROG": "writing_progress",
    "MATPROG": "maths_progress",
    "PTREAD_EXP": "reading_expected_pct",
    "PTWRITTA_EXP": "writing_expected_pct",
    "PTMAT_EXP": "maths_expected_pct",
    "READ_AVERAGE": "reading_avg_score",
    "MAT_AVERAGE": "maths_avg_score",
    "PTREAD_HIGH": "reading_high_pct",
    "PTWRITTA_HIGH": "writing_high_pct",
    "PTMAT_HIGH": "maths_high_pct",
    # GPS (Grammar, Punctuation & Spelling)
    "PTGPS_EXP": "gps_expected_pct",
    "PTGPS_HIGH": "gps_high_pct",
    "GPS_AVERAGE": "gps_avg_score",
    # Science
    "PTSCITA_EXP": "science_expected_pct",
    # School context
    "PTFSM6CLA1A": "disadvantaged_pct",
    "PTEALGRP2": "eal_pct",
    "PSENELK": "sen_support_pct",
    "PSENELE": "sen_ehcp_pct",
    "PTMOBN": "stability_pct",
    # Gender breakdown
    "PTRWM_EXP_B": "rwm_expected_boys_pct",
    "PTRWM_EXP_G": "rwm_expected_girls_pct",
    "PTRWM_HIGH_B": "rwm_high_boys_pct",
    "PTRWM_HIGH_G": "rwm_high_girls_pct",
    # Disadvantaged performance
    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
    "DIFFN_RWM_EXP": "disadvantaged_gap",
    # 3-year averages
    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
    "READ_AVERAGE_3YR": "reading_avg_3yr",
    "MAT_AVERAGE_3YR": "maths_avg_3yr",
}
# Numeric columns that need parsing
NUMERIC_COLUMNS = [
    # Each column is listed exactly once so that anything iterating this
    # list handles a column a single time.
    # Core metrics
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    # GPS & Science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",
    # School context
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Gender breakdown
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged performance
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# School type code to name mapping
SCHOOL_TYPE_MAP = {
    # Academies
    "AC": "Academy",
    "ACC": "Academy Converter",
    "ACS": "Academy Sponsor Led",
    # Community and voluntary schools
    "CY": "Community School",
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",
    # Two codes share the "Foundation" display name
    "FD": "Foundation",
    "F": "Foundation",
    "FS": "Free School",
}
# Special values to treat as null
NULL_VALUES = [
    "SUPP",
    "NE",
    "NA",
    "NP",
    "NEW",
    "LOW",
    "",  # empty string
]
# KS2 Metric definitions - single source of truth
# Used by both backend API and frontend
@@ -103,42 +126,42 @@ METRIC_DEFINITIONS = {
"short_name": "RWM %",
"description": "% meeting expected standard in reading, writing and maths",
"type": "percentage",
"category": "expected"
"category": "expected",
},
"reading_expected_pct": {
"name": "Reading Expected %",
"short_name": "Reading %",
"description": "% meeting expected standard in reading",
"type": "percentage",
"category": "expected"
"category": "expected",
},
"writing_expected_pct": {
"name": "Writing Expected %",
"short_name": "Writing %",
"description": "% meeting expected standard in writing",
"type": "percentage",
"category": "expected"
"category": "expected",
},
"maths_expected_pct": {
"name": "Maths Expected %",
"short_name": "Maths %",
"description": "% meeting expected standard in maths",
"type": "percentage",
"category": "expected"
"category": "expected",
},
"gps_expected_pct": {
"name": "GPS Expected %",
"short_name": "GPS %",
"description": "% meeting expected standard in grammar, punctuation & spelling",
"type": "percentage",
"category": "expected"
"category": "expected",
},
"science_expected_pct": {
"name": "Science Expected %",
"short_name": "Science %",
"description": "% meeting expected standard in science",
"type": "percentage",
"category": "expected"
"category": "expected",
},
# Higher Standard
"rwm_high_pct": {
@@ -146,35 +169,35 @@ METRIC_DEFINITIONS = {
"short_name": "RWM Higher %",
"description": "% achieving higher standard in RWM combined",
"type": "percentage",
"category": "higher"
"category": "higher",
},
"reading_high_pct": {
"name": "Reading Higher %",
"short_name": "Reading Higher %",
"description": "% achieving higher standard in reading",
"type": "percentage",
"category": "higher"
"category": "higher",
},
"writing_high_pct": {
"name": "Writing Higher %",
"short_name": "Writing Higher %",
"description": "% achieving greater depth in writing",
"type": "percentage",
"category": "higher"
"category": "higher",
},
"maths_high_pct": {
"name": "Maths Higher %",
"short_name": "Maths Higher %",
"description": "% achieving higher standard in maths",
"type": "percentage",
"category": "higher"
"category": "higher",
},
"gps_high_pct": {
"name": "GPS Higher %",
"short_name": "GPS Higher %",
"description": "% achieving higher standard in GPS",
"type": "percentage",
"category": "higher"
"category": "higher",
},
# Progress Scores
"reading_progress": {
@@ -182,21 +205,21 @@ METRIC_DEFINITIONS = {
"short_name": "Reading Progress",
"description": "Progress in reading from KS1 to KS2",
"type": "score",
"category": "progress"
"category": "progress",
},
"writing_progress": {
"name": "Writing Progress",
"short_name": "Writing Progress",
"description": "Progress in writing from KS1 to KS2",
"type": "score",
"category": "progress"
"category": "progress",
},
"maths_progress": {
"name": "Maths Progress",
"short_name": "Maths Progress",
"description": "Progress in maths from KS1 to KS2",
"type": "score",
"category": "progress"
"category": "progress",
},
# Average Scores
"reading_avg_score": {
@@ -204,21 +227,21 @@ METRIC_DEFINITIONS = {
"short_name": "Reading Avg",
"description": "Average scaled score in reading",
"type": "score",
"category": "average"
"category": "average",
},
"maths_avg_score": {
"name": "Maths Average Score",
"short_name": "Maths Avg",
"description": "Average scaled score in maths",
"type": "score",
"category": "average"
"category": "average",
},
"gps_avg_score": {
"name": "GPS Average Score",
"short_name": "GPS Avg",
"description": "Average scaled score in GPS",
"type": "score",
"category": "average"
"category": "average",
},
# Gender Performance
"rwm_expected_boys_pct": {
@@ -226,28 +249,28 @@ METRIC_DEFINITIONS = {
"short_name": "Boys RWM %",
"description": "% of boys meeting expected standard",
"type": "percentage",
"category": "gender"
"category": "gender",
},
"rwm_expected_girls_pct": {
"name": "RWM Expected % (Girls)",
"short_name": "Girls RWM %",
"description": "% of girls meeting expected standard",
"type": "percentage",
"category": "gender"
"category": "gender",
},
"rwm_high_boys_pct": {
"name": "RWM Higher % (Boys)",
"short_name": "Boys Higher %",
"description": "% of boys at higher standard",
"type": "percentage",
"category": "gender"
"category": "gender",
},
"rwm_high_girls_pct": {
"name": "RWM Higher % (Girls)",
"short_name": "Girls Higher %",
"description": "% of girls at higher standard",
"type": "percentage",
"category": "gender"
"category": "gender",
},
# Disadvantaged Performance
"rwm_expected_disadvantaged_pct": {
@@ -255,21 +278,21 @@ METRIC_DEFINITIONS = {
"short_name": "Disadvantaged %",
"description": "% of disadvantaged pupils meeting expected",
"type": "percentage",
"category": "equity"
"category": "equity",
},
"rwm_expected_non_disadvantaged_pct": {
"name": "RWM Expected % (Non-Disadvantaged)",
"short_name": "Non-Disadv %",
"description": "% of non-disadvantaged pupils meeting expected",
"type": "percentage",
"category": "equity"
"category": "equity",
},
"disadvantaged_gap": {
"name": "Disadvantaged Gap",
"short_name": "Disadv Gap",
"description": "Gap between disadvantaged and national non-disadvantaged",
"type": "score",
"category": "equity"
"category": "equity",
},
# School Context
"disadvantaged_pct": {
@@ -277,28 +300,28 @@ METRIC_DEFINITIONS = {
"short_name": "% Disadvantaged",
"description": "% of pupils eligible for free school meals or looked after",
"type": "percentage",
"category": "context"
"category": "context",
},
"eal_pct": {
"name": "% EAL Pupils",
"short_name": "% EAL",
"description": "% of pupils with English as additional language",
"type": "percentage",
"category": "context"
"category": "context",
},
"sen_support_pct": {
"name": "% SEN Support",
"short_name": "% SEN",
"description": "% of pupils with SEN support",
"type": "percentage",
"category": "context"
"category": "context",
},
"stability_pct": {
"name": "% Pupil Stability",
"short_name": "% Stable",
"description": "% of non-mobile pupils (stayed at school)",
"type": "percentage",
"category": "context"
"category": "context",
},
# 3-Year Averages
"rwm_expected_3yr_pct": {
@@ -306,122 +329,257 @@ METRIC_DEFINITIONS = {
"short_name": "RWM 3yr %",
"description": "3-year average % meeting expected",
"type": "percentage",
"category": "trends"
"category": "trends",
},
"reading_avg_3yr": {
"name": "Reading Score (3-Year Avg)",
"short_name": "Reading 3yr",
"description": "3-year average reading score",
"type": "score",
"category": "trends"
"category": "trends",
},
"maths_avg_3yr": {
"name": "Maths Score (3-Year Avg)",
"short_name": "Maths 3yr",
"description": "3-year average maths score",
"type": "score",
"category": "trends"
"category": "trends",
},
}
# Ranking columns to include in rankings response
RANKING_COLUMNS = [
    # Each column is listed exactly once: a repeated name would yield a
    # repeated column when this list is used to select DataFrame columns.
    # School identity
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",
    # Core expected
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",
    # Core higher
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",
    # Progress & averages
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",
    # Gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Equity
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # Context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",
    # 3-year
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
# School listing columns
SCHOOL_COLUMNS = [
    # Identity
    "urn",
    "school_name",
    # Classification
    "local_authority",
    "school_type",
    # Location
    "address",
    "town",
    "postcode",
]
# Local Authority code to name mapping (for fallback when LANAME column missing)
# Source: https://www.gov.uk/government/publications/local-authority-codes
LA_CODE_TO_NAME = {
    # Inner London
    201: "City of London",
    202: "Camden",
    203: "Greenwich",
    204: "Hackney",
    205: "Hammersmith and Fulham",
    206: "Islington",
    207: "Kensington and Chelsea",
    208: "Lambeth",
    209: "Lewisham",
    210: "Southwark",
    211: "Tower Hamlets",
    212: "Wandsworth",
    213: "Westminster",
    # Outer London
    301: "Barking and Dagenham",
    302: "Barnet",
    303: "Bexley",
    304: "Brent",
    305: "Bromley",
    306: "Croydon",
    307: "Ealing",
    308: "Enfield",
    309: "Haringey",
    310: "Harrow",
    311: "Havering",
    312: "Hillingdon",
    313: "Hounslow",
    314: "Kingston upon Thames",
    315: "Merton",
    316: "Newham",
    317: "Redbridge",
    318: "Richmond upon Thames",
    319: "Sutton",
    320: "Waltham Forest",
    # West Midlands
    330: "Birmingham",
    331: "Coventry",
    332: "Dudley",
    333: "Sandwell",
    334: "Solihull",
    335: "Walsall",
    336: "Wolverhampton",
    # Merseyside
    340: "Knowsley",
    341: "Liverpool",
    342: "St. Helens",
    343: "Sefton",
    344: "Wirral",
    # Greater Manchester
    350: "Bolton",
    351: "Bury",
    352: "Manchester",
    353: "Oldham",
    354: "Rochdale",
    355: "Salford",
    356: "Stockport",
    357: "Tameside",
    358: "Trafford",
    359: "Wigan",
    # South Yorkshire
    370: "Barnsley",
    371: "Doncaster",
    372: "Rotherham",
    373: "Sheffield",
    # West Yorkshire
    380: "Bradford",
    381: "Calderdale",
    382: "Kirklees",
    383: "Leeds",
    384: "Wakefield",
    # Tyne and Wear
    390: "Gateshead",
    391: "Newcastle upon Tyne",
    392: "North Tyneside",
    393: "South Tyneside",
    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities (800+)
    800: "Bath and North East Somerset",
    801: "Bristol, City of",
    802: "North Somerset",
    803: "South Gloucestershire",
    805: "Hartlepool",
    806: "Middlesbrough",
    807: "Redcar and Cleveland",
    808: "Stockton-on-Tees",
    810: "Kingston Upon Hull, City of",
    811: "East Riding of Yorkshire",
    812: "North East Lincolnshire",
    813: "North Lincolnshire",
    815: "North Yorkshire",
    816: "York",
    820: "Bedford",
    821: "Central Bedfordshire",
    822: "Luton",
    823: "West Northamptonshire",
    824: "North Northamptonshire",
    825: "Buckinghamshire",
    826: "Milton Keynes",
    830: "Derbyshire",
    831: "Derby",
    835: "Dorset",
    836: "Bournemouth, Christchurch and Poole",
    # Historic codes (merged into 836)
    837: "Poole",
    838: "Bournemouth",
    839: "Durham",
    840: "Darlington",
    841: "East Sussex",
    845: "Brighton and Hove",
    846: "Hampshire",
    850: "Portsmouth",
    851: "Southampton",
    852: "Isle of Wight",
    855: "Leicestershire",
    856: "Leicester",
    857: "Rutland",
    860: "Staffordshire",
    861: "Stoke-on-Trent",
    865: "Wiltshire",
    866: "Swindon",
    867: "Bracknell Forest",
    868: "Windsor and Maidenhead",
    869: "West Berkshire",
    870: "Reading",
    871: "Slough",
    872: "Wokingham",
    873: "Cambridgeshire",
    874: "Peterborough",
    876: "Halton",
    877: "Warrington",
    878: "Devon",
    879: "Plymouth",
    880: "Torbay",
    881: "Essex",
    882: "Southend-on-Sea",
    883: "Thurrock",
    884: "Herefordshire",
    885: "Worcestershire",
    886: "Kent",
    887: "Medway",
    888: "Lancashire",
    889: "Blackburn with Darwen",
    890: "Blackpool",
    891: "Nottinghamshire",
    892: "Nottingham",
    893: "Shropshire",
    894: "Telford and Wrekin",
    895: "Cheshire East",
    896: "Cheshire West and Chester",
    # County councils (900+)
    908: "Cornwall",
    909: "Cumbria",
    916: "Gloucestershire",
    919: "Hertfordshire",
    921: "Norfolk",
    925: "Lincolnshire",
    926: "Northamptonshire",  # Historic (split into 823/824 in 2021)
    928: "Northumberland",
    929: "Oxfordshire",
    931: "Somerset",
    933: "Suffolk",
    935: "Surrey",
    936: "Warwickshire",
    937: "West Sussex",
    # New authorities (2023 reorganization)
    938: "Westmorland and Furness",
    940: "Cumberland",
    941: "North Yorkshire",  # New unitary
    942: "Somerset",  # New unitary (replaced 931)
    943: "Buckinghamshire",  # New unitary (2020, replacing 825 in some datasets)
}

View File

@@ -465,12 +465,12 @@ function renderFeaturedSchools(schools) {
<div class="school-address">${escapeHtml(school.address || "")}</div>
<div class="school-stats">
<div class="stat">
<div class="stat-value">Primary</div>
<div class="stat-label">Phase</div>
<div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
<div class="stat-label">RWM Expected</div>
</div>
<div class="stat">
<div class="stat-value">KS2</div>
<div class="stat-label">Data</div>
<div class="stat-value">${school.total_pupils || "-"}</div>
<div class="stat-label">Pupils</div>
</div>
</div>
</div>
@@ -587,12 +587,12 @@ function renderSchools(schools) {
<div class="school-address">${escapeHtml(school.address || "")}</div>
<div class="school-stats">
<div class="stat">
<div class="stat-value">Primary</div>
<div class="stat-label">Phase</div>
<div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
<div class="stat-label">RWM Expected</div>
</div>
<div class="stat">
<div class="stat-value">KS2</div>
<div class="stat-label">Data</div>
<div class="stat-value">${school.total_pupils || "-"}</div>
<div class="stat-label">Pupils</div>
</div>
</div>
</div>