Fix data load: include key result metrics in school listings and show them on school cards

2026-01-06 22:06:59 +00:00
parent e601c499b6
commit 1a8ec670b9
3 changed files with 465 additions and 261 deletions
@@ -5,20 +5,25 @@ Uses real data from UK Government Compare School Performance downloads.
 """
 from contextlib import asynccontextmanager
 import pandas as pd
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from typing import Optional
 import pandas as pd
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 from .config import settings
 from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
 from .data_loader import (
-    load_school_data, clear_cache, geocode_single_postcode, 
+    clear_cache,
-    geocode_postcodes_bulk, haversine_distance, get_data_info as get_db_info
+    geocode_postcodes_bulk,
    geocode_single_postcode,
    haversine_distance,
    load_school_data,
 )
 from .data_loader import get_data_info as get_db_info
 from .database import init_db
 from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
 from .utils import clean_for_json
@@ -80,7 +85,9 @@ async def serve_rankings():
@app.get("/api/schools")
 async def get_schools(
    search: Optional[str] = Query(None, description="Search by school name"),
-    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
+    local_authority: Optional[str] = Query(
        None, description="Filter by local authority"
    ),
    school_type: Optional[str] = Query(None, description="Filter by school type"),
    postcode: Optional[str] = Query(None, description="Search near postcode"),
    radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
@@ -103,13 +110,25 @@ async def get_schools(
        page_size = settings.default_page_size
    # Get unique schools (latest year data for each)
-    latest_year = df.groupby('urn')['year'].max().reset_index()
+    latest_year = df.groupby("urn")["year"].max().reset_index()
-    df_latest = df.merge(latest_year, on=['urn', 'year'])
+    df_latest = df.merge(latest_year, on=["urn", "year"])
-    # Include lat/long in columns for location search
+    # Include location columns (for distance search) and key result metrics (for display on cards)
-    location_cols = ['latitude', 'longitude']
+    location_cols = ["latitude", "longitude"]
-    available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
+    result_cols = [
-    schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
+        "year",
        "rwm_expected_pct",
        "reading_expected_pct",
        "writing_expected_pct",
        "maths_expected_pct",
        "total_pupils",
    ]
    available_cols = [
        c
        for c in SCHOOL_COLUMNS + location_cols + result_cols
        if c in df_latest.columns
    ]
    schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])
    # Location-based search
    search_coords = None
@@ -120,44 +139,58 @@ async def get_schools(
            schools_df = schools_df.copy()
            # Geocode school postcodes on-demand if not already cached
-            if 'postcode' in schools_df.columns:
+            if "postcode" in schools_df.columns:
-                unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
+                unique_postcodes = schools_df["postcode"].dropna().unique().tolist()
                geocoded = geocode_postcodes_bulk(unique_postcodes)
                # Add lat/long from geocoded data
-                schools_df['latitude'] = schools_df['postcode'].apply(
+                schools_df["latitude"] = schools_df["postcode"].apply(
-                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
+                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0]
                    if pd.notna(pc)
                    else None
                )
-                schools_df['longitude'] = schools_df['postcode'].apply(
+                schools_df["longitude"] = schools_df["postcode"].apply(
-                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
+                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1]
                    if pd.notna(pc)
                    else None
                )
            # Filter by distance
            def calc_distance(row):
-                if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
+                if pd.isna(row.get("latitude")) or pd.isna(row.get("longitude")):
-                    return float('inf')
+                    return float("inf")
                return haversine_distance(
-                    search_coords[0], search_coords[1],
+                    search_coords[0],
-                    row['latitude'], row['longitude']
+                    search_coords[1],
                    row["latitude"],
                    row["longitude"],
                )
-            schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
+            schools_df["distance"] = schools_df.apply(calc_distance, axis=1)
-            schools_df = schools_df[schools_df['distance'] <= radius]
+            schools_df = schools_df[schools_df["distance"] <= radius]
-            schools_df = schools_df.sort_values('distance')
+            schools_df = schools_df.sort_values("distance")
    # Apply filters
    if search:
        search_lower = search.lower()
-        mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
+        mask = (
            schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
        )
        if "address" in schools_df.columns:
-            mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
+            mask = mask | schools_df["address"].str.lower().str.contains(
                search_lower, na=False
            )
        schools_df = schools_df[mask]
    if local_authority:
-        schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()]
+        schools_df = schools_df[
            schools_df["local_authority"].str.lower() == local_authority.lower()
        ]
    if school_type:
-        schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()]
+        schools_df = schools_df[
            schools_df["school_type"].str.lower() == school_type.lower()
        ]
    # Pagination
    total = len(schools_df)
@@ -166,9 +199,9 @@ async def get_schools(
    schools_df = schools_df.iloc[start_idx:end_idx]
    # Remove internal columns before sending
-    output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
+    output_cols = [c for c in schools_df.columns if c not in ["latitude", "longitude"]]
-    if 'distance' in schools_df.columns:
+    if "distance" in schools_df.columns:
-        output_cols.append('distance')
+        output_cols.append("distance")
    return {
        "schools": clean_for_json(schools_df[output_cols]),
@@ -176,7 +209,9 @@ async def get_schools(
        "page": page,
        "page_size": page_size,
        "total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
-        "search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
+        "search_location": {"postcode": postcode, "radius": radius}
        if search_coords
        else None,
    }
@@ -208,7 +243,7 @@ async def get_school_details(urn: int):
            "address": latest.get("address", ""),
            "phase": "Primary",
        },
-        "yearly_data": clean_for_json(school_data)
+        "yearly_data": clean_for_json(school_data),
    }
@@ -242,7 +277,7 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
                    "local_authority": latest.get("local_authority", ""),
                    "address": latest.get("address", ""),
                },
-                "yearly_data": clean_for_json(school_data)
+                "yearly_data": clean_for_json(school_data),
            }
    return {"comparison": result}
@@ -288,9 +323,13 @@ async def get_available_metrics():
@app.get("/api/rankings")
 async def get_rankings(
    metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
-    year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
+    year: Optional[int] = Query(
        None, description="Specific year (defaults to most recent)"
    ),
    limit: int = Query(20, ge=1, le=100, description="Number of schools to return"),
-    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
+    local_authority: Optional[str] = Query(
        None, description="Filter by local authority"
    ),
 ):
    """Get primary school rankings by a specific KS2 metric."""
    df = load_school_data()
@@ -356,8 +395,14 @@ async def get_data_info():
        }
    years = [int(y) for y in sorted(df["year"].unique())]
-    schools_per_year = {str(int(k)): int(v) for k, v in df.groupby("year")["urn"].nunique().to_dict().items()}
+    schools_per_year = {
-    la_counts = {str(k): int(v) for k, v in df["local_authority"].value_counts().to_dict().items()}
+        str(int(k)): int(v)
        for k, v in df.groupby("year")["urn"].nunique().to_dict().items()
    }
    la_counts = {
        str(k): int(v)
        for k, v in df["local_authority"].value_counts().to_dict().items()
    }
    return {
        "status": "loaded",
@@ -385,4 +430,5 @@ if settings.frontend_dir.exists():
 if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host=settings.host, port=settings.port)
@@ -5,94 +5,117 @@ Single source of truth for all data transformations.
 # Column name mappings from DfE CSV to API field names
 COLUMN_MAPPINGS = {
-    'URN': 'urn',
+    "URN": "urn",
-    'SCHNAME': 'school_name',
+    "SCHNAME": "school_name",
-    'ADDRESS1': 'address1',
+    "ADDRESS1": "address1",
-    'ADDRESS2': 'address2',
+    "ADDRESS2": "address2",
-    'TOWN': 'town',
+    "TOWN": "town",
-    'PCODE': 'postcode',
+    "PCODE": "postcode",
-    'NFTYPE': 'school_type_code',
+    "NFTYPE": "school_type_code",
-    'RELDENOM': 'religious_denomination',
+    "RELDENOM": "religious_denomination",
-    'AGERANGE': 'age_range',
+    "AGERANGE": "age_range",
-    'TOTPUPS': 'total_pupils',
+    "TOTPUPS": "total_pupils",
-    'TELIG': 'eligible_pupils',
+    "TELIG": "eligible_pupils",
    # Core KS2 metrics
-    'PTRWM_EXP': 'rwm_expected_pct',
+    "PTRWM_EXP": "rwm_expected_pct",
-    'PTRWM_HIGH': 'rwm_high_pct',
+    "PTRWM_HIGH": "rwm_high_pct",
-    'READPROG': 'reading_progress',
+    "READPROG": "reading_progress",
-    'WRITPROG': 'writing_progress',
+    "WRITPROG": "writing_progress",
-    'MATPROG': 'maths_progress',
+    "MATPROG": "maths_progress",
-    'PTREAD_EXP': 'reading_expected_pct',
+    "PTREAD_EXP": "reading_expected_pct",
-    'PTWRITTA_EXP': 'writing_expected_pct',
+    "PTWRITTA_EXP": "writing_expected_pct",
-    'PTMAT_EXP': 'maths_expected_pct',
+    "PTMAT_EXP": "maths_expected_pct",
-    'READ_AVERAGE': 'reading_avg_score',
+    "READ_AVERAGE": "reading_avg_score",
-    'MAT_AVERAGE': 'maths_avg_score',
+    "MAT_AVERAGE": "maths_avg_score",
-    'PTREAD_HIGH': 'reading_high_pct',
+    "PTREAD_HIGH": "reading_high_pct",
-    'PTWRITTA_HIGH': 'writing_high_pct',
+    "PTWRITTA_HIGH": "writing_high_pct",
-    'PTMAT_HIGH': 'maths_high_pct',
+    "PTMAT_HIGH": "maths_high_pct",
    # GPS (Grammar, Punctuation & Spelling)
-    'PTGPS_EXP': 'gps_expected_pct',
+    "PTGPS_EXP": "gps_expected_pct",
-    'PTGPS_HIGH': 'gps_high_pct',
+    "PTGPS_HIGH": "gps_high_pct",
-    'GPS_AVERAGE': 'gps_avg_score',
+    "GPS_AVERAGE": "gps_avg_score",
    # Science
-    'PTSCITA_EXP': 'science_expected_pct',
+    "PTSCITA_EXP": "science_expected_pct",
    # School context
-    'PTFSM6CLA1A': 'disadvantaged_pct',
+    "PTFSM6CLA1A": "disadvantaged_pct",
-    'PTEALGRP2': 'eal_pct',
+    "PTEALGRP2": "eal_pct",
-    'PSENELK': 'sen_support_pct',
+    "PSENELK": "sen_support_pct",
-    'PSENELE': 'sen_ehcp_pct',
+    "PSENELE": "sen_ehcp_pct",
-    'PTMOBN': 'stability_pct',
+    "PTMOBN": "stability_pct",
    # Gender breakdown
-    'PTRWM_EXP_B': 'rwm_expected_boys_pct',
+    "PTRWM_EXP_B": "rwm_expected_boys_pct",
-    'PTRWM_EXP_G': 'rwm_expected_girls_pct',
+    "PTRWM_EXP_G": "rwm_expected_girls_pct",
-    'PTRWM_HIGH_B': 'rwm_high_boys_pct',
+    "PTRWM_HIGH_B": "rwm_high_boys_pct",
-    'PTRWM_HIGH_G': 'rwm_high_girls_pct',
+    "PTRWM_HIGH_G": "rwm_high_girls_pct",
    # Disadvantaged performance
-    'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
+    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
-    'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
+    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
-    'DIFFN_RWM_EXP': 'disadvantaged_gap',
+    "DIFFN_RWM_EXP": "disadvantaged_gap",
    # 3-year averages
-    'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
+    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
-    'READ_AVERAGE_3YR': 'reading_avg_3yr',
+    "READ_AVERAGE_3YR": "reading_avg_3yr",
-    'MAT_AVERAGE_3YR': 'maths_avg_3yr',
+    "MAT_AVERAGE_3YR": "maths_avg_3yr",
 }
 # Numeric columns that need parsing
 NUMERIC_COLUMNS = [
    # Core metrics
-    'rwm_expected_pct', 'rwm_high_pct', 'reading_progress', 
+    "rwm_expected_pct",
-    'writing_progress', 'maths_progress', 'reading_expected_pct',
+    "rwm_high_pct",
-    'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score',
+    "reading_progress",
-    'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct',
+    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    # GPS & Science
-    'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct',
+    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",
    # School context
-    'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct',
+    "total_pupils",
-    'sen_support_pct', 'sen_ehcp_pct', 'stability_pct',
+    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Gender breakdown
-    'rwm_expected_boys_pct', 'rwm_expected_girls_pct',
+    "rwm_expected_boys_pct",
-    'rwm_high_boys_pct', 'rwm_high_girls_pct',
+    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged performance
-    'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap',
+    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-year averages
-    'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr',
+    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
 ]
 # School type code to name mapping
 SCHOOL_TYPE_MAP = {
-    'AC': 'Academy',
+    "AC": "Academy",
-    'ACC': 'Academy Converter',
+    "ACC": "Academy Converter",
-    'ACS': 'Academy Sponsor Led',
+    "ACS": "Academy Sponsor Led",
-    'CY': 'Community School',
+    "CY": "Community School",
-    'VA': 'Voluntary Aided',
+    "VA": "Voluntary Aided",
-    'VC': 'Voluntary Controlled',
+    "VC": "Voluntary Controlled",
-    'FD': 'Foundation',
+    "FD": "Foundation",
-    'F': 'Foundation',
+    "F": "Foundation",
-    'FS': 'Free School',
+    "FS": "Free School",
 }
 # Special values to treat as null
-NULL_VALUES = ['SUPP', 'NE', 'NA', 'NP', 'NEW', 'LOW', '']
+NULL_VALUES = ["SUPP", "NE", "NA", "NP", "NEW", "LOW", ""]
 # KS2 Metric definitions - single source of truth
 # Used by both backend API and frontend
@@ -103,42 +126,42 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    # Higher Standard
    "rwm_high_pct": {
@@ -146,35 +169,35 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    # Progress Scores
    "reading_progress": {
@@ -182,21 +205,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    # Average Scores
    "reading_avg_score": {
@@ -204,21 +227,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
@@ -226,28 +249,28 @@ METRIC_DEFINITIONS = {
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
@@ -255,21 +278,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
-        "category": "equity"
+        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
-        "category": "equity"
+        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
-        "category": "equity"
+        "category": "equity",
    },
    # School Context
    "disadvantaged_pct": {
@@ -277,28 +300,28 @@ METRIC_DEFINITIONS = {
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
@@ -306,122 +329,257 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
-        "category": "trends"
+        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
-        "category": "trends"
+        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
-        "category": "trends"
+        "category": "trends",
    },
 }
 # Ranking columns to include in rankings response
 RANKING_COLUMNS = [
-    "urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
+    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "year",
    "total_pupils",
    # Core expected
-    "rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
+    "rwm_expected_pct",
-    "gps_expected_pct", "science_expected_pct",
+    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",
    # Core higher
-    "rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
+    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",
    # Progress & averages
-    "reading_progress", "writing_progress", "maths_progress",
+    "reading_progress",
-    "reading_avg_score", "maths_avg_score", "gps_avg_score",
+    "writing_progress",
    "maths_progress",
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",
    # Gender
-    "rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
+    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Equity
-    "rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
+    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # Context
-    "disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
+    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "stability_pct",
    # 3-year
-    "rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
+    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
 ]
 # School listing columns
-SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]
+SCHOOL_COLUMNS = [
    "urn",
    "school_name",
    "local_authority",
    "school_type",
    "address",
    "town",
    "postcode",
 ]
 # Local Authority code to name mapping (for fallback when LANAME column missing)
 # Source: https://www.gov.uk/government/publications/local-authority-codes
 LA_CODE_TO_NAME = {
    # Inner London
-    201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
+    201: "City of London",
-    205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
+    202: "Camden",
-    208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
+    203: "Greenwich",
-    212: "Wandsworth", 213: "Westminster",
+    204: "Hackney",
    205: "Hammersmith and Fulham",
    206: "Islington",
    207: "Kensington and Chelsea",
    208: "Lambeth",
    209: "Lewisham",
    210: "Southwark",
    211: "Tower Hamlets",
    212: "Wandsworth",
    213: "Westminster",
    # Outer London
-    301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent",
+    301: "Barking and Dagenham",
-    305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey",
+    302: "Barnet",
-    310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow",
+    303: "Bexley",
-    314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge",
+    304: "Brent",
-    318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
+    305: "Bromley",
    306: "Croydon",
    307: "Ealing",
    308: "Enfield",
    309: "Haringey",
    310: "Harrow",
    311: "Havering",
    312: "Hillingdon",
    313: "Hounslow",
    314: "Kingston upon Thames",
    315: "Merton",
    316: "Newham",
    317: "Redbridge",
    318: "Richmond upon Thames",
    319: "Sutton",
    320: "Waltham Forest",
    # West Midlands
-    330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell",
+    330: "Birmingham",
-    334: "Solihull", 335: "Walsall", 336: "Wolverhampton",
+    331: "Coventry",
    332: "Dudley",
    333: "Sandwell",
    334: "Solihull",
    335: "Walsall",
    336: "Wolverhampton",
    # Merseyside
-    340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral",
+    340: "Knowsley",
    341: "Liverpool",
    342: "St. Helens",
    343: "Sefton",
    344: "Wirral",
    # Greater Manchester
-    350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale",
+    350: "Bolton",
-    355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan",
+    351: "Bury",
    352: "Manchester",
    353: "Oldham",
    354: "Rochdale",
    355: "Salford",
    356: "Stockport",
    357: "Tameside",
    358: "Trafford",
    359: "Wigan",
    # South Yorkshire
-    370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield",
+    370: "Barnsley",
    371: "Doncaster",
    372: "Rotherham",
    373: "Sheffield",
    # West Yorkshire
-    380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield",
+    380: "Bradford",
    381: "Calderdale",
    382: "Kirklees",
    383: "Leeds",
    384: "Wakefield",
    # Tyne and Wear
-    390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside",
+    390: "Gateshead",
-    393: "South Tyneside", 394: "Sunderland",
+    391: "Newcastle upon Tyne",
    392: "North Tyneside",
    393: "South Tyneside",
    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities (800+)
-    800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset",
+    800: "Bath and North East Somerset",
-    803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough",
+    801: "Bristol, City of",
-    807: "Redcar and Cleveland", 808: "Stockton-on-Tees",
+    802: "North Somerset",
-    810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
+    803: "South Gloucestershire",
-    812: "North East Lincolnshire", 813: "North Lincolnshire",
+    805: "Hartlepool",
-    815: "North Yorkshire", 816: "York",
+    806: "Middlesbrough",
-    820: "Bedford", 821: "Central Bedfordshire", 822: "Luton",
+    807: "Redcar and Cleveland",
-    823: "West Northamptonshire", 824: "North Northamptonshire",
+    808: "Stockton-on-Tees",
-    825: "Buckinghamshire", 826: "Milton Keynes",
+    810: "Kingston Upon Hull, City of",
-    830: "Derbyshire", 831: "Derby",
+    811: "East Riding of Yorkshire",
-    835: "Dorset", 836: "Bournemouth, Christchurch and Poole",
+    812: "North East Lincolnshire",
-    837: "Poole", 838: "Bournemouth",  # Historic codes (merged into 836)
+    813: "North Lincolnshire",
-    839: "Durham", 840: "Darlington",
+    815: "North Yorkshire",
-    841: "East Sussex", 845: "Brighton and Hove",
+    816: "York",
-    846: "Hampshire", 850: "Portsmouth", 851: "Southampton", 852: "Isle of Wight",
+    820: "Bedford",
-    855: "Leicestershire", 856: "Leicester", 857: "Rutland",
+    821: "Central Bedfordshire",
-    860: "Staffordshire", 861: "Stoke-on-Trent",
+    822: "Luton",
-    865: "Wiltshire", 866: "Swindon",
+    823: "West Northamptonshire",
-    867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
+    824: "North Northamptonshire",
-    870: "Reading", 871: "Slough", 872: "Wokingham",
+    825: "Buckinghamshire",
-    873: "Cambridgeshire", 874: "Peterborough",
+    826: "Milton Keynes",
-    876: "Halton", 877: "Warrington",
+    830: "Derbyshire",
-    878: "Devon", 879: "Plymouth", 880: "Torbay",
+    831: "Derby",
-    881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock",
+    835: "Dorset",
-    884: "Herefordshire", 885: "Worcestershire",
+    836: "Bournemouth, Christchurch and Poole",
-    886: "Kent", 887: "Medway",
+    837: "Poole",  # Historic code (merged into 836)
-    888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
+    838: "Bournemouth",  # Historic code (merged into 836)
-    891: "Nottinghamshire", 892: "Nottingham",
+    839: "Durham",
-    893: "Shropshire", 894: "Telford and Wrekin",
+    840: "Darlington",
-    895: "Cheshire East", 896: "Cheshire West and Chester",
+    841: "East Sussex",
    845: "Brighton and Hove",
    846: "Hampshire",
    850: "Portsmouth",
    851: "Southampton",
    852: "Isle of Wight",
    855: "Leicestershire",
    856: "Leicester",
    857: "Rutland",
    860: "Staffordshire",
    861: "Stoke-on-Trent",
    865: "Wiltshire",
    866: "Swindon",
    867: "Bracknell Forest",
    868: "Windsor and Maidenhead",
    869: "West Berkshire",
    870: "Reading",
    871: "Slough",
    872: "Wokingham",
    873: "Cambridgeshire",
    874: "Peterborough",
    876: "Halton",
    877: "Warrington",
    878: "Devon",
    879: "Plymouth",
    880: "Torbay",
    881: "Essex",
    882: "Southend-on-Sea",
    883: "Thurrock",
    884: "Herefordshire",
    885: "Worcestershire",
    886: "Kent",
    887: "Medway",
    888: "Lancashire",
    889: "Blackburn with Darwen",
    890: "Blackpool",
    891: "Nottinghamshire",
    892: "Nottingham",
    893: "Shropshire",
    894: "Telford and Wrekin",
    895: "Cheshire East",
    896: "Cheshire West and Chester",
    # County councils (900+)
-    908: "Cornwall", 909: "Cumbria",
+    908: "Cornwall",
-    916: "Gloucestershire", 919: "Hertfordshire",
+    909: "Cumbria",
-    921: "Norfolk", 925: "Lincolnshire",
+    916: "Gloucestershire",
    919: "Hertfordshire",
    921: "Norfolk",
    925: "Lincolnshire",
    926: "Northamptonshire",  # Historic (split into 823/824 in 2021)
-    928: "Northumberland", 929: "Oxfordshire",
+    928: "Northumberland",
-    931: "Somerset", 933: "Suffolk", 935: "Surrey",
+    929: "Oxfordshire",
-    936: "Warwickshire", 937: "West Sussex",
+    931: "Somerset",
    933: "Suffolk",
    935: "Surrey",
    936: "Warwickshire",
    937: "West Sussex",
    # New authorities (2023 reorganization)
-    938: "Westmorland and Furness", 940: "Cumberland",
+    938: "Westmorland and Furness",
    940: "Cumberland",
    941: "North Yorkshire",  # New unitary
    942: "Somerset",  # New unitary (replaced 931)
    943: "Buckinghamshire",  # New unitary (2020, replacing 825 in some datasets)
 }
@@ -465,12 +465,12 @@ function renderFeaturedSchools(schools) {
                <div class="school-address">${escapeHtml(school.address || "")}</div>
                <div class="school-stats">
                    <div class="stat">
-                        <div class="stat-value">Primary</div>
+                        <div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
-                        <div class="stat-label">Phase</div>
+                        <div class="stat-label">RWM Expected</div>
                    </div>
                    <div class="stat">
-                        <div class="stat-value">KS2</div>
+                        <div class="stat-value">${school.total_pupils || "-"}</div>
-                        <div class="stat-label">Data</div>
+                        <div class="stat-label">Pupils</div>
                    </div>
                </div>
            </div>
@@ -587,12 +587,12 @@ function renderSchools(schools) {
                <div class="school-address">${escapeHtml(school.address || "")}</div>
                <div class="school-stats">
                    <div class="stat">
-                        <div class="stat-value">Primary</div>
+                        <div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
-                        <div class="stat-label">Phase</div>
+                        <div class="stat-label">RWM Expected</div>
                    </div>
                    <div class="stat">
-                        <div class="stat-value">KS2</div>
+                        <div class="stat-value">${school.total_pupils || "-"}</div>
-                        <div class="stat-label">Data</div>
+                        <div class="stat-label">Pupils</div>
                    </div>
                </div>
            </div>