Fix data load: add key result metrics to /api/schools and reformat code

2026-01-06 22:06:59 +00:00
parent e601c499b6
commit 1a8ec670b9
3 changed files with 465 additions and 261 deletions
@@ -5,20 +5,25 @@ Uses real data from UK Government Compare School Performance downloads.
 """

 from contextlib import asynccontextmanager
-import pandas as pd
-from fastapi import FastAPI, HTTPException, Query
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse
-from fastapi.middleware.cors import CORSMiddleware
 from typing import Optional

+import pandas as pd
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+
 from .config import settings
-from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
 from .data_loader import (
-    load_school_data, clear_cache, geocode_single_postcode, 
-    geocode_postcodes_bulk, haversine_distance, get_data_info as get_db_info
+    clear_cache,
+    geocode_postcodes_bulk,
+    geocode_single_postcode,
+    haversine_distance,
+    load_school_data,
 )
+from .data_loader import get_data_info as get_db_info
 from .database import init_db
+from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
 from .utils import clean_for_json


@@ -28,16 +33,16 @@ async def lifespan(app: FastAPI):
    # Startup: initialize database and pre-load data
    print("Starting up: Initializing database...")
    init_db()  # Ensure tables exist
-    
+
    print("Loading school data from database...")
    df = load_school_data()
    if df.empty:
        print("Warning: No data in database. Run the migration script to import data.")
    else:
        print("Data loaded successfully.")
-    
+
    yield  # Application runs here
-    
+
    # Shutdown: cleanup if needed
    print("Shutting down...")

@@ -80,7 +85,9 @@ async def serve_rankings():
@app.get("/api/schools")
 async def get_schools(
    search: Optional[str] = Query(None, description="Search by school name"),
-    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
+    local_authority: Optional[str] = Query(
+        None, description="Filter by local authority"
+    ),
    school_type: Optional[str] = Query(None, description="Filter by school type"),
    postcode: Optional[str] = Query(None, description="Search near postcode"),
    radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
@@ -89,28 +96,40 @@ async def get_schools(
 ):
    """
    Get list of unique primary schools with pagination.
-    
+
    Returns paginated results with total count for efficient loading.
    Supports location-based search using postcode.
    """
    df = load_school_data()
-    
+
    if df.empty:
        return {"schools": [], "total": 0, "page": page, "page_size": 0}
-    
+
    # Use configured default if not specified
    if page_size is None:
        page_size = settings.default_page_size
-    
+
    # Get unique schools (latest year data for each)
-    latest_year = df.groupby('urn')['year'].max().reset_index()
-    df_latest = df.merge(latest_year, on=['urn', 'year'])
-    
-    # Include lat/long in columns for location search
-    location_cols = ['latitude', 'longitude']
-    available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
-    schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
-    
+    latest_year = df.groupby("urn")["year"].max().reset_index()
+    df_latest = df.merge(latest_year, on=["urn", "year"])
+
+    # Include lat/long for location search plus key result metrics for cards
+    location_cols = ["latitude", "longitude"]
+    result_cols = [
+        "year",
+        "rwm_expected_pct",
+        "reading_expected_pct",
+        "writing_expected_pct",
+        "maths_expected_pct",
+        "total_pupils",
+    ]
+    available_cols = [
+        c
+        for c in SCHOOL_COLUMNS + location_cols + result_cols
+        if c in df_latest.columns
+    ]
+    schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])
+
    # Location-based search
    search_coords = None
    if postcode:
@@ -118,65 +137,81 @@ async def get_schools(
        if coords:
            search_coords = coords
            schools_df = schools_df.copy()
-            
+
            # Geocode school postcodes on-demand if not already cached
-            if 'postcode' in schools_df.columns:
-                unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
+            if "postcode" in schools_df.columns:
+                unique_postcodes = schools_df["postcode"].dropna().unique().tolist()
                geocoded = geocode_postcodes_bulk(unique_postcodes)
-                
+
                # Add lat/long from geocoded data
-                schools_df['latitude'] = schools_df['postcode'].apply(
-                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
+                schools_df["latitude"] = schools_df["postcode"].apply(
+                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0]
+                    if pd.notna(pc)
+                    else None
                )
-                schools_df['longitude'] = schools_df['postcode'].apply(
-                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
+                schools_df["longitude"] = schools_df["postcode"].apply(
+                    lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1]
+                    if pd.notna(pc)
+                    else None
                )
-            
+
            # Filter by distance
            def calc_distance(row):
-                if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
-                    return float('inf')
+                if pd.isna(row.get("latitude")) or pd.isna(row.get("longitude")):
+                    return float("inf")
                return haversine_distance(
-                    search_coords[0], search_coords[1],
-                    row['latitude'], row['longitude']
+                    search_coords[0],
+                    search_coords[1],
+                    row["latitude"],
+                    row["longitude"],
                )
-            
-            schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
-            schools_df = schools_df[schools_df['distance'] <= radius]
-            schools_df = schools_df.sort_values('distance')
-    
+
+            schools_df["distance"] = schools_df.apply(calc_distance, axis=1)
+            schools_df = schools_df[schools_df["distance"] <= radius]
+            schools_df = schools_df.sort_values("distance")
+
    # Apply filters
    if search:
        search_lower = search.lower()
-        mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
+        mask = (
+            schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
+        )
        if "address" in schools_df.columns:
-            mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
+            mask = mask | schools_df["address"].str.lower().str.contains(
+                search_lower, na=False
+            )
        schools_df = schools_df[mask]
-    
+
    if local_authority:
-        schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()]
-    
+        schools_df = schools_df[
+            schools_df["local_authority"].str.lower() == local_authority.lower()
+        ]
+
    if school_type:
-        schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()]
-    
+        schools_df = schools_df[
+            schools_df["school_type"].str.lower() == school_type.lower()
+        ]
+
    # Pagination
    total = len(schools_df)
    start_idx = (page - 1) * page_size
    end_idx = start_idx + page_size
    schools_df = schools_df.iloc[start_idx:end_idx]
-    
+
    # Remove internal columns before sending
-    output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
-    if 'distance' in schools_df.columns:
-        output_cols.append('distance')
-    
+    output_cols = [c for c in schools_df.columns if c not in ["latitude", "longitude"]]
+    if "distance" in schools_df.columns:
+        output_cols.append("distance")
+
    return {
        "schools": clean_for_json(schools_df[output_cols]),
        "total": total,
        "page": page,
        "page_size": page_size,
        "total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
-        "search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
+        "search_location": {"postcode": postcode, "radius": radius}
+        if search_coords
+        else None,
    }


@@ -184,21 +219,21 @@ async def get_schools(
 async def get_school_details(urn: int):
    """Get detailed KS2 data for a specific primary school across all years."""
    df = load_school_data()
-    
+
    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")
-    
+
    school_data = df[df["urn"] == urn]
-    
+
    if school_data.empty:
        raise HTTPException(status_code=404, detail="School not found")
-    
+
    # Sort by year
    school_data = school_data.sort_values("year")
-    
+
    # Get latest info for the school
    latest = school_data.iloc[-1]
-    
+
    return {
        "school_info": {
            "urn": urn,
@@ -208,7 +243,7 @@ async def get_school_details(urn: int):
            "address": latest.get("address", ""),
            "phase": "Primary",
        },
-        "yearly_data": clean_for_json(school_data)
+        "yearly_data": clean_for_json(school_data),
    }


@@ -216,20 +251,20 @@ async def get_school_details(urn: int):
 async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")):
    """Compare multiple primary schools side by side."""
    df = load_school_data()
-    
+
    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")
-    
+
    try:
        urn_list = [int(u.strip()) for u in urns.split(",")]
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URN format")
-    
+
    comparison_data = df[df["urn"].isin(urn_list)]
-    
+
    if comparison_data.empty:
        raise HTTPException(status_code=404, detail="No schools found")
-    
+
    result = {}
    for urn in urn_list:
        school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year")
@@ -242,9 +277,9 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
                    "local_authority": latest.get("local_authority", ""),
                    "address": latest.get("address", ""),
                },
-                "yearly_data": clean_for_json(school_data)
+                "yearly_data": clean_for_json(school_data),
            }
-    
+
    return {"comparison": result}


@@ -252,14 +287,14 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
 async def get_filter_options():
    """Get available filter options (local authorities, school types, years)."""
    df = load_school_data()
-    
+
    if df.empty:
        return {
            "local_authorities": [],
            "school_types": [],
            "years": [],
        }
-    
+
    return {
        "local_authorities": sorted(df["local_authority"].dropna().unique().tolist()),
        "school_types": sorted(df["school_type"].dropna().unique().tolist()),
@@ -271,36 +306,40 @@ async def get_filter_options():
 async def get_available_metrics():
    """
    Get list of available KS2 performance metrics for primary schools.
-    
+
    This is the single source of truth for metric definitions.
    Frontend should consume this to avoid duplication.
    """
    df = load_school_data()
-    
+
    available = []
    for key, info in METRIC_DEFINITIONS.items():
        if df.empty or key in df.columns:
            available.append({"key": key, **info})
-    
+
    return {"metrics": available}


@app.get("/api/rankings")
 async def get_rankings(
    metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
-    year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
+    year: Optional[int] = Query(
+        None, description="Specific year (defaults to most recent)"
+    ),
    limit: int = Query(20, ge=1, le=100, description="Number of schools to return"),
-    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
+    local_authority: Optional[str] = Query(
+        None, description="Filter by local authority"
+    ),
 ):
    """Get primary school rankings by a specific KS2 metric."""
    df = load_school_data()
-    
+
    if df.empty:
        return {"metric": metric, "year": None, "rankings": [], "total": 0}
-    
+
    if metric not in df.columns:
        raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available")
-    
+
    # Filter by year
    if year:
        df = df[df["year"] == year]
@@ -308,22 +347,22 @@ async def get_rankings(
        # Use most recent year
        max_year = df["year"].max()
        df = df[df["year"] == max_year]
-    
+
    # Filter by local authority if specified
    if local_authority:
        df = df[df["local_authority"].str.lower() == local_authority.lower()]
-    
+
    # Sort and rank (exclude rows with no data for this metric)
    df = df.dropna(subset=[metric])
    total = len(df)
-    
+
    # For progress scores, higher is better. For percentages, higher is also better.
    df = df.sort_values(metric, ascending=False).head(limit)
-    
+
    # Return only relevant fields for rankings
    available_cols = [c for c in RANKING_COLUMNS if c in df.columns]
    df = df[available_cols]
-    
+
    return {
        "metric": metric,
        "year": int(df["year"].iloc[0]) if not df.empty else None,
@@ -337,28 +376,34 @@ async def get_data_info():
    """Get information about loaded data."""
    # Get info directly from database
    db_info = get_db_info()
-    
+
    if db_info["total_schools"] == 0:
        return {
            "status": "no_data",
            "message": "No data in database. Run the migration script: python scripts/migrate_csv_to_db.py",
            "data_source": "PostgreSQL",
        }
-    
+
    # Also get DataFrame-based stats for backwards compatibility
    df = load_school_data()
-    
+
    if df.empty:
        return {
            "status": "no_data",
            "message": "No data available",
            "data_source": "PostgreSQL",
        }
-    
+
    years = [int(y) for y in sorted(df["year"].unique())]
-    schools_per_year = {str(int(k)): int(v) for k, v in df.groupby("year")["urn"].nunique().to_dict().items()}
-    la_counts = {str(k): int(v) for k, v in df["local_authority"].value_counts().to_dict().items()}
-    
+    schools_per_year = {
+        str(int(k)): int(v)
+        for k, v in df.groupby("year")["urn"].nunique().to_dict().items()
+    }
+    la_counts = {
+        str(k): int(v)
+        for k, v in df["local_authority"].value_counts().to_dict().items()
+    }
+
    return {
        "status": "loaded",
        "data_source": "PostgreSQL",
@@ -385,4 +430,5 @@ if settings.frontend_dir.exists():

 if __name__ == "__main__":
    import uvicorn
+
    uvicorn.run(app, host=settings.host, port=settings.port)
@@ -5,94 +5,117 @@ Single source of truth for all data transformations.

 # Column name mappings from DfE CSV to API field names
 COLUMN_MAPPINGS = {
-    'URN': 'urn',
-    'SCHNAME': 'school_name',
-    'ADDRESS1': 'address1',
-    'ADDRESS2': 'address2',
-    'TOWN': 'town',
-    'PCODE': 'postcode',
-    'NFTYPE': 'school_type_code',
-    'RELDENOM': 'religious_denomination',
-    'AGERANGE': 'age_range',
-    'TOTPUPS': 'total_pupils',
-    'TELIG': 'eligible_pupils',
+    "URN": "urn",
+    "SCHNAME": "school_name",
+    "ADDRESS1": "address1",
+    "ADDRESS2": "address2",
+    "TOWN": "town",
+    "PCODE": "postcode",
+    "NFTYPE": "school_type_code",
+    "RELDENOM": "religious_denomination",
+    "AGERANGE": "age_range",
+    "TOTPUPS": "total_pupils",
+    "TELIG": "eligible_pupils",
    # Core KS2 metrics
-    'PTRWM_EXP': 'rwm_expected_pct',
-    'PTRWM_HIGH': 'rwm_high_pct',
-    'READPROG': 'reading_progress',
-    'WRITPROG': 'writing_progress',
-    'MATPROG': 'maths_progress',
-    'PTREAD_EXP': 'reading_expected_pct',
-    'PTWRITTA_EXP': 'writing_expected_pct',
-    'PTMAT_EXP': 'maths_expected_pct',
-    'READ_AVERAGE': 'reading_avg_score',
-    'MAT_AVERAGE': 'maths_avg_score',
-    'PTREAD_HIGH': 'reading_high_pct',
-    'PTWRITTA_HIGH': 'writing_high_pct',
-    'PTMAT_HIGH': 'maths_high_pct',
+    "PTRWM_EXP": "rwm_expected_pct",
+    "PTRWM_HIGH": "rwm_high_pct",
+    "READPROG": "reading_progress",
+    "WRITPROG": "writing_progress",
+    "MATPROG": "maths_progress",
+    "PTREAD_EXP": "reading_expected_pct",
+    "PTWRITTA_EXP": "writing_expected_pct",
+    "PTMAT_EXP": "maths_expected_pct",
+    "READ_AVERAGE": "reading_avg_score",
+    "MAT_AVERAGE": "maths_avg_score",
+    "PTREAD_HIGH": "reading_high_pct",
+    "PTWRITTA_HIGH": "writing_high_pct",
+    "PTMAT_HIGH": "maths_high_pct",
    # GPS (Grammar, Punctuation & Spelling)
-    'PTGPS_EXP': 'gps_expected_pct',
-    'PTGPS_HIGH': 'gps_high_pct',
-    'GPS_AVERAGE': 'gps_avg_score',
+    "PTGPS_EXP": "gps_expected_pct",
+    "PTGPS_HIGH": "gps_high_pct",
+    "GPS_AVERAGE": "gps_avg_score",
    # Science
-    'PTSCITA_EXP': 'science_expected_pct',
+    "PTSCITA_EXP": "science_expected_pct",
    # School context
-    'PTFSM6CLA1A': 'disadvantaged_pct',
-    'PTEALGRP2': 'eal_pct',
-    'PSENELK': 'sen_support_pct',
-    'PSENELE': 'sen_ehcp_pct',
-    'PTMOBN': 'stability_pct',
+    "PTFSM6CLA1A": "disadvantaged_pct",
+    "PTEALGRP2": "eal_pct",
+    "PSENELK": "sen_support_pct",
+    "PSENELE": "sen_ehcp_pct",
+    "PTMOBN": "stability_pct",
    # Gender breakdown
-    'PTRWM_EXP_B': 'rwm_expected_boys_pct',
-    'PTRWM_EXP_G': 'rwm_expected_girls_pct',
-    'PTRWM_HIGH_B': 'rwm_high_boys_pct',
-    'PTRWM_HIGH_G': 'rwm_high_girls_pct',
+    "PTRWM_EXP_B": "rwm_expected_boys_pct",
+    "PTRWM_EXP_G": "rwm_expected_girls_pct",
+    "PTRWM_HIGH_B": "rwm_high_boys_pct",
+    "PTRWM_HIGH_G": "rwm_high_girls_pct",
    # Disadvantaged performance
-    'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
-    'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
-    'DIFFN_RWM_EXP': 'disadvantaged_gap',
+    "PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
+    "PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
+    "DIFFN_RWM_EXP": "disadvantaged_gap",
    # 3-year averages
-    'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
-    'READ_AVERAGE_3YR': 'reading_avg_3yr',
-    'MAT_AVERAGE_3YR': 'maths_avg_3yr',
+    "PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
+    "READ_AVERAGE_3YR": "reading_avg_3yr",
+    "MAT_AVERAGE_3YR": "maths_avg_3yr",
 }

 # Numeric columns that need parsing
 NUMERIC_COLUMNS = [
    # Core metrics
-    'rwm_expected_pct', 'rwm_high_pct', 'reading_progress', 
-    'writing_progress', 'maths_progress', 'reading_expected_pct',
-    'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score',
-    'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct',
+    "rwm_expected_pct",
+    "rwm_high_pct",
+    "reading_progress",
+    "writing_progress",
+    "maths_progress",
+    "reading_expected_pct",
+    "writing_expected_pct",
+    "maths_expected_pct",
+    "reading_avg_score",
+    "maths_avg_score",
+    "reading_high_pct",
+    "writing_high_pct",
+    "maths_high_pct",
    # GPS & Science
-    'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct',
+    "gps_expected_pct",
+    "gps_high_pct",
+    "gps_avg_score",
+    "science_expected_pct",
    # School context
-    'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct',
-    'sen_support_pct', 'sen_ehcp_pct', 'stability_pct',
+    "total_pupils",
+    "eligible_pupils",
+    "disadvantaged_pct",
+    "eal_pct",
+    "sen_support_pct",
+    "sen_ehcp_pct",
+    "stability_pct",
    # Gender breakdown
-    'rwm_expected_boys_pct', 'rwm_expected_girls_pct',
-    'rwm_high_boys_pct', 'rwm_high_girls_pct',
+    "rwm_expected_boys_pct",
+    "rwm_expected_girls_pct",
+    "rwm_high_boys_pct",
+    "rwm_high_girls_pct",
    # Disadvantaged performance
-    'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap',
+    "rwm_expected_disadvantaged_pct",
+    "rwm_expected_non_disadvantaged_pct",
+    "disadvantaged_gap",
    # 3-year averages
-    'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr',
+    "rwm_expected_3yr_pct",
+    "reading_avg_3yr",
+    "maths_avg_3yr",
 ]

 # School type code to name mapping
 SCHOOL_TYPE_MAP = {
-    'AC': 'Academy',
-    'ACC': 'Academy Converter',
-    'ACS': 'Academy Sponsor Led',
-    'CY': 'Community School',
-    'VA': 'Voluntary Aided',
-    'VC': 'Voluntary Controlled',
-    'FD': 'Foundation',
-    'F': 'Foundation',
-    'FS': 'Free School',
+    "AC": "Academy",
+    "ACC": "Academy Converter",
+    "ACS": "Academy Sponsor Led",
+    "CY": "Community School",
+    "VA": "Voluntary Aided",
+    "VC": "Voluntary Controlled",
+    "FD": "Foundation",
+    "F": "Foundation",
+    "FS": "Free School",
 }

 # Special values to treat as null
-NULL_VALUES = ['SUPP', 'NE', 'NA', 'NP', 'NEW', 'LOW', '']
+NULL_VALUES = ["SUPP", "NE", "NA", "NP", "NEW", "LOW", ""]

 # KS2 Metric definitions - single source of truth
 # Used by both backend API and frontend
@@ -103,42 +126,42 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM %",
        "description": "% meeting expected standard in reading, writing and maths",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "reading_expected_pct": {
        "name": "Reading Expected %",
        "short_name": "Reading %",
        "description": "% meeting expected standard in reading",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "writing_expected_pct": {
        "name": "Writing Expected %",
        "short_name": "Writing %",
        "description": "% meeting expected standard in writing",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "maths_expected_pct": {
        "name": "Maths Expected %",
        "short_name": "Maths %",
        "description": "% meeting expected standard in maths",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "gps_expected_pct": {
        "name": "GPS Expected %",
        "short_name": "GPS %",
        "description": "% meeting expected standard in grammar, punctuation & spelling",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    "science_expected_pct": {
        "name": "Science Expected %",
        "short_name": "Science %",
        "description": "% meeting expected standard in science",
        "type": "percentage",
-        "category": "expected"
+        "category": "expected",
    },
    # Higher Standard
    "rwm_high_pct": {
@@ -146,35 +169,35 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM Higher %",
        "description": "% achieving higher standard in RWM combined",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "reading_high_pct": {
        "name": "Reading Higher %",
        "short_name": "Reading Higher %",
        "description": "% achieving higher standard in reading",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "writing_high_pct": {
        "name": "Writing Higher %",
        "short_name": "Writing Higher %",
        "description": "% achieving greater depth in writing",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "maths_high_pct": {
        "name": "Maths Higher %",
        "short_name": "Maths Higher %",
        "description": "% achieving higher standard in maths",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    "gps_high_pct": {
        "name": "GPS Higher %",
        "short_name": "GPS Higher %",
        "description": "% achieving higher standard in GPS",
        "type": "percentage",
-        "category": "higher"
+        "category": "higher",
    },
    # Progress Scores
    "reading_progress": {
@@ -182,21 +205,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Reading Progress",
        "description": "Progress in reading from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    "writing_progress": {
        "name": "Writing Progress",
        "short_name": "Writing Progress",
        "description": "Progress in writing from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    "maths_progress": {
        "name": "Maths Progress",
        "short_name": "Maths Progress",
        "description": "Progress in maths from KS1 to KS2",
        "type": "score",
-        "category": "progress"
+        "category": "progress",
    },
    # Average Scores
    "reading_avg_score": {
@@ -204,21 +227,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Reading Avg",
        "description": "Average scaled score in reading",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    "maths_avg_score": {
        "name": "Maths Average Score",
        "short_name": "Maths Avg",
        "description": "Average scaled score in maths",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    "gps_avg_score": {
        "name": "GPS Average Score",
        "short_name": "GPS Avg",
        "description": "Average scaled score in GPS",
        "type": "score",
-        "category": "average"
+        "category": "average",
    },
    # Gender Performance
    "rwm_expected_boys_pct": {
@@ -226,28 +249,28 @@ METRIC_DEFINITIONS = {
        "short_name": "Boys RWM %",
        "description": "% of boys meeting expected standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_expected_girls_pct": {
        "name": "RWM Expected % (Girls)",
        "short_name": "Girls RWM %",
        "description": "% of girls meeting expected standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_high_boys_pct": {
        "name": "RWM Higher % (Boys)",
        "short_name": "Boys Higher %",
        "description": "% of boys at higher standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    "rwm_high_girls_pct": {
        "name": "RWM Higher % (Girls)",
        "short_name": "Girls Higher %",
        "description": "% of girls at higher standard",
        "type": "percentage",
-        "category": "gender"
+        "category": "gender",
    },
    # Disadvantaged Performance
    "rwm_expected_disadvantaged_pct": {
@@ -255,21 +278,21 @@ METRIC_DEFINITIONS = {
        "short_name": "Disadvantaged %",
        "description": "% of disadvantaged pupils meeting expected",
        "type": "percentage",
-        "category": "equity"
+        "category": "equity",
    },
    "rwm_expected_non_disadvantaged_pct": {
        "name": "RWM Expected % (Non-Disadvantaged)",
        "short_name": "Non-Disadv %",
        "description": "% of non-disadvantaged pupils meeting expected",
        "type": "percentage",
-        "category": "equity"
+        "category": "equity",
    },
    "disadvantaged_gap": {
        "name": "Disadvantaged Gap",
        "short_name": "Disadv Gap",
        "description": "Gap between disadvantaged and national non-disadvantaged",
        "type": "score",
-        "category": "equity"
+        "category": "equity",
    },
    # School Context
    "disadvantaged_pct": {
@@ -277,28 +300,28 @@ METRIC_DEFINITIONS = {
        "short_name": "% Disadvantaged",
        "description": "% of pupils eligible for free school meals or looked after",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "eal_pct": {
        "name": "% EAL Pupils",
        "short_name": "% EAL",
        "description": "% of pupils with English as additional language",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "sen_support_pct": {
        "name": "% SEN Support",
        "short_name": "% SEN",
        "description": "% of pupils with SEN support",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    "stability_pct": {
        "name": "% Pupil Stability",
        "short_name": "% Stable",
        "description": "% of non-mobile pupils (stayed at school)",
        "type": "percentage",
-        "category": "context"
+        "category": "context",
    },
    # 3-Year Averages
    "rwm_expected_3yr_pct": {
@@ -306,122 +329,257 @@ METRIC_DEFINITIONS = {
        "short_name": "RWM 3yr %",
        "description": "3-year average % meeting expected",
        "type": "percentage",
-        "category": "trends"
+        "category": "trends",
    },
    "reading_avg_3yr": {
        "name": "Reading Score (3-Year Avg)",
        "short_name": "Reading 3yr",
        "description": "3-year average reading score",
        "type": "score",
-        "category": "trends"
+        "category": "trends",
    },
    "maths_avg_3yr": {
        "name": "Maths Score (3-Year Avg)",
        "short_name": "Maths 3yr",
        "description": "3-year average maths score",
        "type": "score",
-        "category": "trends"
+        "category": "trends",
    },
 }

 # Ranking columns to include in rankings response
 RANKING_COLUMNS = [
-    "urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
+    "urn",
+    "school_name",
+    "local_authority",
+    "school_type",
+    "address",
+    "year",
+    "total_pupils",
    # Core expected
-    "rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
-    "gps_expected_pct", "science_expected_pct",
+    "rwm_expected_pct",
+    "reading_expected_pct",
+    "writing_expected_pct",
+    "maths_expected_pct",
+    "gps_expected_pct",
+    "science_expected_pct",
    # Core higher
-    "rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
+    "rwm_high_pct",
+    "reading_high_pct",
+    "writing_high_pct",
+    "maths_high_pct",
+    "gps_high_pct",
    # Progress & averages
-    "reading_progress", "writing_progress", "maths_progress",
-    "reading_avg_score", "maths_avg_score", "gps_avg_score",
+    "reading_progress",
+    "writing_progress",
+    "maths_progress",
+    "reading_avg_score",
+    "maths_avg_score",
+    "gps_avg_score",
    # Gender
-    "rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
+    "rwm_expected_boys_pct",
+    "rwm_expected_girls_pct",
+    "rwm_high_boys_pct",
+    "rwm_high_girls_pct",
    # Equity
-    "rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
+    "rwm_expected_disadvantaged_pct",
+    "rwm_expected_non_disadvantaged_pct",
+    "disadvantaged_gap",
    # Context
-    "disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
+    "disadvantaged_pct",
+    "eal_pct",
+    "sen_support_pct",
+    "stability_pct",
    # 3-year
-    "rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
+    "rwm_expected_3yr_pct",
+    "reading_avg_3yr",
+    "maths_avg_3yr",
 ]

 # School listing columns
-SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]
+SCHOOL_COLUMNS = [
+    "urn",
+    "school_name",
+    "local_authority",
+    "school_type",
+    "address",
+    "town",
+    "postcode",
+]

 # Local Authority code to name mapping (for fallback when LANAME column missing)
 # Source: https://www.gov.uk/government/publications/local-authority-codes
 LA_CODE_TO_NAME = {
    # Inner London
-    201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
-    205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
-    208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
-    212: "Wandsworth", 213: "Westminster",
+    201: "City of London",
+    202: "Camden",
+    203: "Greenwich",
+    204: "Hackney",
+    205: "Hammersmith and Fulham",
+    206: "Islington",
+    207: "Kensington and Chelsea",
+    208: "Lambeth",
+    209: "Lewisham",
+    210: "Southwark",
+    211: "Tower Hamlets",
+    212: "Wandsworth",
+    213: "Westminster",
    # Outer London
-    301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent",
-    305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey",
-    310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow",
-    314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge",
-    318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
+    301: "Barking and Dagenham",
+    302: "Barnet",
+    303: "Bexley",
+    304: "Brent",
+    305: "Bromley",
+    306: "Croydon",
+    307: "Ealing",
+    308: "Enfield",
+    309: "Haringey",
+    310: "Harrow",
+    311: "Havering",
+    312: "Hillingdon",
+    313: "Hounslow",
+    314: "Kingston upon Thames",
+    315: "Merton",
+    316: "Newham",
+    317: "Redbridge",
+    318: "Richmond upon Thames",
+    319: "Sutton",
+    320: "Waltham Forest",
    # West Midlands
-    330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell",
-    334: "Solihull", 335: "Walsall", 336: "Wolverhampton",
+    330: "Birmingham",
+    331: "Coventry",
+    332: "Dudley",
+    333: "Sandwell",
+    334: "Solihull",
+    335: "Walsall",
+    336: "Wolverhampton",
    # Merseyside
-    340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral",
+    340: "Knowsley",
+    341: "Liverpool",
+    342: "St. Helens",
+    343: "Sefton",
+    344: "Wirral",
    # Greater Manchester
-    350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale",
-    355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan",
+    350: "Bolton",
+    351: "Bury",
+    352: "Manchester",
+    353: "Oldham",
+    354: "Rochdale",
+    355: "Salford",
+    356: "Stockport",
+    357: "Tameside",
+    358: "Trafford",
+    359: "Wigan",
    # South Yorkshire
-    370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield",
+    370: "Barnsley",
+    371: "Doncaster",
+    372: "Rotherham",
+    373: "Sheffield",
    # West Yorkshire
-    380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield",
+    380: "Bradford",
+    381: "Calderdale",
+    382: "Kirklees",
+    383: "Leeds",
+    384: "Wakefield",
    # Tyne and Wear
-    390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside",
-    393: "South Tyneside", 394: "Sunderland",
+    390: "Gateshead",
+    391: "Newcastle upon Tyne",
+    392: "North Tyneside",
+    393: "South Tyneside",
+    394: "Sunderland",
    # Isles of Scilly
    420: "Isles of Scilly",
    # Unitary authorities (800+)
-    800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset",
-    803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough",
-    807: "Redcar and Cleveland", 808: "Stockton-on-Tees",
-    810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
-    812: "North East Lincolnshire", 813: "North Lincolnshire",
-    815: "North Yorkshire", 816: "York",
-    820: "Bedford", 821: "Central Bedfordshire", 822: "Luton",
-    823: "West Northamptonshire", 824: "North Northamptonshire",
-    825: "Buckinghamshire", 826: "Milton Keynes",
-    830: "Derbyshire", 831: "Derby",
-    835: "Dorset", 836: "Bournemouth, Christchurch and Poole",
-    837: "Poole", 838: "Bournemouth",  # Historic codes (merged into 836)
-    839: "Durham", 840: "Darlington",
-    841: "East Sussex", 845: "Brighton and Hove",
-    846: "Hampshire", 850: "Portsmouth", 851: "Southampton", 852: "Isle of Wight",
-    855: "Leicestershire", 856: "Leicester", 857: "Rutland",
-    860: "Staffordshire", 861: "Stoke-on-Trent",
-    865: "Wiltshire", 866: "Swindon",
-    867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
-    870: "Reading", 871: "Slough", 872: "Wokingham",
-    873: "Cambridgeshire", 874: "Peterborough",
-    876: "Halton", 877: "Warrington",
-    878: "Devon", 879: "Plymouth", 880: "Torbay",
-    881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock",
-    884: "Herefordshire", 885: "Worcestershire",
-    886: "Kent", 887: "Medway",
-    888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
-    891: "Nottinghamshire", 892: "Nottingham",
-    893: "Shropshire", 894: "Telford and Wrekin",
-    895: "Cheshire East", 896: "Cheshire West and Chester",
+    800: "Bath and North East Somerset",
+    801: "Bristol, City of",
+    802: "North Somerset",
+    803: "South Gloucestershire",
+    805: "Hartlepool",
+    806: "Middlesbrough",
+    807: "Redcar and Cleveland",
+    808: "Stockton-on-Tees",
+    810: "Kingston Upon Hull, City of",
+    811: "East Riding of Yorkshire",
+    812: "North East Lincolnshire",
+    813: "North Lincolnshire",
+    815: "North Yorkshire",
+    816: "York",
+    820: "Bedford",
+    821: "Central Bedfordshire",
+    822: "Luton",
+    823: "West Northamptonshire",
+    824: "North Northamptonshire",
+    825: "Buckinghamshire",
+    826: "Milton Keynes",
+    830: "Derbyshire",
+    831: "Derby",
+    835: "Dorset",
+    836: "Bournemouth, Christchurch and Poole",
+    837: "Poole",  # Historic code (merged into 836)
+    838: "Bournemouth",  # Historic code (merged into 836)
+    839: "Durham",
+    840: "Darlington",
+    841: "East Sussex",
+    845: "Brighton and Hove",
+    846: "Hampshire",
+    850: "Portsmouth",
+    851: "Southampton",
+    852: "Isle of Wight",
+    855: "Leicestershire",
+    856: "Leicester",
+    857: "Rutland",
+    860: "Staffordshire",
+    861: "Stoke-on-Trent",
+    865: "Wiltshire",
+    866: "Swindon",
+    867: "Bracknell Forest",
+    868: "Windsor and Maidenhead",
+    869: "West Berkshire",
+    870: "Reading",
+    871: "Slough",
+    872: "Wokingham",
+    873: "Cambridgeshire",
+    874: "Peterborough",
+    876: "Halton",
+    877: "Warrington",
+    878: "Devon",
+    879: "Plymouth",
+    880: "Torbay",
+    881: "Essex",
+    882: "Southend-on-Sea",
+    883: "Thurrock",
+    884: "Herefordshire",
+    885: "Worcestershire",
+    886: "Kent",
+    887: "Medway",
+    888: "Lancashire",
+    889: "Blackburn with Darwen",
+    890: "Blackpool",
+    891: "Nottinghamshire",
+    892: "Nottingham",
+    893: "Shropshire",
+    894: "Telford and Wrekin",
+    895: "Cheshire East",
+    896: "Cheshire West and Chester",
    # County councils (900+)
-    908: "Cornwall", 909: "Cumbria",
-    916: "Gloucestershire", 919: "Hertfordshire",
-    921: "Norfolk", 925: "Lincolnshire",
+    908: "Cornwall",
+    909: "Cumbria",
+    916: "Gloucestershire",
+    919: "Hertfordshire",
+    921: "Norfolk",
+    925: "Lincolnshire",
    926: "Northamptonshire",  # Historic (split into 823/824 in 2021)
-    928: "Northumberland", 929: "Oxfordshire",
-    931: "Somerset", 933: "Suffolk", 935: "Surrey",
-    936: "Warwickshire", 937: "West Sussex",
+    928: "Northumberland",
+    929: "Oxfordshire",
+    931: "Somerset",
+    933: "Suffolk",
+    935: "Surrey",
+    936: "Warwickshire",
+    937: "West Sussex",
    # New authorities (2023 reorganization)
-    938: "Westmorland and Furness", 940: "Cumberland",
+    938: "Westmorland and Furness",
+    940: "Cumberland",
    941: "North Yorkshire",  # New unitary
    942: "Somerset",  # New unitary (replaced 931)
    943: "Buckinghamshire",  # New unitary (2020, replacing 825 in some datasets)
 }
-
@@ -465,12 +465,12 @@ function renderFeaturedSchools(schools) {
                <div class="school-address">${escapeHtml(school.address || "")}</div>
                <div class="school-stats">
                    <div class="stat">
-                        <div class="stat-value">Primary</div>
-                        <div class="stat-label">Phase</div>
+                        <div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
+                        <div class="stat-label">RWM Expected</div>
                    </div>
                    <div class="stat">
-                        <div class="stat-value">KS2</div>
-                        <div class="stat-label">Data</div>
+                        <div class="stat-value">${school.total_pupils || "-"}</div>
+                        <div class="stat-label">Pupils</div>
                    </div>
                </div>
            </div>
@@ -587,12 +587,12 @@ function renderSchools(schools) {
                <div class="school-address">${escapeHtml(school.address || "")}</div>
                <div class="school-stats">
                    <div class="stat">
-                        <div class="stat-value">Primary</div>
-                        <div class="stat-label">Phase</div>
+                        <div class="stat-value">${formatMetricValue(school.rwm_expected_pct, "rwm_expected_pct")}</div>
+                        <div class="stat-label">RWM Expected</div>
                    </div>
                    <div class="stat">
-                        <div class="stat-value">KS2</div>
-                        <div class="stat-label">Data</div>
+                        <div class="stat-value">${school.total_pupils || "-"}</div>
+                        <div class="stat-label">Pupils</div>
                    </div>
                </div>
            </div>