"""
SchoolCompare.co.uk API

Serves primary school (KS2) performance data for comparing schools.
Uses real data from UK Government Compare School Performance downloads.
"""
|
|
|
|
from fastapi import FastAPI, HTTPException, Query
|
|
from fastapi.staticfiles import StaticFiles
|
|
from fastapi.responses import FileResponse
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
import pandas as pd
|
|
import numpy as np
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import os
|
|
import re
|
|
|
|
# No longer filtering by specific LA codes - load all available schools

# The ASGI application object; every route below is registered on it.
app = FastAPI(
    title="SchoolCompare API",
    description="API for comparing primary school (KS2) performance data - schoolcompare.co.uk",
    version="1.0.0",
)

# CORS middleware for development: any origin, method and header is accepted.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive - confirm this is still wanted outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# Data directory: "data" and "frontend" live two levels above this file.
DATA_DIR = Path(__file__).parent.parent / "data"
FRONTEND_DIR = Path(__file__).parent.parent / "frontend"

# Cache for loaded data - cleared on reload (updated for 2016-2017 data).
# Stays None until load_school_data() has built the combined DataFrame.
_data_cache: Optional[pd.DataFrame] = None
|
|
|
|
|
|
def convert_to_native(value):
    """Convert a cell value into something the JSON encoder can emit.

    Args:
        value: A scalar (Python or numpy), a string, a list or a numpy array.

    Returns:
        * ``None`` for missing values (None/NaN/NaT), for +/-infinity and for
          the suppression markers "SUPP", "NE", "NA" and "NP";
        * a plain ``int``/``float``/``bool`` for the matching numpy scalars;
        * a list (converted element by element) for lists and numpy arrays;
        * the value unchanged otherwise.
    """
    # Containers are handled first: pd.isna() on an array returns an array,
    # and using that in an ``if`` raises "truth value is ambiguous".
    if isinstance(value, np.ndarray):
        return [convert_to_native(item) for item in value.tolist()]
    if isinstance(value, list):
        return [convert_to_native(item) for item in value]

    if isinstance(value, str):
        return None if value in ("SUPP", "NE", "NA", "NP") else value

    if pd.isna(value):
        return None

    if isinstance(value, np.bool_):
        return bool(value)
    if isinstance(value, np.integer):
        return int(value)
    # Plain Python floats are included so that float('inf') is also rejected;
    # NaN has already been handled by pd.isna() above.
    if isinstance(value, (float, np.floating)):
        return None if np.isinf(value) else float(value)

    return value
|
|
|
|
|
|
def clean_for_json(df: pd.DataFrame) -> list:
    """Turn a DataFrame into JSON-ready row dictionaries.

    Each row becomes a ``{column: value}`` dict and every value is passed
    through ``convert_to_native`` so NaN/inf and numpy scalars do not reach
    the JSON encoder.
    """
    return [
        {column: convert_to_native(cell) for column, cell in row.items()}
        for row in df.to_dict(orient="records")
    ]
|
|
|
|
|
|
def parse_numeric(value):
    """Parse a raw CSV cell into a number, or ``None`` when it has no value.

    Args:
        value: A number, a string such as "85%", " 12.5 " or "SUPP", or a
            missing value.

    Returns:
        The number (ints and floats are returned unchanged, numpy integers
        as ``int``, parsed strings as ``float``), or ``None`` for missing
        values, NaN/infinity, the markers SUPP/NE/NA/NP/NEW/LOW, empty
        strings, unparseable text and unsupported types.
    """
    if pd.isna(value):
        return None

    # np.int64 and friends are not subclasses of int, so handle them
    # explicitly instead of letting them fall through to ``return None``.
    if isinstance(value, np.integer):
        return int(value)

    if isinstance(value, (int, float)):
        return value if np.isfinite(value) else None

    if isinstance(value, str):
        text = value.strip()
        if text in ("SUPP", "NE", "NA", "NP", "NEW", "LOW", ""):
            return None
        # Remove % sign if present
        if text.endswith('%'):
            text = text[:-1]
        try:
            number = float(text)
        except ValueError:
            return None
        # float() accepts "inf" and "nan"; apply the same finiteness rule
        # as for values that arrive already numeric.
        return number if np.isfinite(number) else None

    return None
|
|
|
|
|
|
def extract_year_from_folder(folder_name: str) -> Optional[int]:
    """Return the second year of a 'YYYY-YYYY' span found in *folder_name*.

    For example '2023-2024' gives 2024. Returns None when the name contains
    no such span.
    """
    year_span = re.search(r'(\d{4})-(\d{4})', folder_name)
    return int(year_span.group(2)) if year_span else None
|
|
|
|
|
|
# Raw KS2 CSV column name -> field name exposed by this API.
_KS2_COLUMN_RENAMES = {
    'URN': 'urn',
    'SCHNAME': 'school_name',
    'ADDRESS1': 'address1',
    'ADDRESS2': 'address2',
    'TOWN': 'town',
    'PCODE': 'postcode',
    'NFTYPE': 'school_type_code',
    'RELDENOM': 'religious_denomination',
    'AGERANGE': 'age_range',
    'TOTPUPS': 'total_pupils',
    'TELIG': 'eligible_pupils',
    # Core KS2 metrics
    'PTRWM_EXP': 'rwm_expected_pct',
    'PTRWM_HIGH': 'rwm_high_pct',
    'READPROG': 'reading_progress',
    'WRITPROG': 'writing_progress',
    'MATPROG': 'maths_progress',
    'PTREAD_EXP': 'reading_expected_pct',
    'PTWRITTA_EXP': 'writing_expected_pct',
    'PTMAT_EXP': 'maths_expected_pct',
    'READ_AVERAGE': 'reading_avg_score',
    'MAT_AVERAGE': 'maths_avg_score',
    'PTREAD_HIGH': 'reading_high_pct',
    'PTWRITTA_HIGH': 'writing_high_pct',
    'PTMAT_HIGH': 'maths_high_pct',
    # GPS (Grammar, Punctuation & Spelling)
    'PTGPS_EXP': 'gps_expected_pct',
    'PTGPS_HIGH': 'gps_high_pct',
    'GPS_AVERAGE': 'gps_avg_score',
    # Science
    'PTSCITA_EXP': 'science_expected_pct',
    # School context
    'PTFSM6CLA1A': 'disadvantaged_pct',
    'PTEALGRP2': 'eal_pct',
    'PSENELK': 'sen_support_pct',
    'PSENELE': 'sen_ehcp_pct',
    'PTMOBN': 'stability_pct',
    # Gender breakdown
    'PTRWM_EXP_B': 'rwm_expected_boys_pct',
    'PTRWM_EXP_G': 'rwm_expected_girls_pct',
    'PTRWM_HIGH_B': 'rwm_high_boys_pct',
    'PTRWM_HIGH_G': 'rwm_high_girls_pct',
    # Disadvantaged performance
    'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
    'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
    'DIFFN_RWM_EXP': 'disadvantaged_gap',
    # 3-year averages
    'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
    'READ_AVERAGE_3YR': 'reading_avg_3yr',
    'MAT_AVERAGE_3YR': 'maths_avg_3yr',
}

# School type code -> display name; codes not listed here become 'Other'.
_SCHOOL_TYPE_NAMES = {
    'AC': 'Academy', 'ACC': 'Academy Converter', 'ACS': 'Academy Sponsor Led',
    'CY': 'Community School', 'VA': 'Voluntary Aided', 'VC': 'Voluntary Controlled',
    'FD': 'Foundation', 'F': 'Foundation', 'FS': 'Free School',
}

# Renamed columns whose cells are run through parse_numeric().
_KS2_NUMERIC_COLUMNS = [
    # Core metrics
    'rwm_expected_pct', 'rwm_high_pct', 'reading_progress',
    'writing_progress', 'maths_progress', 'reading_expected_pct',
    'writing_expected_pct', 'maths_expected_pct', 'reading_avg_score',
    'maths_avg_score', 'reading_high_pct', 'writing_high_pct', 'maths_high_pct',
    # GPS & Science
    'gps_expected_pct', 'gps_high_pct', 'gps_avg_score', 'science_expected_pct',
    # School context
    'total_pupils', 'eligible_pupils', 'disadvantaged_pct', 'eal_pct',
    'sen_support_pct', 'sen_ehcp_pct', 'stability_pct',
    # Gender breakdown
    'rwm_expected_boys_pct', 'rwm_expected_girls_pct',
    'rwm_high_boys_pct', 'rwm_high_girls_pct',
    # Disadvantaged performance
    'rwm_expected_disadvantaged_pct', 'rwm_expected_non_disadvantaged_pct', 'disadvantaged_gap',
    # 3-year averages
    'rwm_expected_3yr_pct', 'reading_avg_3yr', 'maths_avg_3yr',
]


def _load_ks2_year(ks2_file, year: int) -> pd.DataFrame:
    """Read one year's KS2 CSV and return it with the API's column layout."""
    df = pd.read_csv(ks2_file, low_memory=False)

    # Handle both string and integer columns
    for id_col in ('LEA', 'URN'):
        if id_col in df.columns and df[id_col].dtype == 'object':
            df[id_col] = pd.to_numeric(df[id_col], errors='coerce')

    # Filter to schools only (RECTYPE == 1 means school level data).
    # Coercing first keeps the filter working if the column was read as text,
    # and .copy() makes the later column assignments act on an independent
    # frame instead of a slice of the raw CSV frame.
    if 'RECTYPE' in df.columns:
        rectype = pd.to_numeric(df['RECTYPE'], errors='coerce')
        df = df[rectype == 1].copy()

    # Add year and local authority name from LANAME column
    df['year'] = year
    if 'LANAME' in df.columns:
        df['local_authority'] = df['LANAME']
    elif 'LEA' in df.columns:
        df['local_authority'] = df['LEA'].astype(str)

    # Standardize column names for our API
    df = df.rename(columns=_KS2_COLUMN_RENAMES)

    # Create address field from the non-empty parts. reindex() supplies NaN
    # for any part that is absent, and iterating three columns avoids a
    # whole-row apply (which also misbehaves on an empty frame).
    address_parts = df.reindex(columns=['address1', 'town', 'postcode'])
    df['address'] = [
        ', '.join(str(part) for part in parts if pd.notna(part) and part)
        for parts in address_parts.itertuples(index=False, name=None)
    ]

    # Map school type codes to names; a file without the code column gets
    # 'Other' everywhere rather than failing the whole year.
    if 'school_type_code' in df.columns:
        df['school_type'] = df['school_type_code'].map(_SCHOOL_TYPE_NAMES).fillna('Other')
    else:
        df['school_type'] = 'Other'

    # Parse numeric columns
    for col in _KS2_NUMERIC_COLUMNS:
        if col in df.columns:
            df[col] = df[col].apply(parse_numeric)

    return df


def load_school_data() -> pd.DataFrame:
    """Load and combine all school data from CSV files in year folders.

    Scans DATA_DIR for sub-folders named like 'YYYY-YYYY', reads
    'england_ks2final.csv' from each, and concatenates the per-year frames.
    The result is stored in the module-level cache, so only the first call
    touches the disk.

    Returns:
        The combined DataFrame, or an empty DataFrame when no file could be
        loaded. A file that fails to load is reported on stdout and skipped.
    """
    global _data_cache

    if _data_cache is not None:
        return _data_cache

    all_data = []

    # Look for year folders in data directory
    if DATA_DIR.exists():
        # sorted() gives a stable load order; iterdir() order is arbitrary.
        for year_folder in sorted(DATA_DIR.iterdir()):
            if not (year_folder.is_dir() and re.match(r'\d{4}-\d{4}', year_folder.name)):
                continue

            year = extract_year_from_folder(year_folder.name)
            if year is None:
                continue

            # Look for KS2 data file
            ks2_file = year_folder / "england_ks2final.csv"
            if not ks2_file.exists():
                continue

            try:
                print(f"Loading data from {ks2_file}")
                df = _load_ks2_year(ks2_file, year)
            except Exception as e:
                # Best effort: one unreadable year must not block the others.
                print(f"Error loading {ks2_file}: {e}")
                continue

            all_data.append(df)
            print(f" Loaded {len(df)} schools for year {year}")

    if all_data:
        _data_cache = pd.concat(all_data, ignore_index=True)
        print(f"\nTotal records loaded: {len(_data_cache)}")
        print(f"Unique schools: {_data_cache['urn'].nunique()}")
        print(f"Years: {sorted(_data_cache['year'].unique())}")
    else:
        print("No data files found. Creating empty DataFrame.")
        _data_cache = pd.DataFrame()

    return _data_cache
|
|
|
|
|
|
@app.get("/")
async def root():
    """Serve the frontend's index.html page."""
    index_page = FRONTEND_DIR / "index.html"
    return FileResponse(index_page)
|
|
|
|
|
|
@app.get("/api/schools")
async def get_schools(
    search: Optional[str] = Query(None, description="Search by school name"),
    local_authority: Optional[str] = Query(None, description="Filter by local authority name"),
    school_type: Optional[str] = Query(None, description="Filter by school type"),
):
    """Get the list of unique primary schools, optionally filtered.

    Each school appears once, described by its most recent year of data.

    Args:
        search: Case-insensitive text matched literally against the school
            name and, when present, the address.
        local_authority: Case-insensitive exact match on the authority name.
        school_type: Case-insensitive exact match on the school type.

    Returns:
        ``{"schools": [...]}``; the list is empty when no data is loaded.
    """
    df = load_school_data()

    if df.empty:
        return {"schools": []}

    # Get unique schools (latest year data for each)
    latest_year = df.groupby('urn')['year'].max().reset_index()
    df_latest = df.merge(latest_year, on=['urn', 'year'])

    school_cols = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]
    available_cols = [c for c in school_cols if c in df_latest.columns]
    schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])

    # Apply filters
    if search:
        search_lower = search.lower()
        # regex=False: the text comes straight from the query string, so
        # characters such as "(" or "*" must be matched literally instead of
        # being compiled as a pattern (which would raise on invalid input).
        mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False, regex=False)
        if "address" in schools_df.columns:
            mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False, regex=False)
        schools_df = schools_df[mask]

    if local_authority:
        schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()]

    if school_type:
        schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()]

    return {"schools": clean_for_json(schools_df)}
|
|
|
|
|
|
@app.get("/api/schools/{urn}")
async def get_school_details(urn: int):
    """Get detailed KS2 data for a specific primary school across all years.

    Args:
        urn: The school's URN, taken from the path.

    Returns:
        ``school_info`` built from the school's most recent year, plus
        ``yearly_data`` with one record per year in ascending year order.

    Raises:
        HTTPException: 404 when no data is loaded or the URN is unknown.
    """
    df = load_school_data()

    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")

    school_data = df[df["urn"] == urn]

    if school_data.empty:
        raise HTTPException(status_code=404, detail="School not found")

    # Sort by year so the last row is the latest one
    school_data = school_data.sort_values("year")
    latest = school_data.iloc[-1]

    # Values read from the row may be NaN or numpy scalars; run them through
    # the same conversion as yearly_data so the response is always valid JSON.
    school_info = {"urn": urn}
    for field in ("school_name", "local_authority", "school_type", "address"):
        school_info[field] = convert_to_native(latest.get(field, ""))
    school_info["phase"] = "Primary"

    return {
        "school_info": school_info,
        "yearly_data": clean_for_json(school_data),
    }
|
|
|
|
|
|
@app.get("/api/compare")
async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")):
    """Compare multiple primary schools side by side.

    Args:
        urns: Comma-separated list of integer URNs.

    Returns:
        ``{"comparison": {...}}`` keyed by URN (as a string). Each entry has
        ``school_info`` from the school's latest year and ``yearly_data`` in
        ascending year order. Requested URNs with no data are omitted.

    Raises:
        HTTPException: 400 when a URN is not an integer; 404 when no data is
            loaded or none of the URNs match.
    """
    df = load_school_data()

    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")

    try:
        urn_list = [int(u.strip()) for u in urns.split(",")]
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URN format") from None

    comparison_data = df[df["urn"].isin(urn_list)]

    if comparison_data.empty:
        raise HTTPException(status_code=404, detail="No schools found")

    info_fields = ("school_name", "local_authority", "school_type", "address")
    result = {}
    # dict.fromkeys() drops repeated URNs while keeping the requested order,
    # so a duplicated URN is not processed twice.
    for urn in dict.fromkeys(urn_list):
        school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year")
        if school_data.empty:
            continue

        latest = school_data.iloc[-1]
        # Convert row values the same way as yearly_data so NaN or numpy
        # scalars cannot break JSON rendering.
        school_info = {"urn": urn}
        for field in info_fields:
            school_info[field] = convert_to_native(latest.get(field, ""))

        result[str(urn)] = {
            "school_info": school_info,
            "yearly_data": clean_for_json(school_data),
        }

    return {"comparison": result}
|
|
|
|
|
|
@app.get("/api/filters")
async def get_filter_options():
    """Get available filter options (local authorities, school types, years).

    Each option list holds the sorted distinct non-null values of its
    column; all three lists are empty when no data is loaded.
    """
    frame = load_school_data()

    # Response key -> DataFrame column it is derived from.
    option_sources = {
        "local_authorities": "local_authority",
        "school_types": "school_type",
        "years": "year",
    }

    if frame.empty:
        return {label: [] for label in option_sources}

    return {
        label: sorted(frame[column].dropna().unique().tolist())
        for label, column in option_sources.items()
    }
|
|
|
|
|
|
@app.get("/api/metrics")
async def get_available_metrics():
    """Get list of available KS2 performance metrics for primary schools.

    A metric is listed when its column exists in the loaded data; when no
    data is loaded, every known metric is listed.
    """
    frame = load_school_data()

    # KS2 metric catalogue: (key, name, description, type, category)
    catalogue = (
        # Expected Standard
        ("rwm_expected_pct", "RWM Combined %", "% meeting expected standard in reading, writing and maths", "percentage", "expected"),
        ("reading_expected_pct", "Reading Expected %", "% meeting expected standard in reading", "percentage", "expected"),
        ("writing_expected_pct", "Writing Expected %", "% meeting expected standard in writing", "percentage", "expected"),
        ("maths_expected_pct", "Maths Expected %", "% meeting expected standard in maths", "percentage", "expected"),
        ("gps_expected_pct", "GPS Expected %", "% meeting expected standard in grammar, punctuation & spelling", "percentage", "expected"),
        ("science_expected_pct", "Science Expected %", "% meeting expected standard in science", "percentage", "expected"),
        # Higher Standard
        ("rwm_high_pct", "RWM Combined Higher %", "% achieving higher standard in RWM combined", "percentage", "higher"),
        ("reading_high_pct", "Reading Higher %", "% achieving higher standard in reading", "percentage", "higher"),
        ("writing_high_pct", "Writing Higher %", "% achieving greater depth in writing", "percentage", "higher"),
        ("maths_high_pct", "Maths Higher %", "% achieving higher standard in maths", "percentage", "higher"),
        ("gps_high_pct", "GPS Higher %", "% achieving higher standard in GPS", "percentage", "higher"),
        # Progress Scores
        ("reading_progress", "Reading Progress", "Progress in reading from KS1 to KS2", "score", "progress"),
        ("writing_progress", "Writing Progress", "Progress in writing from KS1 to KS2", "score", "progress"),
        ("maths_progress", "Maths Progress", "Progress in maths from KS1 to KS2", "score", "progress"),
        # Average Scores
        ("reading_avg_score", "Reading Avg Score", "Average scaled score in reading", "score", "average"),
        ("maths_avg_score", "Maths Avg Score", "Average scaled score in maths", "score", "average"),
        ("gps_avg_score", "GPS Avg Score", "Average scaled score in GPS", "score", "average"),
        # Gender Performance
        ("rwm_expected_boys_pct", "RWM Expected % (Boys)", "% of boys meeting expected standard", "percentage", "gender"),
        ("rwm_expected_girls_pct", "RWM Expected % (Girls)", "% of girls meeting expected standard", "percentage", "gender"),
        ("rwm_high_boys_pct", "RWM Higher % (Boys)", "% of boys at higher standard", "percentage", "gender"),
        ("rwm_high_girls_pct", "RWM Higher % (Girls)", "% of girls at higher standard", "percentage", "gender"),
        # Disadvantaged Performance
        ("rwm_expected_disadvantaged_pct", "RWM Expected % (Disadvantaged)", "% of disadvantaged pupils meeting expected", "percentage", "equity"),
        ("rwm_expected_non_disadvantaged_pct", "RWM Expected % (Non-Disadvantaged)", "% of non-disadvantaged pupils meeting expected", "percentage", "equity"),
        ("disadvantaged_gap", "Disadvantaged Gap", "Gap between disadvantaged and national non-disadvantaged", "score", "equity"),
        # School Context
        ("disadvantaged_pct", "% Disadvantaged Pupils", "% of pupils eligible for free school meals or looked after", "percentage", "context"),
        ("eal_pct", "% EAL Pupils", "% of pupils with English as additional language", "percentage", "context"),
        ("sen_support_pct", "% SEN Support", "% of pupils with SEN support", "percentage", "context"),
        ("stability_pct", "% Pupil Stability", "% of non-mobile pupils (stayed at school)", "percentage", "context"),
        # 3-Year Averages
        ("rwm_expected_3yr_pct", "RWM Expected % (3-Year Avg)", "3-year average % meeting expected", "percentage", "trends"),
        ("reading_avg_3yr", "Reading Score (3-Year Avg)", "3-year average reading score", "score", "trends"),
        ("maths_avg_3yr", "Maths Score (3-Year Avg)", "3-year average maths score", "score", "trends"),
    )

    list_everything = frame.empty
    metrics = [
        {"key": key, "name": name, "description": description, "type": kind, "category": category}
        for key, name, description, kind, category in catalogue
        if list_everything or key in frame.columns
    ]

    return {"metrics": metrics}
|
|
|
|
|
|
@app.get("/api/rankings")
async def get_rankings(
    metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
    year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
    limit: int = Query(20, description="Number of schools to return"),
):
    """Get primary school rankings by a specific KS2 metric.

    Args:
        metric: Column to rank by; the highest values come first.
        year: Year to rank; the most recent loaded year when omitted.
        limit: Maximum number of schools returned; values below zero are
            treated as zero.

    Returns:
        ``metric``, the ``year`` of the returned rows (None when there are
        none) and ``rankings``, the top rows restricted to ranking fields.

    Raises:
        HTTPException: 400 when *metric* is not a column of the loaded data.
    """
    df = load_school_data()

    if df.empty:
        return {"metric": metric, "year": None, "rankings": []}

    if metric not in df.columns:
        raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available")

    # Filter by year. "is not None" rather than truthiness, so an explicit
    # year=0 is honoured as a filter instead of meaning "most recent".
    target_year = year if year is not None else df["year"].max()
    df = df[df["year"] == target_year]

    # Sort and rank (exclude rows with no data for this metric)
    df = df.dropna(subset=[metric])

    # For progress scores, higher is better. For percentages, higher is also better.
    # head() with a negative count means "all but the last n", so clamp it.
    df = df.sort_values(metric, ascending=False).head(max(limit, 0))

    # Return only relevant fields for rankings
    ranking_cols = [
        "urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
        # Core expected
        "rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
        "gps_expected_pct", "science_expected_pct",
        # Core higher
        "rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
        # Progress & averages
        "reading_progress", "writing_progress", "maths_progress",
        "reading_avg_score", "maths_avg_score", "gps_avg_score",
        # Gender
        "rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
        # Equity
        "rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
        # Context
        "disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
        # 3-year
        "rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
    ]
    available_cols = [c for c in ranking_cols if c in df.columns]
    df = df[available_cols]

    return {
        "metric": metric,
        "year": int(df["year"].iloc[0]) if not df.empty else None,
        "rankings": clean_for_json(df),
    }
|
|
|
|
|
|
@app.get("/api/data-info")
async def get_data_info():
    """Get information about loaded data.

    Reports record and school counts, the years available, schools per year
    and records per local authority, or a "no_data" status with the data
    folder path when nothing is loaded.
    """
    frame = load_school_data()

    if frame.empty:
        return {
            "status": "no_data",
            "message": "No data files found in data folder. Please download KS2 data from the government website.",
            "data_folder": str(DATA_DIR),
        }

    # Cast everything to plain Python types so the response serialises cleanly.
    years_available = [int(value) for value in sorted(frame["year"].unique())]

    schools_by_year = frame.groupby("year")["urn"].nunique()
    schools_per_year = {
        str(int(year_key)): int(count) for year_key, count in schools_by_year.items()
    }

    records_by_authority = frame["local_authority"].value_counts()
    la_counts = {
        str(authority): int(count) for authority, count in records_by_authority.items()
    }

    return {
        "status": "loaded",
        "total_records": int(len(frame)),
        "unique_schools": int(frame["urn"].nunique()),
        "years_available": years_available,
        "schools_per_year": schools_per_year,
        "local_authorities": la_counts,
    }
|
|
|
|
|
|
# Mount static files
@app.on_event("startup")
async def startup():
    """Mount the frontend under /static (when present) and warm the data cache."""
    frontend_present = FRONTEND_DIR.exists()
    if frontend_present:
        static_app = StaticFiles(directory=FRONTEND_DIR)
        app.mount("/static", static_app, name="static")

    # Pre-load data so the first request does not pay for reading the CSVs
    load_school_data()
|
|
|
|
|
|
# Run the app with uvicorn when this file is executed as a script.
if __name__ == "__main__":
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|