Files
school_compare/backend/app.py

312 lines
9.9 KiB
Python
Raw Normal View History

2026-01-06 13:52:00 +00:00
"""
SchoolCompare.co.uk API
Serves primary school (KS2) performance data for comparing schools.
Uses real data from UK Government Compare School Performance downloads.
"""
2026-01-06 16:30:32 +00:00
from contextlib import asynccontextmanager
2026-01-06 13:52:00 +00:00
from fastapi import FastAPI, HTTPException, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional
2026-01-06 16:30:32 +00:00
from .config import settings
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
from .data_loader import load_school_data, clear_cache
from .utils import clean_for_json
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Run the application's startup and shutdown steps.

    Code before the ``yield`` runs once before the application starts
    serving; code after it runs once when the application shuts down.
    """
    # --- startup: pre-load the school data ---
    print("Starting up: Loading school data...")
    load_school_data()
    print("Data loaded successfully.")

    yield  # the application runs while suspended here

    # --- shutdown: nothing to clean up yet, just announce it ---
    print("Shutting down...")
2026-01-06 13:52:00 +00:00
# The ASGI application; startup and shutdown are driven by ``lifespan``.
app = FastAPI(
    title="SchoolCompare API",
    description=(
        "API for comparing primary school (KS2) performance data"
        " - schoolcompare.co.uk"
    ),
    version="2.0.0",
    lifespan=lifespan,
)

# CORS: the permitted origins come from configuration, while every HTTP
# method and request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Return the frontend's ``index.html`` from the configured frontend directory."""
    index_page = settings.frontend_dir / "index.html"
    return FileResponse(index_page)
2026-01-06 13:52:00 +00:00
@app.get("/api/schools")
async def get_schools(
    search: Optional[str] = Query(None, description="Search by school name"),
    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
    school_type: Optional[str] = Query(None, description="Filter by school type"),
    page: int = Query(1, ge=1, description="Page number"),
    page_size: Optional[int] = Query(None, ge=1, le=100, description="Results per page"),
):
    """
    Get list of unique primary schools with pagination.

    Each school appears once, described by its row from the most recent year
    it has data for. Filters are applied before pagination, so ``total`` and
    ``total_pages`` describe the filtered result set.

    Args:
        search: Case-insensitive literal substring matched against the school
            name, and against the address when that column is present.
        local_authority: Case-insensitive exact match on the local authority.
        school_type: Case-insensitive exact match on the school type.
        page: 1-based page number.
        page_size: Results per page; falls back to
            ``settings.default_page_size`` when omitted.

    Returns:
        A dict with ``schools``, ``total``, ``page``, ``page_size`` and
        ``total_pages``.
    """
    df = load_school_data()

    if df.empty:
        # Same keys as the populated response so clients need no special case.
        return {
            "schools": [],
            "total": 0,
            "page": page,
            "page_size": 0,
            "total_pages": 0,
        }

    # Use configured default if not specified
    if page_size is None:
        page_size = settings.default_page_size

    # Get unique schools (latest year data for each)
    latest_year = df.groupby("urn")["year"].max().reset_index()
    df_latest = df.merge(latest_year, on=["urn", "year"])

    available_cols = [c for c in SCHOOL_COLUMNS if c in df_latest.columns]
    schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])

    # Apply filters
    if search:
        search_lower = search.lower()
        # regex=False: the query is user input and must be matched literally.
        # Interpreted as a pattern, text such as "(" raises and "." over-matches.
        mask = schools_df["school_name"].str.lower().str.contains(
            search_lower, regex=False, na=False
        )
        if "address" in schools_df.columns:
            mask = mask | schools_df["address"].str.lower().str.contains(
                search_lower, regex=False, na=False
            )
        schools_df = schools_df[mask]

    if local_authority:
        schools_df = schools_df[
            schools_df["local_authority"].str.lower() == local_authority.lower()
        ]

    if school_type:
        schools_df = schools_df[
            schools_df["school_type"].str.lower() == school_type.lower()
        ]

    # Pagination
    total = len(schools_df)
    start_idx = (page - 1) * page_size
    end_idx = start_idx + page_size
    schools_df = schools_df.iloc[start_idx:end_idx]

    return {
        "schools": clean_for_json(schools_df),
        "total": total,
        "page": page,
        "page_size": page_size,
        # Ceiling division; guarded in case the configured default is 0.
        "total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
    }
2026-01-06 13:52:00 +00:00
@app.get("/api/schools/{urn}")
async def get_school_details(urn: int):
    """
    Get detailed KS2 data for a specific primary school across all years.

    Args:
        urn: The school's URN, taken from the request path.

    Returns:
        A dict with ``school_info`` (descriptive fields from the school's most
        recent year) and ``yearly_data`` (all of its rows, oldest year first).

    Raises:
        HTTPException: 404 when no data is loaded or the URN is unknown.
    """
    df = load_school_data()

    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")

    school_data = df[df["urn"] == urn]

    if school_data.empty:
        raise HTTPException(status_code=404, detail="School not found")

    # Sort by year
    school_data = school_data.sort_values("year")

    # Get latest info for the school. Missing entries are dropped so that the
    # .get() defaults below apply; otherwise a NaN would be placed in the
    # response, and NaN is not valid JSON.
    latest = school_data.iloc[-1].dropna()

    return {
        "school_info": {
            "urn": urn,
            "school_name": latest.get("school_name", ""),
            "local_authority": latest.get("local_authority", ""),
            "school_type": latest.get("school_type", ""),
            "address": latest.get("address", ""),
            "phase": "Primary",
        },
        "yearly_data": clean_for_json(school_data),
    }
@app.get("/api/compare")
async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")):
    """
    Compare multiple primary schools side by side.

    Args:
        urns: Comma-separated integer URNs. Whitespace around each value and
            empty entries (for example a trailing comma) are ignored.

    Returns:
        ``{"comparison": {...}}`` keyed by URN (as a string). Each entry holds
        ``school_info`` from the school's most recent year and ``yearly_data``
        with all of its rows, oldest year first. Requested URNs with no data
        are omitted.

    Raises:
        HTTPException: 404 when no data is loaded or none of the URNs match;
            400 when a URN is not an integer or no URN is supplied.
    """
    df = load_school_data()

    if df.empty:
        raise HTTPException(status_code=404, detail="No data available")

    tokens = [token.strip() for token in urns.split(",")]
    try:
        # Blank tokens are skipped so "1,2," is not rejected for a stray comma.
        urn_list = [int(token) for token in tokens if token]
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URN format") from None

    if not urn_list:
        # Nothing but blanks/commas was supplied.
        raise HTTPException(status_code=400, detail="Invalid URN format")

    comparison_data = df[df["urn"].isin(urn_list)]

    if comparison_data.empty:
        raise HTTPException(status_code=404, detail="No schools found")

    result = {}
    for urn in urn_list:
        school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year")
        if school_data.empty:
            continue

        # Missing entries are dropped so the .get() defaults apply instead of
        # a NaN reaching the response (NaN is not valid JSON).
        latest = school_data.iloc[-1].dropna()
        result[str(urn)] = {
            "school_info": {
                "urn": urn,
                "school_name": latest.get("school_name", ""),
                "local_authority": latest.get("local_authority", ""),
                "address": latest.get("address", ""),
            },
            "yearly_data": clean_for_json(school_data),
        }

    return {"comparison": result}
@app.get("/api/filters")
async def get_filter_options():
    """
    List the values available for each filter.

    Returns a dict with the sorted, distinct, non-null local authorities,
    school types and years found in the loaded data; every list is empty
    when no data is loaded.
    """
    # Response key -> data column it is derived from.
    filter_columns = {
        "local_authorities": "local_authority",
        "school_types": "school_type",
        "years": "year",
    }

    df = load_school_data()
    if df.empty:
        return {label: [] for label in filter_columns}

    return {
        label: sorted(df[column].dropna().unique().tolist())
        for label, column in filter_columns.items()
    }
@app.get("/api/metrics")
async def get_available_metrics():
    """
    Get list of available KS2 performance metrics for primary schools.

    This is the single source of truth for metric definitions.
    Frontend should consume this to avoid duplication.

    A metric is listed when its key is a column of the loaded data; when no
    data is loaded, every defined metric is listed.
    """
    df = load_school_data()
    no_data = df.empty

    available = [
        {"key": key, **info}
        for key, info in METRIC_DEFINITIONS.items()
        if no_data or key in df.columns
    ]
    return {"metrics": available}
@app.get("/api/rankings")
async def get_rankings(
    metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
    year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
    limit: int = Query(20, ge=1, le=100, description="Number of schools to return"),
    local_authority: Optional[str] = Query(None, description="Filter by local authority"),
):
    """
    Get primary school rankings by a specific KS2 metric.

    Args:
        metric: Column to rank by; must exist in the loaded data.
        year: Year to rank within; the most recent year in the data is used
            when omitted.
        limit: Maximum number of schools returned.
        local_authority: Optional case-insensitive local authority filter.

    Returns:
        A dict with ``metric``, ``year`` (the year of the ranked rows, or
        ``None`` when nothing matched), ``rankings`` (highest value first)
        and ``total`` (rows with a value for the metric, before ``limit``).

    Raises:
        HTTPException: 400 when ``metric`` is not a column of the data.
    """
    df = load_school_data()

    if df.empty:
        return {"metric": metric, "year": None, "rankings": [], "total": 0}

    if metric not in df.columns:
        raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available")

    # Filter by year. ``is not None`` so an explicit value is never mistaken
    # for "not supplied"; otherwise fall back to the most recent year.
    target_year = year if year is not None else df["year"].max()
    df = df[df["year"] == target_year]

    # Filter by local authority if specified
    if local_authority:
        df = df[df["local_authority"].str.lower() == local_authority.lower()]

    # Sort and rank (exclude rows with no data for this metric)
    df = df.dropna(subset=[metric])
    total = len(df)

    # For progress scores, higher is better. For percentages, higher is also better.
    df = df.sort_values(metric, ascending=False).head(limit)

    # Read the year before narrowing the columns: "year" is not guaranteed to
    # be one of RANKING_COLUMNS, and indexing it afterwards would raise.
    ranked_year = int(df["year"].iloc[0]) if not df.empty else None

    # Return only relevant fields for rankings
    available_cols = [c for c in RANKING_COLUMNS if c in df.columns]
    df = df[available_cols]

    return {
        "metric": metric,
        "year": ranked_year,
        "rankings": clean_for_json(df),
        "total": total,
    }
@app.get("/api/data-info")
async def get_data_info():
    """
    Summarise the loaded data set.

    With no data loaded, returns a ``no_data`` status, a hint on where to get
    the data, and the configured data folder. Otherwise returns record and
    school counts, the years present, the number of distinct schools per
    year, and the number of records per local authority.
    """
    df = load_school_data()

    if df.empty:
        return {
            "status": "no_data",
            "message": "No data files found in data folder. Please download KS2 data from the government website.",
            "data_folder": str(settings.data_dir),
        }

    # Everything is converted to built-in int/str before being returned.
    years = [int(y) for y in sorted(df["year"].unique())]

    urns_by_year = df.groupby("year")["urn"].nunique().to_dict()
    schools_per_year = {
        str(int(year)): int(count) for year, count in urns_by_year.items()
    }

    rows_by_la = df["local_authority"].value_counts().to_dict()
    la_counts = {str(name): int(count) for name, count in rows_by_la.items()}

    return {
        "status": "loaded",
        "total_records": int(len(df)),
        "unique_schools": int(df["urn"].nunique()),
        "years_available": years,
        "schools_per_year": schools_per_year,
        "local_authorities": la_counts,
    }
2026-01-06 16:30:32 +00:00
@app.post("/api/admin/reload")
async def reload_data():
    """
    Force the school data to be loaded again.

    Intended for use after the underlying data has been updated: the cache is
    cleared first, then the data is loaded once more.
    """
    # NOTE(review): this handler performs no authentication itself - confirm
    # that access to it is restricted elsewhere.
    clear_cache()
    load_school_data()
    return {"status": "reloaded"}
# Mount static files after all routes are defined
def mount_static() -> None:
    """
    Mount static file serving under ``/static``.

    The frontend directory is mounted only when it exists. This runs at
    import time rather than as an ``on_event("startup")`` handler because the
    application is created with ``lifespan=...``, and FastAPI does not run
    ``on_event`` handlers when a lifespan is supplied - so the mount would
    never happen.
    """
    if settings.frontend_dir.exists():
        app.mount("/static", StaticFiles(directory=settings.frontend_dir), name="static")


mount_static()
2026-01-06 13:52:00 +00:00
if __name__ == "__main__":
    # Only needed when this module is executed directly, so import it here.
    import uvicorn

    # Serve the app on the configured host and port.
    uvicorn.run(app, host=settings.host, port=settings.port)