""" SchoolCompare.co.uk API Serves primary school (KS2) performance data for comparing schools. Uses real data from UK Government Compare School Performance downloads. """ from contextlib import asynccontextmanager import pandas as pd from fastapi import FastAPI, HTTPException, Query from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse from fastapi.middleware.cors import CORSMiddleware from typing import Optional from .config import settings from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS from .data_loader import load_school_data, clear_cache, geocode_single_postcode, geocode_postcodes_bulk, haversine_distance from .utils import clean_for_json @asynccontextmanager async def lifespan(app: FastAPI): """Application lifespan - startup and shutdown events.""" # Startup: pre-load data print("Starting up: Loading school data...") load_school_data() print("Data loaded successfully.") yield # Application runs here # Shutdown: cleanup if needed print("Shutting down...") app = FastAPI( title="SchoolCompare API", description="API for comparing primary school (KS2) performance data - schoolcompare.co.uk", version="2.0.0", lifespan=lifespan, ) # CORS middleware with configurable origins app.add_middleware( CORSMiddleware, allow_origins=settings.allowed_origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) @app.get("/") async def root(): """Serve the frontend.""" return FileResponse(settings.frontend_dir / "index.html") @app.get("/compare") async def serve_compare(): """Serve the frontend for /compare route (SPA routing).""" return FileResponse(settings.frontend_dir / "index.html") @app.get("/rankings") async def serve_rankings(): """Serve the frontend for /rankings route (SPA routing).""" return FileResponse(settings.frontend_dir / "index.html") @app.get("/api/schools") async def get_schools( search: Optional[str] = Query(None, description="Search by school name"), local_authority: Optional[str] = Query(None, description="Filter by local authority"), school_type: Optional[str] = Query(None, description="Filter by school type"), postcode: Optional[str] = Query(None, description="Search near postcode"), radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"), page: int = Query(1, ge=1, description="Page number"), page_size: int = Query(None, ge=1, le=100, description="Results per page"), ): """ Get list of unique primary schools with pagination. Returns paginated results with total count for efficient loading. Supports location-based search using postcode. """ df = load_school_data() if df.empty: return {"schools": [], "total": 0, "page": page, "page_size": 0} # Use configured default if not specified if page_size is None: page_size = settings.default_page_size # Get unique schools (latest year data for each) latest_year = df.groupby('urn')['year'].max().reset_index() df_latest = df.merge(latest_year, on=['urn', 'year']) # Include lat/long in columns for location search location_cols = ['latitude', 'longitude'] available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns] schools_df = df_latest[available_cols].drop_duplicates(subset=['urn']) # Location-based search search_coords = None if postcode: coords = geocode_single_postcode(postcode) if coords: search_coords = coords schools_df = schools_df.copy() # Geocode school postcodes on-demand if not already cached if 'postcode' in schools_df.columns: unique_postcodes = schools_df['postcode'].dropna().unique().tolist() geocoded = geocode_postcodes_bulk(unique_postcodes) # Add lat/long from geocoded data schools_df['latitude'] = schools_df['postcode'].apply( lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None ) schools_df['longitude'] = schools_df['postcode'].apply( lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None ) # Filter by distance def calc_distance(row): if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')): return float('inf') return haversine_distance( search_coords[0], search_coords[1], row['latitude'], row['longitude'] ) schools_df['distance'] = schools_df.apply(calc_distance, axis=1) schools_df = schools_df[schools_df['distance'] <= radius] schools_df = schools_df.sort_values('distance') # Apply filters if search: search_lower = search.lower() mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False) if "address" in schools_df.columns: mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False) schools_df = schools_df[mask] if local_authority: schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()] if school_type: schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()] # Pagination total = len(schools_df) start_idx = (page - 1) * page_size end_idx = start_idx + page_size schools_df = schools_df.iloc[start_idx:end_idx] # Remove internal columns before sending output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']] if 'distance' in schools_df.columns: output_cols.append('distance') return { "schools": clean_for_json(schools_df[output_cols]), "total": total, "page": page, "page_size": page_size, "total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0, "search_location": {"postcode": postcode, "radius": radius} if search_coords else None, } @app.get("/api/schools/{urn}") async def get_school_details(urn: int): """Get detailed KS2 data for a specific primary school across all years.""" df = load_school_data() if df.empty: raise HTTPException(status_code=404, detail="No data available") school_data = df[df["urn"] == urn] if school_data.empty: raise HTTPException(status_code=404, detail="School not found") # Sort by year school_data = school_data.sort_values("year") # Get latest info for the school latest = school_data.iloc[-1] return { "school_info": { "urn": urn, "school_name": latest.get("school_name", ""), "local_authority": latest.get("local_authority", ""), "school_type": latest.get("school_type", ""), "address": latest.get("address", ""), "phase": "Primary", }, "yearly_data": clean_for_json(school_data) } @app.get("/api/compare") async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")): """Compare multiple primary schools side by side.""" df = load_school_data() if df.empty: raise HTTPException(status_code=404, detail="No data available") try: urn_list = [int(u.strip()) for u in urns.split(",")] except ValueError: raise HTTPException(status_code=400, detail="Invalid URN format") comparison_data = df[df["urn"].isin(urn_list)] if comparison_data.empty: raise HTTPException(status_code=404, detail="No schools found") result = {} for urn in urn_list: school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year") if not school_data.empty: latest = school_data.iloc[-1] result[str(urn)] = { "school_info": { "urn": urn, "school_name": latest.get("school_name", ""), "local_authority": latest.get("local_authority", ""), "address": latest.get("address", ""), }, "yearly_data": clean_for_json(school_data) } return {"comparison": result} @app.get("/api/filters") async def get_filter_options(): """Get available filter options (local authorities, school types, years).""" df = load_school_data() if df.empty: return { "local_authorities": [], "school_types": [], "years": [], } return { "local_authorities": sorted(df["local_authority"].dropna().unique().tolist()), "school_types": sorted(df["school_type"].dropna().unique().tolist()), "years": sorted(df["year"].dropna().unique().tolist()), } @app.get("/api/metrics") async def get_available_metrics(): """ Get list of available KS2 performance metrics for primary schools. This is the single source of truth for metric definitions. Frontend should consume this to avoid duplication. """ df = load_school_data() available = [] for key, info in METRIC_DEFINITIONS.items(): if df.empty or key in df.columns: available.append({"key": key, **info}) return {"metrics": available} @app.get("/api/rankings") async def get_rankings( metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"), year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"), limit: int = Query(20, ge=1, le=100, description="Number of schools to return"), local_authority: Optional[str] = Query(None, description="Filter by local authority"), ): """Get primary school rankings by a specific KS2 metric.""" df = load_school_data() if df.empty: return {"metric": metric, "year": None, "rankings": [], "total": 0} if metric not in df.columns: raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available") # Filter by year if year: df = df[df["year"] == year] else: # Use most recent year max_year = df["year"].max() df = df[df["year"] == max_year] # Filter by local authority if specified if local_authority: df = df[df["local_authority"].str.lower() == local_authority.lower()] # Sort and rank (exclude rows with no data for this metric) df = df.dropna(subset=[metric]) total = len(df) # For progress scores, higher is better. For percentages, higher is also better. df = df.sort_values(metric, ascending=False).head(limit) # Return only relevant fields for rankings available_cols = [c for c in RANKING_COLUMNS if c in df.columns] df = df[available_cols] return { "metric": metric, "year": int(df["year"].iloc[0]) if not df.empty else None, "rankings": clean_for_json(df), "total": total, } @app.get("/api/data-info") async def get_data_info(): """Get information about loaded data.""" df = load_school_data() if df.empty: return { "status": "no_data", "message": "No data files found in data folder. Please download KS2 data from the government website.", "data_folder": str(settings.data_dir), } years = [int(y) for y in sorted(df["year"].unique())] schools_per_year = {str(int(k)): int(v) for k, v in df.groupby("year")["urn"].nunique().to_dict().items()} la_counts = {str(k): int(v) for k, v in df["local_authority"].value_counts().to_dict().items()} return { "status": "loaded", "total_records": int(len(df)), "unique_schools": int(df["urn"].nunique()), "years_available": years, "schools_per_year": schools_per_year, "local_authorities": la_counts, } @app.post("/api/admin/reload") async def reload_data(): """Admin endpoint to force data reload (useful after data updates).""" clear_cache() load_school_data() return {"status": "reloaded"} # Mount static files directly (must be after all routes to avoid catching API calls) if settings.frontend_dir.exists(): app.mount("/static", StaticFiles(directory=settings.frontend_dir), name="static") if __name__ == "__main__": import uvicorn uvicorn.run(app, host=settings.host, port=settings.port)