Compare commits
61 Commits
52fbade30c
...
add-contac
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75677f4252 | ||
|
|
9b6c37cda3 | ||
|
|
f2eec08bd4 | ||
|
|
f7b9a4d28e | ||
|
|
c23e12fc12 | ||
|
|
a8fe4477f1 | ||
|
|
1a9341eaf4 | ||
|
|
708fbe83a0 | ||
|
|
8e4802df93 | ||
|
|
a18ec04227 | ||
|
|
9cd36a0b15 | ||
|
|
1f6b2dd773 | ||
|
|
6597ee40fb | ||
|
|
bb58d607c2 | ||
|
|
e1383b3432 | ||
|
|
3c1e7b4b27 | ||
|
|
597a841d4d | ||
|
|
ab45f66431 | ||
|
|
c63e0e2682 | ||
|
|
79cf16d6b3 | ||
|
|
e3fc031ecf | ||
|
|
058a741b10 | ||
|
|
ea3f65249e | ||
|
|
b0e2a42acc | ||
|
|
1e6019eac3 | ||
|
|
3f118ef826 | ||
|
|
8458d638ec | ||
|
|
51836852e4 | ||
|
|
116be294a3 | ||
|
|
4b91eb403a | ||
|
|
6623418dbe | ||
|
|
3f8e1911aa | ||
|
|
b7943e1042 | ||
|
|
34f40c0c1c | ||
|
|
1d19c88e49 | ||
|
|
40348cb1bd | ||
|
|
73971a43f0 | ||
|
|
39d0de751b | ||
|
|
0aafdfa382 | ||
|
|
71b05769ae | ||
|
|
8f705221db | ||
|
|
da27643587 | ||
|
|
24ab4593f3 | ||
|
|
9af8d471a6 | ||
|
|
c350216150 | ||
|
|
63c1403f7d | ||
|
|
0d72f81b37 | ||
|
|
e20779ab9f | ||
|
|
0c425bd503 | ||
|
|
e822513e03 | ||
|
|
409853e82e | ||
|
|
e2b2ddfb66 | ||
|
|
1a8ec670b9 | ||
|
|
e601c499b6 | ||
|
|
7274fdd876 | ||
|
|
35e661d732 | ||
|
|
822feaf494 | ||
|
|
491302d409 | ||
|
|
35e62723bb | ||
|
|
4668e19c45 | ||
|
|
5efc4c6af0 |
@@ -26,9 +26,6 @@ Dockerfile
|
||||
docker-compose.yml
|
||||
.dockerignore
|
||||
|
||||
# Scripts (not needed in container)
|
||||
scripts/
|
||||
|
||||
# Documentation
|
||||
README.md
|
||||
*.md
|
||||
|
||||
45
.env
Normal file
45
.env
Normal file
@@ -0,0 +1,45 @@
|
||||
# SchoolCompare Environment Configuration
|
||||
# Copy this file to .env and update the values
|
||||
|
||||
# =============================================================================
|
||||
# DATABASE
|
||||
# =============================================================================
|
||||
# PostgreSQL connection string
|
||||
DATABASE_URL=postgresql://schoolcompare:CHANGE_THIS_PASSWORD@localhost:5432/schoolcompare
|
||||
|
||||
# =============================================================================
|
||||
# SERVER
|
||||
# =============================================================================
|
||||
# Set to False in production
|
||||
DEBUG=False
|
||||
|
||||
# Server host and port
|
||||
HOST=0.0.0.0
|
||||
PORT=80
|
||||
|
||||
# =============================================================================
|
||||
# CORS
|
||||
# =============================================================================
|
||||
# List of allowed origins, written as a JSON array (see the value below)
|
||||
# In production, only include your actual domain
|
||||
ALLOWED_ORIGINS=["https://schoolcompare.co.uk"]
|
||||
|
||||
# =============================================================================
|
||||
# SECURITY
|
||||
# =============================================================================
|
||||
# Admin API key for protected endpoints (e.g., /api/admin/reload)
|
||||
# Generate a secure random key: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||
ADMIN_API_KEY=CHANGE_THIS_TO_A_SECURE_RANDOM_KEY
|
||||
|
||||
# Rate limiting (requests per minute per IP)
|
||||
RATE_LIMIT_PER_MINUTE=60
|
||||
RATE_LIMIT_BURST=10
|
||||
|
||||
# Maximum request body size in bytes (default 1MB)
|
||||
MAX_REQUEST_SIZE=1048576
|
||||
|
||||
# =============================================================================
|
||||
# API
|
||||
# =============================================================================
|
||||
DEFAULT_PAGE_SIZE=50
|
||||
MAX_PAGE_SIZE=100
|
||||
45
.env.example
Normal file
45
.env.example
Normal file
@@ -0,0 +1,45 @@
|
||||
# SchoolCompare Environment Configuration
|
||||
# Copy this file to .env and update the values
|
||||
|
||||
# =============================================================================
|
||||
# DATABASE
|
||||
# =============================================================================
|
||||
# PostgreSQL connection string
|
||||
DATABASE_URL=postgresql://schoolcompare:CHANGE_THIS_PASSWORD@localhost:5432/schoolcompare
|
||||
|
||||
# =============================================================================
|
||||
# SERVER
|
||||
# =============================================================================
|
||||
# Set to False in production
|
||||
DEBUG=False
|
||||
|
||||
# Server host and port
|
||||
HOST=0.0.0.0
|
||||
PORT=80
|
||||
|
||||
# =============================================================================
|
||||
# CORS
|
||||
# =============================================================================
|
||||
# List of allowed origins, written as a JSON array (see the value below)
|
||||
# In production, only include your actual domain
|
||||
ALLOWED_ORIGINS=["https://schoolcompare.co.uk"]
|
||||
|
||||
# =============================================================================
|
||||
# SECURITY
|
||||
# =============================================================================
|
||||
# Admin API key for protected endpoints (e.g., /api/admin/reload)
|
||||
# Generate a secure random key: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||
ADMIN_API_KEY=CHANGE_THIS_TO_A_SECURE_RANDOM_KEY
|
||||
|
||||
# Rate limiting (requests per minute per IP)
|
||||
RATE_LIMIT_PER_MINUTE=60
|
||||
RATE_LIMIT_BURST=10
|
||||
|
||||
# Maximum request body size in bytes (default 1MB)
|
||||
MAX_REQUEST_SIZE=1048576
|
||||
|
||||
# =============================================================================
|
||||
# API
|
||||
# =============================================================================
|
||||
DEFAULT_PAGE_SIZE=50
|
||||
MAX_PAGE_SIZE=100
|
||||
@@ -50,3 +50,8 @@ jobs:
|
||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
|
||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max
|
||||
|
||||
- name: Trigger Portainer stack update
|
||||
if: gitea.event_name != 'pull_request'
|
||||
run: |
|
||||
curl -X POST -k "https://10.0.1.224:9443/api/stacks/webhooks/863fc57c-bf24-4c63-9001-bdf9912fba73"
|
||||
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
||||
venv
|
||||
backend/__pycache__
|
||||
|
||||
25
README.md
25
README.md
@@ -179,6 +179,31 @@ Data is sourced from the UK Government's [Compare School Performance](https://ww
|
||||
|
||||
**Important**: When using real data, please comply with the [terms of use](https://www.compare-school-performance.service.gov.uk/download-data) and data protection regulations.
|
||||
|
||||
## Scheduled Jobs
|
||||
|
||||
### Geocoding Schools (Cron Job)
|
||||
|
||||
School postcodes are geocoded by a scheduled job rather than on demand. This improves performance and reduces API calls.
|
||||
|
||||
**Set up the cron job** (runs weekly on Sunday at 2am):
|
||||
|
||||
```bash
|
||||
# Edit crontab
|
||||
crontab -e
|
||||
|
||||
# Add this line (adjust paths as needed):
|
||||
0 2 * * 0 cd /path/to/school_compare && /path/to/venv/bin/python scripts/geocode_schools.py >> /var/log/geocode_schools.log 2>&1
|
||||
```
|
||||
|
||||
**Manual run:**
|
||||
```bash
|
||||
# Geocode only schools missing coordinates
|
||||
python scripts/geocode_schools.py
|
||||
|
||||
# Force re-geocode all schools
|
||||
python scripts/geocode_schools.py --force
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT License - feel free to use this project for educational purposes.
|
||||
|
||||
487
backend/app.py
487
backend/app.py
@@ -4,40 +4,150 @@ Serves primary school (KS2) performance data for comparing schools.
|
||||
Uses real data from UK Government Compare School Performance downloads.
|
||||
"""
|
||||
|
||||
import re
|
||||
from contextlib import asynccontextmanager
|
||||
import pandas as pd
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from fastapi import FastAPI, HTTPException, Query, Request, Depends, Header
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, Response
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||
from slowapi.util import get_remote_address
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
from .config import settings
|
||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||
from .data_loader import (
|
||||
load_school_data, clear_cache, geocode_single_postcode,
|
||||
geocode_postcodes_bulk, haversine_distance, get_data_info as get_db_info
|
||||
clear_cache,
|
||||
load_school_data,
|
||||
geocode_single_postcode,
|
||||
)
|
||||
from .data_loader import get_data_info as get_db_info
|
||||
from .database import init_db
|
||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||
from .utils import clean_for_json
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SECURITY MIDDLEWARE & HELPERS
|
||||
# =============================================================================
|
||||
|
||||
# Rate limiter
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
|
||||
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Middleware that stamps a fixed set of security headers on every response."""

    async def dispatch(self, request: Request, call_next):
        """Pass the request downstream, then attach the security headers.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request to the next handler
                and returns its response.

        Returns:
            The downstream response with the security headers set.
        """
        # Content Security Policy, assembled from one directive per line.
        content_security_policy = (
            "default-src 'self'; "
            "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://unpkg.com https://www.googletagmanager.com; "
            "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com https://cdn.jsdelivr.net https://unpkg.com; "
            "font-src 'self' https://fonts.gstatic.com; "
            "img-src 'self' data: https://*.tile.openstreetmap.org https://unpkg.com https://www.google-analytics.com; "
            "connect-src 'self' https://cdn.jsdelivr.net https://*.tile.openstreetmap.org https://unpkg.com https://www.google-analytics.com https://analytics.google.com https://*.google-analytics.com; "
            "frame-ancestors 'none'; "
            "base-uri 'self'; "
            "form-action 'self' https://formsubmit.co;"
        )

        # Header name -> value, applied in this order.
        security_headers = {
            # Prevent clickjacking
            "X-Frame-Options": "DENY",
            # Prevent MIME type sniffing
            "X-Content-Type-Options": "nosniff",
            # XSS protection (legacy browsers)
            "X-XSS-Protection": "1; mode=block",
            # Referrer policy
            "Referrer-Policy": "strict-origin-when-cross-origin",
            # Permissions policy (restrict browser features)
            "Permissions-Policy": "geolocation=(), microphone=(), camera=(), payment=()",
            "Content-Security-Policy": content_security_policy,
            # HSTS is meant for HTTPS deployments; note that this code sets it
            # on every response without checking the request scheme.
            "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
        }

        response = await call_next(request)
        for header_name, header_value in security_headers.items():
            response.headers[header_name] = header_value
        return response
|
||||
|
||||
|
||||
class RequestSizeLimitMiddleware(BaseHTTPMiddleware):
    """Limit request body size to prevent DoS attacks.

    Only the declared ``Content-Length`` header is inspected; requests that
    carry no such header are passed through unchanged.
    """

    async def dispatch(self, request: Request, call_next):
        """Reject oversized or malformed ``Content-Length`` values.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request to the next handler
                and returns its response.

        Returns:
            A 400 response when ``Content-Length`` is not a non-negative
            integer, a 413 response when it exceeds
            ``settings.max_request_size``, otherwise the downstream response.
        """
        content_length = request.headers.get("content-length")
        if content_length:
            try:
                declared_size = int(content_length)
            except ValueError:
                # A non-numeric header used to raise here and surface as a
                # 500; answer with a client error instead.
                declared_size = -1

            if declared_size < 0:
                return Response(
                    content="Invalid Content-Length header",
                    status_code=400,
                )

            if declared_size > settings.max_request_size:
                return Response(
                    content="Request too large",
                    status_code=413,
                )

        return await call_next(request)
|
||||
|
||||
|
||||
def verify_admin_api_key(x_api_key: str = Header(None)) -> bool:
    """Verify admin API key for protected endpoints.

    Args:
        x_api_key: Value of the ``X-API-Key`` request header, or ``None``
            when the header is absent.

    Returns:
        ``True`` when the supplied key matches ``settings.admin_api_key``.

    Raises:
        HTTPException: 401 when the header is missing or empty, when no admin
            key is configured, or when the keys differ.
    """
    # Local import keeps this block self-contained.
    import hmac

    expected_key = settings.admin_api_key

    # compare_digest takes time independent of where the inputs differ, so the
    # response time does not leak how much of the key was guessed correctly.
    # Encoding to bytes lets it accept non-ASCII header values as well.
    keys_match = (
        bool(x_api_key)
        and bool(expected_key)
        and hmac.compare_digest(
            x_api_key.encode("utf-8"), expected_key.encode("utf-8")
        )
    )

    if not keys_match:
        raise HTTPException(
            status_code=401,
            detail="Invalid or missing API key",
            headers={"WWW-Authenticate": "ApiKey"},
        )
    return True
|
||||
|
||||
|
||||
# Input validation helpers
|
||||
def sanitize_search_input(value: Optional[str], max_length: int = 100) -> Optional[str]:
    """Sanitize search input to prevent injection attacks.

    The value is trimmed, cut to ``max_length`` characters, stripped of every
    character other than word characters, whitespace, ``-``, ``'``, ``,`` and
    ``.``, and trimmed again.

    Args:
        value: Raw user-supplied text, or ``None``.
        max_length: Maximum number of characters kept before filtering.

    Returns:
        The cleaned text, or ``None`` when the input is ``None`` or nothing
        but whitespace remains after cleaning.
    """
    if value is None:
        return None

    # Strip whitespace and limit length
    truncated = value.strip()[:max_length]

    # Remove potentially dangerous characters (allow alphanumeric, spaces, common punctuation)
    cleaned = re.sub(r"[^\w\s\-\',\.]", "", truncated)

    # Removing characters can expose leading/trailing whitespace, or leave
    # only whitespace (e.g. "$ $" -> " "); trim again so such input counts as
    # empty instead of becoming a blank search term.
    cleaned = cleaned.strip()

    return cleaned or None
|
||||
|
||||
|
||||
def validate_postcode(postcode: Optional[str]) -> Optional[str]:
    """Check a string against the UK postcode pattern.

    Args:
        postcode: Raw postcode text, or ``None``.

    Returns:
        The postcode trimmed and upper-cased when it matches the pattern;
        ``None`` for empty input or when it does not match.
    """
    if not postcode:
        return None

    candidate = postcode.strip().upper()

    # UK postcode pattern: outward code, optional whitespace, inward code.
    uk_postcode_pattern = r"^[A-Z]{1,2}[0-9][A-Z0-9]?\s*[0-9][A-Z]{2}$"

    return candidate if re.match(uk_postcode_pattern, candidate) else None
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: startup work before the yield, shutdown after it."""
    # Startup: initialize the database, then pre-load the school data.
    print("Starting up: Initializing database...")
    init_db()  # Ensure tables exist

    print("Loading school data from database...")
    school_data = load_school_data()
    load_status = (
        "Warning: No data in database. Run the migration script to import data."
        if school_data.empty
        else "Data loaded successfully."
    )
    print(load_status)

    yield  # Application runs here

    # Shutdown: nothing to clean up beyond logging.
    print("Shutting down...")
|
||||
|
||||
@@ -47,15 +157,27 @@ app = FastAPI(
|
||||
description="API for comparing primary school (KS2) performance data - schoolcompare.co.uk",
|
||||
version="2.0.0",
|
||||
lifespan=lifespan,
|
||||
# Disable docs in production for security
|
||||
docs_url="/docs" if settings.debug else None,
|
||||
redoc_url="/redoc" if settings.debug else None,
|
||||
openapi_url="/openapi.json" if settings.debug else None,
|
||||
)
|
||||
|
||||
# CORS middleware with configurable origins
|
||||
# Add rate limiter
|
||||
app.state.limiter = limiter
|
||||
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
||||
|
||||
# Security middleware (order matters - these run in reverse order)
|
||||
app.add_middleware(SecurityHeadersMiddleware)
|
||||
app.add_middleware(RequestSizeLimitMiddleware)
|
||||
|
||||
# CORS middleware - restricted for production
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.allowed_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
allow_credentials=False, # Don't allow credentials unless needed
|
||||
allow_methods=["GET", "POST"], # Only allow needed methods
|
||||
allow_headers=["Content-Type", "X-API-Key"], # Only allow needed headers
|
||||
)
|
||||
|
||||
|
||||
@@ -77,128 +199,187 @@ async def serve_rankings():
|
||||
return FileResponse(settings.frontend_dir / "index.html")
|
||||
|
||||
|
||||
@app.get("/api/config")
async def get_config():
    """Return the public configuration values consumed by the frontend."""
    public_config = {"ga_measurement_id": settings.ga_measurement_id}
    return public_config
|
||||
|
||||
|
||||
@app.get("/api/schools")
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_schools(
|
||||
search: Optional[str] = Query(None, description="Search by school name"),
|
||||
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
|
||||
school_type: Optional[str] = Query(None, description="Filter by school type"),
|
||||
postcode: Optional[str] = Query(None, description="Search near postcode"),
|
||||
request: Request,
|
||||
search: Optional[str] = Query(None, description="Search by school name", max_length=100),
|
||||
local_authority: Optional[str] = Query(
|
||||
None, description="Filter by local authority", max_length=100
|
||||
),
|
||||
school_type: Optional[str] = Query(None, description="Filter by school type", max_length=100),
|
||||
postcode: Optional[str] = Query(None, description="Search near postcode", max_length=10),
|
||||
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
|
||||
page: int = Query(1, ge=1, description="Page number"),
|
||||
page: int = Query(1, ge=1, le=1000, description="Page number"),
|
||||
page_size: int = Query(None, ge=1, le=100, description="Results per page"),
|
||||
):
|
||||
"""
|
||||
Get list of unique primary schools with pagination.
|
||||
|
||||
|
||||
Returns paginated results with total count for efficient loading.
|
||||
Supports location-based search using postcode.
|
||||
"""
|
||||
# Sanitize inputs
|
||||
search = sanitize_search_input(search)
|
||||
local_authority = sanitize_search_input(local_authority)
|
||||
school_type = sanitize_search_input(school_type)
|
||||
postcode = validate_postcode(postcode)
|
||||
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
return {"schools": [], "total": 0, "page": page, "page_size": 0}
|
||||
|
||||
|
||||
# Use configured default if not specified
|
||||
if page_size is None:
|
||||
page_size = settings.default_page_size
|
||||
|
||||
|
||||
# Get unique schools (latest year data for each)
|
||||
latest_year = df.groupby('urn')['year'].max().reset_index()
|
||||
df_latest = df.merge(latest_year, on=['urn', 'year'])
|
||||
|
||||
# Include lat/long in columns for location search
|
||||
location_cols = ['latitude', 'longitude']
|
||||
available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
|
||||
schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
|
||||
|
||||
# Location-based search
|
||||
latest_year = df.groupby("urn")["year"].max().reset_index()
|
||||
df_latest = df.merge(latest_year, on=["urn", "year"])
|
||||
|
||||
# Calculate trend by comparing to previous year
|
||||
# Get second-latest year for each school
|
||||
df_sorted = df.sort_values(["urn", "year"], ascending=[True, False])
|
||||
df_prev = df_sorted.groupby("urn").nth(1).reset_index()
|
||||
if not df_prev.empty and "rwm_expected_pct" in df_prev.columns:
|
||||
prev_rwm = df_prev[["urn", "rwm_expected_pct"]].rename(
|
||||
columns={"rwm_expected_pct": "prev_rwm_expected_pct"}
|
||||
)
|
||||
df_latest = df_latest.merge(prev_rwm, on="urn", how="left")
|
||||
|
||||
# Include key result metrics for display on cards
|
||||
location_cols = ["latitude", "longitude"]
|
||||
result_cols = [
|
||||
"year",
|
||||
"rwm_expected_pct",
|
||||
"rwm_high_pct",
|
||||
"prev_rwm_expected_pct",
|
||||
"reading_expected_pct",
|
||||
"writing_expected_pct",
|
||||
"maths_expected_pct",
|
||||
"total_pupils",
|
||||
]
|
||||
available_cols = [
|
||||
c
|
||||
for c in SCHOOL_COLUMNS + location_cols + result_cols
|
||||
if c in df_latest.columns
|
||||
]
|
||||
schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])
|
||||
|
||||
# Location-based search (uses pre-geocoded data from database)
|
||||
search_coords = None
|
||||
if postcode:
|
||||
coords = geocode_single_postcode(postcode)
|
||||
if coords:
|
||||
search_coords = coords
|
||||
schools_df = schools_df.copy()
|
||||
|
||||
# Geocode school postcodes on-demand if not already cached
|
||||
if 'postcode' in schools_df.columns:
|
||||
unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
|
||||
geocoded = geocode_postcodes_bulk(unique_postcodes)
|
||||
|
||||
# Add lat/long from geocoded data
|
||||
schools_df['latitude'] = schools_df['postcode'].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
|
||||
)
|
||||
schools_df['longitude'] = schools_df['postcode'].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
|
||||
)
|
||||
|
||||
# Filter by distance
|
||||
def calc_distance(row):
|
||||
if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
|
||||
return float('inf')
|
||||
return haversine_distance(
|
||||
search_coords[0], search_coords[1],
|
||||
row['latitude'], row['longitude']
|
||||
)
|
||||
|
||||
schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
|
||||
schools_df = schools_df[schools_df['distance'] <= radius]
|
||||
schools_df = schools_df.sort_values('distance')
|
||||
|
||||
|
||||
# Filter by distance using pre-geocoded lat/long from database
|
||||
# Use vectorized haversine calculation for better performance
|
||||
lat1, lon1 = search_coords
|
||||
|
||||
# Handle potential duplicate columns by taking first occurrence
|
||||
lat_col = schools_df.loc[:, "latitude"]
|
||||
lon_col = schools_df.loc[:, "longitude"]
|
||||
if isinstance(lat_col, pd.DataFrame):
|
||||
lat_col = lat_col.iloc[:, 0]
|
||||
if isinstance(lon_col, pd.DataFrame):
|
||||
lon_col = lon_col.iloc[:, 0]
|
||||
|
||||
lat2 = lat_col.values
|
||||
lon2 = lon_col.values
|
||||
|
||||
# Vectorized haversine formula
|
||||
R = 3959 # Earth's radius in miles
|
||||
lat1_rad = np.radians(lat1)
|
||||
lat2_rad = np.radians(lat2)
|
||||
dlat = np.radians(lat2 - lat1)
|
||||
dlon = np.radians(lon2 - lon1)
|
||||
|
||||
a = np.sin(dlat / 2) ** 2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
|
||||
c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
|
||||
distances = R * c
|
||||
|
||||
# Handle missing coordinates
|
||||
has_coords = ~(pd.isna(lat_col) | pd.isna(lon_col))
|
||||
distances = np.where(has_coords.values, distances, float("inf"))
|
||||
schools_df["distance"] = distances
|
||||
schools_df = schools_df[schools_df["distance"] <= radius]
|
||||
schools_df = schools_df.sort_values("distance")
|
||||
|
||||
# Apply filters
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
||||
mask = (
|
||||
schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
||||
)
|
||||
if "address" in schools_df.columns:
|
||||
mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
|
||||
mask = mask | schools_df["address"].str.lower().str.contains(
|
||||
search_lower, na=False
|
||||
)
|
||||
schools_df = schools_df[mask]
|
||||
|
||||
|
||||
if local_authority:
|
||||
schools_df = schools_df[schools_df["local_authority"].str.lower() == local_authority.lower()]
|
||||
|
||||
schools_df = schools_df[
|
||||
schools_df["local_authority"].str.lower() == local_authority.lower()
|
||||
]
|
||||
|
||||
if school_type:
|
||||
schools_df = schools_df[schools_df["school_type"].str.lower() == school_type.lower()]
|
||||
|
||||
schools_df = schools_df[
|
||||
schools_df["school_type"].str.lower() == school_type.lower()
|
||||
]
|
||||
|
||||
# Pagination
|
||||
total = len(schools_df)
|
||||
start_idx = (page - 1) * page_size
|
||||
end_idx = start_idx + page_size
|
||||
schools_df = schools_df.iloc[start_idx:end_idx]
|
||||
|
||||
# Remove internal columns before sending
|
||||
output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
|
||||
if 'distance' in schools_df.columns:
|
||||
output_cols.append('distance')
|
||||
|
||||
|
||||
return {
|
||||
"schools": clean_for_json(schools_df[output_cols]),
|
||||
"schools": clean_for_json(schools_df),
|
||||
"total": total,
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
"total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
|
||||
"search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
|
||||
"search_location": {"postcode": postcode, "radius": radius}
|
||||
if search_coords
|
||||
else None,
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/schools/{urn}")
|
||||
async def get_school_details(urn: int):
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_school_details(request: Request, urn: int):
|
||||
"""Get detailed KS2 data for a specific primary school across all years."""
|
||||
# Validate URN range (UK school URNs are 6 digits)
|
||||
if not (100000 <= urn <= 999999):
|
||||
raise HTTPException(status_code=400, detail="Invalid URN format")
|
||||
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
raise HTTPException(status_code=404, detail="No data available")
|
||||
|
||||
|
||||
school_data = df[df["urn"] == urn]
|
||||
|
||||
|
||||
if school_data.empty:
|
||||
raise HTTPException(status_code=404, detail="School not found")
|
||||
|
||||
|
||||
# Sort by year
|
||||
school_data = school_data.sort_values("year")
|
||||
|
||||
|
||||
# Get latest info for the school
|
||||
latest = school_data.iloc[-1]
|
||||
|
||||
|
||||
return {
|
||||
"school_info": {
|
||||
"urn": urn,
|
||||
@@ -206,30 +387,45 @@ async def get_school_details(urn: int):
|
||||
"local_authority": latest.get("local_authority", ""),
|
||||
"school_type": latest.get("school_type", ""),
|
||||
"address": latest.get("address", ""),
|
||||
"religious_denomination": latest.get("religious_denomination", ""),
|
||||
"age_range": latest.get("age_range", ""),
|
||||
"latitude": latest.get("latitude"),
|
||||
"longitude": latest.get("longitude"),
|
||||
"phase": "Primary",
|
||||
},
|
||||
"yearly_data": clean_for_json(school_data)
|
||||
"yearly_data": clean_for_json(school_data),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/compare")
|
||||
async def compare_schools(urns: str = Query(..., description="Comma-separated URNs")):
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def compare_schools(
|
||||
request: Request,
|
||||
urns: str = Query(..., description="Comma-separated URNs", max_length=100)
|
||||
):
|
||||
"""Compare multiple primary schools side by side."""
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
raise HTTPException(status_code=404, detail="No data available")
|
||||
|
||||
|
||||
try:
|
||||
urn_list = [int(u.strip()) for u in urns.split(",")]
|
||||
# Limit number of schools to compare
|
||||
if len(urn_list) > 10:
|
||||
raise HTTPException(status_code=400, detail="Maximum 10 schools can be compared")
|
||||
# Validate URN format
|
||||
for urn in urn_list:
|
||||
if not (100000 <= urn <= 999999):
|
||||
raise HTTPException(status_code=400, detail="Invalid URN format")
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid URN format")
|
||||
|
||||
|
||||
comparison_data = df[df["urn"].isin(urn_list)]
|
||||
|
||||
|
||||
if comparison_data.empty:
|
||||
raise HTTPException(status_code=404, detail="No schools found")
|
||||
|
||||
|
||||
result = {}
|
||||
for urn in urn_list:
|
||||
school_data = comparison_data[comparison_data["urn"] == urn].sort_values("year")
|
||||
@@ -242,24 +438,25 @@ async def compare_schools(urns: str = Query(..., description="Comma-separated UR
|
||||
"local_authority": latest.get("local_authority", ""),
|
||||
"address": latest.get("address", ""),
|
||||
},
|
||||
"yearly_data": clean_for_json(school_data)
|
||||
"yearly_data": clean_for_json(school_data),
|
||||
}
|
||||
|
||||
|
||||
return {"comparison": result}
|
||||
|
||||
|
||||
@app.get("/api/filters")
|
||||
async def get_filter_options():
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_filter_options(request: Request):
|
||||
"""Get available filter options (local authorities, school types, years)."""
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
return {
|
||||
"local_authorities": [],
|
||||
"school_types": [],
|
||||
"years": [],
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
"local_authorities": sorted(df["local_authority"].dropna().unique().tolist()),
|
||||
"school_types": sorted(df["school_type"].dropna().unique().tolist()),
|
||||
@@ -268,39 +465,53 @@ async def get_filter_options():
|
||||
|
||||
|
||||
@app.get("/api/metrics")
|
||||
async def get_available_metrics():
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_available_metrics(request: Request):
|
||||
"""
|
||||
Get list of available KS2 performance metrics for primary schools.
|
||||
|
||||
|
||||
This is the single source of truth for metric definitions.
|
||||
Frontend should consume this to avoid duplication.
|
||||
"""
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
available = []
|
||||
for key, info in METRIC_DEFINITIONS.items():
|
||||
if df.empty or key in df.columns:
|
||||
available.append({"key": key, **info})
|
||||
|
||||
|
||||
return {"metrics": available}
|
||||
|
||||
|
||||
@app.get("/api/rankings")
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_rankings(
|
||||
metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by"),
|
||||
year: Optional[int] = Query(None, description="Specific year (defaults to most recent)"),
|
||||
request: Request,
|
||||
metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by", max_length=50),
|
||||
year: Optional[int] = Query(
|
||||
None, description="Specific year (defaults to most recent)", ge=2000, le=2100
|
||||
),
|
||||
limit: int = Query(20, ge=1, le=100, description="Number of schools to return"),
|
||||
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
|
||||
local_authority: Optional[str] = Query(
|
||||
None, description="Filter by local authority", max_length=100
|
||||
),
|
||||
):
|
||||
"""Get primary school rankings by a specific KS2 metric."""
|
||||
# Sanitize local authority input
|
||||
local_authority = sanitize_search_input(local_authority)
|
||||
|
||||
# Validate metric name (only allow alphanumeric and underscore)
|
||||
if not re.match(r"^[a-z0-9_]+$", metric):
|
||||
raise HTTPException(status_code=400, detail="Invalid metric name")
|
||||
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
return {"metric": metric, "year": None, "rankings": [], "total": 0}
|
||||
|
||||
|
||||
if metric not in df.columns:
|
||||
raise HTTPException(status_code=400, detail=f"Metric '{metric}' not available")
|
||||
|
||||
|
||||
# Filter by year
|
||||
if year:
|
||||
df = df[df["year"] == year]
|
||||
@@ -308,22 +519,22 @@ async def get_rankings(
|
||||
# Use most recent year
|
||||
max_year = df["year"].max()
|
||||
df = df[df["year"] == max_year]
|
||||
|
||||
|
||||
# Filter by local authority if specified
|
||||
if local_authority:
|
||||
df = df[df["local_authority"].str.lower() == local_authority.lower()]
|
||||
|
||||
|
||||
# Sort and rank (exclude rows with no data for this metric)
|
||||
df = df.dropna(subset=[metric])
|
||||
total = len(df)
|
||||
|
||||
|
||||
# For progress scores, higher is better. For percentages, higher is also better.
|
||||
df = df.sort_values(metric, ascending=False).head(limit)
|
||||
|
||||
|
||||
# Return only relevant fields for rankings
|
||||
available_cols = [c for c in RANKING_COLUMNS if c in df.columns]
|
||||
df = df[available_cols]
|
||||
|
||||
|
||||
return {
|
||||
"metric": metric,
|
||||
"year": int(df["year"].iloc[0]) if not df.empty else None,
|
||||
@@ -333,32 +544,39 @@ async def get_rankings(
|
||||
|
||||
|
||||
@app.get("/api/data-info")
|
||||
async def get_data_info():
|
||||
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
|
||||
async def get_data_info(request: Request):
|
||||
"""Get information about loaded data."""
|
||||
# Get info directly from database
|
||||
db_info = get_db_info()
|
||||
|
||||
|
||||
if db_info["total_schools"] == 0:
|
||||
return {
|
||||
"status": "no_data",
|
||||
"message": "No data in database. Run the migration script: python scripts/migrate_csv_to_db.py",
|
||||
"data_source": "PostgreSQL",
|
||||
}
|
||||
|
||||
|
||||
# Also get DataFrame-based stats for backwards compatibility
|
||||
df = load_school_data()
|
||||
|
||||
|
||||
if df.empty:
|
||||
return {
|
||||
"status": "no_data",
|
||||
"message": "No data available",
|
||||
"data_source": "PostgreSQL",
|
||||
}
|
||||
|
||||
|
||||
years = [int(y) for y in sorted(df["year"].unique())]
|
||||
schools_per_year = {str(int(k)): int(v) for k, v in df.groupby("year")["urn"].nunique().to_dict().items()}
|
||||
la_counts = {str(k): int(v) for k, v in df["local_authority"].value_counts().to_dict().items()}
|
||||
|
||||
schools_per_year = {
|
||||
str(int(k)): int(v)
|
||||
for k, v in df.groupby("year")["urn"].nunique().to_dict().items()
|
||||
}
|
||||
la_counts = {
|
||||
str(k): int(v)
|
||||
for k, v in df["local_authority"].value_counts().to_dict().items()
|
||||
}
|
||||
|
||||
return {
|
||||
"status": "loaded",
|
||||
"data_source": "PostgreSQL",
|
||||
@@ -371,13 +589,43 @@ async def get_data_info():
|
||||
|
||||
|
||||
@app.post("/api/admin/reload")
@limiter.limit("5/minute")
async def reload_data(
    request: Request,
    _: bool = Depends(verify_admin_api_key),
):
    """
    Admin endpoint to force data reload (useful after data updates).

    Requires X-API-Key header with valid admin API key.
    The endpoint is rate limited to 5 calls per minute.
    """
    # NOTE(review): `request` is unused in the body; presumably the rate
    # limiter decorator needs it in the signature - confirm before removing.
    # Drop whatever is cached, then load again so the cache is repopulated.
    clear_cache()
    load_school_data()
    return {"status": "reloaded"}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SEO FILES
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@app.get("/favicon.svg")
async def favicon():
    """Return the favicon stored in the frontend directory as an SVG image."""
    favicon_path = settings.frontend_dir / "favicon.svg"
    return FileResponse(favicon_path, media_type="image/svg+xml")
|
||||
|
||||
|
||||
@app.get("/robots.txt")
async def robots_txt():
    """Return robots.txt from the frontend directory for search engine crawlers."""
    robots_path = settings.frontend_dir / "robots.txt"
    return FileResponse(robots_path, media_type="text/plain")
|
||||
|
||||
|
||||
@app.get("/sitemap.xml")
async def sitemap_xml():
    """Return sitemap.xml from the frontend directory for search engine indexing."""
    sitemap_path = settings.frontend_dir / "sitemap.xml"
    return FileResponse(sitemap_path, media_type="application/xml")
|
||||
|
||||
|
||||
# Mount static files directly (must be after all routes to avoid catching API calls)
|
||||
if settings.frontend_dir.exists():
|
||||
app.mount("/static", StaticFiles(directory=settings.frontend_dir), name="static")
|
||||
@@ -385,4 +633,5 @@ if settings.frontend_dir.exists():
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: serve the app on the configured host and port.
    import uvicorn

    bind_host, bind_port = settings.host, settings.port
    uvicorn.run(app, host=bind_host, port=bind_port)
|
||||
|
||||
@@ -3,33 +3,44 @@ Application configuration using pydantic-settings.
|
||||
Loads from environment variables and .env file.
|
||||
"""
|
||||
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from pydantic_settings import BaseSettings
|
||||
import os
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """
    Application settings loaded from environment.

    Every field below is a default; values found in the environment or in
    the ``.env`` file named in ``Config`` take precedence.
    """

    # Paths (two directory levels up from this file)
    data_dir: Path = Path(__file__).parent.parent / "data"
    frontend_dir: Path = Path(__file__).parent.parent / "frontend"

    # Server
    host: str = "0.0.0.0"
    port: int = 80
    debug: bool = False  # Set to False in production

    # Database
    # NOTE(review): the default embeds placeholder credentials; real
    # deployments are expected to override it via the environment.
    database_url: str = "postgresql://schoolcompare:schoolcompare@localhost:5432/schoolcompare"

    # CORS - Production should only allow the actual domain.
    # Declared once: an earlier duplicate declaration (which also listed
    # localhost origins) was dead because this one overrode it.
    allowed_origins: List[str] = ["https://schoolcompare.co.uk"]

    # API
    default_page_size: int = 50
    max_page_size: int = 100

    # Security
    # When no key is configured a random one is generated at instantiation,
    # so it changes on every start and differs between processes.
    admin_api_key: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
    rate_limit_per_minute: int = 60  # Requests per minute per IP
    rate_limit_burst: int = 10  # Allow burst of requests
    max_request_size: int = 1024 * 1024  # 1MB max request size

    # Analytics
    ga_measurement_id: Optional[str] = "G-J0PCVT14NY"  # Google Analytics 4 Measurement ID

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
"""
|
||||
Data loading module that queries from PostgreSQL database.
|
||||
Provides efficient queries with caching and lazy loading.
|
||||
|
||||
Note: School geocoding is handled by a separate cron job (scripts/geocode_schools.py).
|
||||
Only user search postcodes are geocoded on-demand via geocode_single_postcode().
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
@@ -14,57 +17,37 @@ from sqlalchemy.orm import joinedload, Session
|
||||
from .config import settings
|
||||
from .database import SessionLocal, get_db_session
|
||||
from .models import School, SchoolResult
|
||||
from .schemas import SCHOOL_TYPE_MAP
|
||||
|
||||
# Cache for postcode geocoding
|
||||
# Cache for user search postcode geocoding (not for school data)
|
||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Each postcode is stripped and upper-cased before use. Entries that are
    empty or not strings are ignored, as are uncached entries shorter than
    five characters. Successful lookups are stored in ``_postcode_cache``.

    Args:
        postcodes: Iterable of postcode strings (other values are skipped).

    Returns:
        Dict of normalised postcode -> (latitude, longitude). Postcodes that
        could not be resolved are simply absent.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # Check cache first; collect the remainder (de-duplicated) for the API.
    uncached = set()
    for pc in postcodes:
        if not pc or not isinstance(pc, str):
            continue
        pc_upper = pc.strip().upper()
        if pc_upper in _postcode_cache:
            results[pc_upper] = _postcode_cache[pc_upper]
        elif len(pc_upper) >= 5:
            uncached.add(pc_upper)

    if not uncached:
        return results

    pending = list(uncached)

    # postcodes.io allows max 100 postcodes per request
    batch_size = 100
    for start in range(0, len(pending), batch_size):
        batch = pending[start:start + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30
            )
            if response.status_code != 200:
                continue
            data = response.json()
            # `or []` tolerates a payload whose "result" is null.
            for item in data.get('result') or []:
                if not item or not item.get('result'):
                    continue
                pc = item['query'].upper()
                lat = item['result'].get('latitude')
                lon = item['result'].get('longitude')
                # Compare against None: 0.0 is a valid coordinate and must
                # not be discarded by a truthiness test.
                if lat is not None and lon is not None:
                    results[pc] = (lat, lon)
                    _postcode_cache[pc] = (lat, lon)
        except Exception as e:
            # Best effort: a failed batch is reported and the rest continue.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
def normalize_school_type(school_type: Optional[str]) -> Optional[str]:
    """
    Convert cryptic school type codes to user-friendly names.

    Args:
        school_type: Raw value, either a short code or an already friendly name.

    Returns:
        The friendly name from ``SCHOOL_TYPE_MAP`` when the stripped,
        upper-cased value is a known code; ``None`` for missing, empty or
        whitespace-only input; otherwise the original value unchanged.
    """
    if not school_type:
        return None
    code = school_type.strip().upper()
    # A whitespace-only value carries no information; treat it as missing
    # rather than passing a blank "type" on to callers.
    if not code:
        return None
    # Fall back to the original for friendly names and unknown codes.
    return SCHOOL_TYPE_MAP.get(code, school_type)
|
||||
|
||||
|
||||
def get_school_type_codes_for_filter(school_type: str) -> List[str]:
    """
    Return every lower-cased value that should match a friendly type name.

    The result holds each ``SCHOOL_TYPE_MAP`` code whose friendly name equals
    ``school_type`` (case-insensitively), followed by the lower-cased
    ``school_type`` itself. An empty or missing input yields ``[]``.
    """
    if not school_type:
        return []

    wanted = school_type.lower()
    matching_codes = [
        code.lower()
        for code, label in SCHOOL_TYPE_MAP.items()
        if label.lower() == wanted
    ]
    # The name itself is included in case it is stored as-is.
    return matching_codes + [wanted]
|
||||
|
||||
|
||||
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
|
||||
@@ -160,18 +143,24 @@ def get_available_local_authorities(db: Session = None) -> List[str]:
|
||||
|
||||
|
||||
def get_available_school_types(db: Session = None) -> List[str]:
    """
    Get list of available school types (normalized to user-friendly names).

    Args:
        db: Session to use. When omitted, one is obtained from ``get_db()``
            and closed before returning; a caller-supplied session is left open.

    Returns:
        Sorted list of distinct friendly school type names.
    """
    close_db = db is None
    if db is None:
        db = get_db()

    try:
        rows = (
            db.query(School.school_type)
            .filter(School.school_type.isnot(None))
            .distinct()
            .order_by(School.school_type)
            .all()
        )
        # Several raw codes can share one friendly name, so normalise first
        # and de-duplicate afterwards.
        friendly_names = (normalize_school_type(row[0]) for row in rows if row[0])
        return sorted({name for name in friendly_names if name})
    finally:
        if close_db:
            db.close()
|
||||
@@ -217,17 +206,20 @@ def get_schools(
|
||||
|
||||
if local_authority:
|
||||
query = query.filter(func.lower(School.local_authority) == local_authority.lower())
|
||||
|
||||
|
||||
if school_type:
|
||||
query = query.filter(func.lower(School.school_type) == school_type.lower())
|
||||
|
||||
# Filter by all codes that map to this friendly name
|
||||
type_codes = get_school_type_codes_for_filter(school_type)
|
||||
if type_codes:
|
||||
query = query.filter(func.lower(School.school_type).in_(type_codes))
|
||||
|
||||
# Get total count
|
||||
total = query.count()
|
||||
|
||||
|
||||
# Apply pagination
|
||||
offset = (page - 1) * page_size
|
||||
schools = query.order_by(School.school_name).offset(offset).limit(page_size).all()
|
||||
|
||||
|
||||
return schools, total
|
||||
|
||||
|
||||
@@ -265,10 +257,13 @@ def get_schools_near_location(
|
||||
|
||||
if local_authority:
|
||||
query = query.filter(func.lower(School.local_authority) == local_authority.lower())
|
||||
|
||||
|
||||
if school_type:
|
||||
query = query.filter(func.lower(School.school_type) == school_type.lower())
|
||||
|
||||
# Filter by all codes that map to this friendly name
|
||||
type_codes = get_school_type_codes_for_filter(school_type)
|
||||
if type_codes:
|
||||
query = query.filter(func.lower(School.school_type).in_(type_codes))
|
||||
|
||||
# Get all matching schools and calculate distances
|
||||
all_schools = query.all()
|
||||
|
||||
@@ -380,17 +375,17 @@ def school_to_dict(school: School, include_results: bool = False) -> dict:
|
||||
"urn": school.urn,
|
||||
"school_name": school.school_name,
|
||||
"local_authority": school.local_authority,
|
||||
"school_type": school.school_type,
|
||||
"school_type": normalize_school_type(school.school_type),
|
||||
"address": school.address,
|
||||
"town": school.town,
|
||||
"postcode": school.postcode,
|
||||
"latitude": school.latitude,
|
||||
"longitude": school.longitude,
|
||||
}
|
||||
|
||||
|
||||
if include_results and school.results:
|
||||
data["results"] = [result_to_dict(r) for r in school.results]
|
||||
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -455,11 +450,11 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
||||
close_db = db is None
|
||||
if db is None:
|
||||
db = get_db()
|
||||
|
||||
|
||||
try:
|
||||
# Query all schools with their results
|
||||
schools = db.query(School).options(joinedload(School.results)).all()
|
||||
|
||||
|
||||
rows = []
|
||||
for school in schools:
|
||||
for result in school.results:
|
||||
@@ -467,7 +462,7 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
||||
"urn": school.urn,
|
||||
"school_name": school.school_name,
|
||||
"local_authority": school.local_authority,
|
||||
"school_type": school.school_type,
|
||||
"school_type": normalize_school_type(school.school_type),
|
||||
"address": school.address,
|
||||
"town": school.town,
|
||||
"postcode": school.postcode,
|
||||
@@ -476,7 +471,7 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
||||
**result_to_dict(result)
|
||||
}
|
||||
rows.append(row)
|
||||
|
||||
|
||||
if rows:
|
||||
return pd.DataFrame(rows)
|
||||
return pd.DataFrame()
|
||||
|
||||
67
backend/database.py
Normal file
67
backend/database.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""
|
||||
Database connection setup using SQLAlchemy.
|
||||
"""
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
from contextlib import contextmanager
|
||||
|
||||
from .config import settings
|
||||
|
||||
# Engine shared by the whole application, configured from settings.
engine = create_engine(
    settings.database_url,
    pool_size=10,
    max_overflow=20,
    pool_pre_ping=True,  # Verify connections before use
    echo=False,  # Set to True for SQL debugging
)

# Factory producing sessions bound to the engine above.
SessionLocal = sessionmaker(bind=engine, autocommit=False, autoflush=False)

# Declarative base that the ORM models inherit from.
Base = declarative_base()
|
||||
|
||||
|
||||
def get_db():
    """
    Dependency for FastAPI routes to get a database session.

    Yields a fresh session and closes it once the consumer is finished,
    whether or not an error occurred.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
@contextmanager
def get_db_session():
    """
    Context manager for database sessions.
    Use in non-FastAPI contexts (scripts, etc).

    Commits when the managed block finishes normally, rolls back and
    re-raises when it raises an ``Exception``, and always closes the session.
    """
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
|
||||
|
||||
|
||||
def init_db():
    """
    Initialize database - create all tables.

    Creates every table registered on ``Base.metadata`` using the module
    engine.
    """
    # Tables are registered on Base.metadata only when their model classes
    # are defined, so import the models first. The import is local because
    # models.py itself imports Base from this module.
    from . import models  # noqa: F401

    Base.metadata.create_all(bind=engine)
|
||||
|
||||
|
||||
def drop_db():
    """
    Drop all tables - use with caution!

    Drops every table registered on ``Base.metadata`` using the module
    engine.
    """
    # Metadata only knows tables whose models have been imported; import
    # them here (locally, models.py imports Base from this module) so the
    # drop covers all of them.
    from . import models  # noqa: F401

    Base.metadata.drop_all(bind=engine)
|
||||
|
||||
190
backend/models.py
Normal file
190
backend/models.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""
|
||||
SQLAlchemy database models for school data.
|
||||
Normalized schema with separate tables for schools and yearly results.
|
||||
"""
|
||||
|
||||
from sqlalchemy import (
|
||||
Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
|
||||
Text, Boolean
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from .database import Base
|
||||
|
||||
|
||||
class School(Base):
    """
    Core school information - relatively static data.

    Stored in the ``schools`` table; per-year results are reachable through
    the ``results`` relationship.
    """

    __tablename__ = "schools"

    # Identity and classification
    id = Column(Integer, primary_key=True, autoincrement=True)
    urn = Column(Integer, unique=True, nullable=False, index=True)
    school_name = Column(String(255), nullable=False)
    local_authority = Column(String(100))
    local_authority_code = Column(Integer)
    school_type = Column(String(100))
    school_type_code = Column(String(10))
    religious_denomination = Column(String(100))
    age_range = Column(String(20))

    # Address
    address1 = Column(String(255))
    address2 = Column(String(255))
    town = Column(String(100))
    postcode = Column(String(20), index=True)

    # Geocoding (cached)
    latitude = Column(Float)
    longitude = Column(Float)

    # Relationships: results are deleted together with their school.
    results = relationship(
        "SchoolResult",
        back_populates="school",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        return f"<School(urn={self.urn}, name='{self.school_name}')>"

    @property
    def address(self):
        """Join the non-empty address parts into one comma-separated string."""
        components = (self.address1, self.address2, self.town, self.postcode)
        return ", ".join(filter(None, components))
|
||||
|
||||
|
||||
class SchoolResult(Base):
    """
    Yearly KS2 results for a school.

    Stored in ``school_results``. A school can have several rows, but at
    most one per year (see ``uq_school_year``).
    """

    __tablename__ = "school_results"

    id = Column(Integer, primary_key=True, autoincrement=True)
    school_id = Column(
        Integer,
        ForeignKey("schools.id", ondelete="CASCADE"),
        nullable=False,
    )
    year = Column(Integer, nullable=False, index=True)

    # Pupil numbers
    total_pupils = Column(Integer)
    eligible_pupils = Column(Integer)

    # Core KS2 metrics - Expected Standard
    rwm_expected_pct = Column(Float)
    reading_expected_pct = Column(Float)
    writing_expected_pct = Column(Float)
    maths_expected_pct = Column(Float)
    gps_expected_pct = Column(Float)
    science_expected_pct = Column(Float)

    # Higher Standard
    rwm_high_pct = Column(Float)
    reading_high_pct = Column(Float)
    writing_high_pct = Column(Float)
    maths_high_pct = Column(Float)
    gps_high_pct = Column(Float)

    # Progress Scores
    reading_progress = Column(Float)
    writing_progress = Column(Float)
    maths_progress = Column(Float)

    # Average Scores
    reading_avg_score = Column(Float)
    maths_avg_score = Column(Float)
    gps_avg_score = Column(Float)

    # School Context
    disadvantaged_pct = Column(Float)
    eal_pct = Column(Float)
    sen_support_pct = Column(Float)
    sen_ehcp_pct = Column(Float)
    stability_pct = Column(Float)

    # Gender Breakdown
    rwm_expected_boys_pct = Column(Float)
    rwm_expected_girls_pct = Column(Float)
    rwm_high_boys_pct = Column(Float)
    rwm_high_girls_pct = Column(Float)

    # Disadvantaged Performance
    rwm_expected_disadvantaged_pct = Column(Float)
    rwm_expected_non_disadvantaged_pct = Column(Float)
    disadvantaged_gap = Column(Float)

    # 3-Year Averages
    rwm_expected_3yr_pct = Column(Float)
    reading_avg_3yr = Column(Float)
    maths_avg_3yr = Column(Float)

    # Relationship back to the owning school
    school = relationship("School", back_populates="results")

    # Constraints: one result row per (school, year), plus a composite index
    # on the same pair of columns.
    __table_args__ = (
        UniqueConstraint('school_id', 'year', name='uq_school_year'),
        Index('ix_school_results_school_year', 'school_id', 'year'),
    )

    def __repr__(self):
        return f"<SchoolResult(school_id={self.school_id}, year={self.year})>"
|
||||
|
||||
|
||||
# Mapping from CSV columns to model fields.
# Every column maps to a model field of the same name, so the mapping is
# built from a single list of names.
SCHOOL_FIELD_MAPPING = {
    field_name: field_name
    for field_name in (
        'urn',
        'school_name',
        'local_authority',
        'local_authority_code',
        'school_type',
        'school_type_code',
        'religious_denomination',
        'age_range',
        'address1',
        'address2',
        'town',
        'postcode',
    )
}
|
||||
|
||||
# Column -> model field mapping for yearly results. Every column maps to a
# field of the same name, so the mapping is built from one list of names.
RESULT_FIELD_MAPPING = {
    field_name: field_name
    for field_name in (
        'year',
        'total_pupils',
        'eligible_pupils',
        # Expected Standard
        'rwm_expected_pct',
        'reading_expected_pct',
        'writing_expected_pct',
        'maths_expected_pct',
        'gps_expected_pct',
        'science_expected_pct',
        # Higher Standard
        'rwm_high_pct',
        'reading_high_pct',
        'writing_high_pct',
        'maths_high_pct',
        'gps_high_pct',
        # Progress
        'reading_progress',
        'writing_progress',
        'maths_progress',
        # Averages
        'reading_avg_score',
        'maths_avg_score',
        'gps_avg_score',
        # Context
        'disadvantaged_pct',
        'eal_pct',
        'sen_support_pct',
        'sen_ehcp_pct',
        'stability_pct',
        # Gender
        'rwm_expected_boys_pct',
        'rwm_expected_girls_pct',
        'rwm_high_boys_pct',
        'rwm_high_girls_pct',
        # Disadvantaged
        'rwm_expected_disadvantaged_pct',
        'rwm_expected_non_disadvantaged_pct',
        'disadvantaged_gap',
        # 3-Year
        'rwm_expected_3yr_pct',
        'reading_avg_3yr',
        'maths_avg_3yr',
    )
}
|
||||
|
||||
@@ -5,94 +5,125 @@ Single source of truth for all data transformations.
|
||||
|
||||
# Column name mappings from DfE CSV to API field names
|
||||
COLUMN_MAPPINGS = {
|
||||
'URN': 'urn',
|
||||
'SCHNAME': 'school_name',
|
||||
'ADDRESS1': 'address1',
|
||||
'ADDRESS2': 'address2',
|
||||
'TOWN': 'town',
|
||||
'PCODE': 'postcode',
|
||||
'NFTYPE': 'school_type_code',
|
||||
'RELDENOM': 'religious_denomination',
|
||||
'AGERANGE': 'age_range',
|
||||
'TOTPUPS': 'total_pupils',
|
||||
'TELIG': 'eligible_pupils',
|
||||
"URN": "urn",
|
||||
"SCHNAME": "school_name",
|
||||
"ADDRESS1": "address1",
|
||||
"ADDRESS2": "address2",
|
||||
"TOWN": "town",
|
||||
"PCODE": "postcode",
|
||||
"NFTYPE": "school_type_code",
|
||||
"RELDENOM": "religious_denomination",
|
||||
"AGERANGE": "age_range",
|
||||
"TOTPUPS": "total_pupils",
|
||||
"TELIG": "eligible_pupils",
|
||||
# Core KS2 metrics
|
||||
'PTRWM_EXP': 'rwm_expected_pct',
|
||||
'PTRWM_HIGH': 'rwm_high_pct',
|
||||
'READPROG': 'reading_progress',
|
||||
'WRITPROG': 'writing_progress',
|
||||
'MATPROG': 'maths_progress',
|
||||
'PTREAD_EXP': 'reading_expected_pct',
|
||||
'PTWRITTA_EXP': 'writing_expected_pct',
|
||||
'PTMAT_EXP': 'maths_expected_pct',
|
||||
'READ_AVERAGE': 'reading_avg_score',
|
||||
'MAT_AVERAGE': 'maths_avg_score',
|
||||
'PTREAD_HIGH': 'reading_high_pct',
|
||||
'PTWRITTA_HIGH': 'writing_high_pct',
|
||||
'PTMAT_HIGH': 'maths_high_pct',
|
||||
"PTRWM_EXP": "rwm_expected_pct",
|
||||
"PTRWM_HIGH": "rwm_high_pct",
|
||||
"READPROG": "reading_progress",
|
||||
"WRITPROG": "writing_progress",
|
||||
"MATPROG": "maths_progress",
|
||||
"PTREAD_EXP": "reading_expected_pct",
|
||||
"PTWRITTA_EXP": "writing_expected_pct",
|
||||
"PTMAT_EXP": "maths_expected_pct",
|
||||
"READ_AVERAGE": "reading_avg_score",
|
||||
"MAT_AVERAGE": "maths_avg_score",
|
||||
"PTREAD_HIGH": "reading_high_pct",
|
||||
"PTWRITTA_HIGH": "writing_high_pct",
|
||||
"PTMAT_HIGH": "maths_high_pct",
|
||||
# GPS (Grammar, Punctuation & Spelling)
|
||||
'PTGPS_EXP': 'gps_expected_pct',
|
||||
'PTGPS_HIGH': 'gps_high_pct',
|
||||
'GPS_AVERAGE': 'gps_avg_score',
|
||||
"PTGPS_EXP": "gps_expected_pct",
|
||||
"PTGPS_HIGH": "gps_high_pct",
|
||||
"GPS_AVERAGE": "gps_avg_score",
|
||||
# Science
|
||||
'PTSCITA_EXP': 'science_expected_pct',
|
||||
"PTSCITA_EXP": "science_expected_pct",
|
||||
# School context
|
||||
'PTFSM6CLA1A': 'disadvantaged_pct',
|
||||
'PTEALGRP2': 'eal_pct',
|
||||
'PSENELK': 'sen_support_pct',
|
||||
'PSENELE': 'sen_ehcp_pct',
|
||||
'PTMOBN': 'stability_pct',
|
||||
"PTFSM6CLA1A": "disadvantaged_pct",
|
||||
"PTEALGRP2": "eal_pct",
|
||||
"PSENELK": "sen_support_pct",
|
||||
"PSENELE": "sen_ehcp_pct",
|
||||
"PTMOBN": "stability_pct",
|
||||
# Gender breakdown
|
||||
'PTRWM_EXP_B': 'rwm_expected_boys_pct',
|
||||
'PTRWM_EXP_G': 'rwm_expected_girls_pct',
|
||||
'PTRWM_HIGH_B': 'rwm_high_boys_pct',
|
||||
'PTRWM_HIGH_G': 'rwm_high_girls_pct',
|
||||
"PTRWM_EXP_B": "rwm_expected_boys_pct",
|
||||
"PTRWM_EXP_G": "rwm_expected_girls_pct",
|
||||
"PTRWM_HIGH_B": "rwm_high_boys_pct",
|
||||
"PTRWM_HIGH_G": "rwm_high_girls_pct",
|
||||
# Disadvantaged performance
|
||||
'PTRWM_EXP_FSM6CLA1A': 'rwm_expected_disadvantaged_pct',
|
||||
'PTRWM_EXP_NotFSM6CLA1A': 'rwm_expected_non_disadvantaged_pct',
|
||||
'DIFFN_RWM_EXP': 'disadvantaged_gap',
|
||||
"PTRWM_EXP_FSM6CLA1A": "rwm_expected_disadvantaged_pct",
|
||||
"PTRWM_EXP_NotFSM6CLA1A": "rwm_expected_non_disadvantaged_pct",
|
||||
"DIFFN_RWM_EXP": "disadvantaged_gap",
|
||||
# 3-year averages
|
||||
'PTRWM_EXP_3YR': 'rwm_expected_3yr_pct',
|
||||
'READ_AVERAGE_3YR': 'reading_avg_3yr',
|
||||
'MAT_AVERAGE_3YR': 'maths_avg_3yr',
|
||||
"PTRWM_EXP_3YR": "rwm_expected_3yr_pct",
|
||||
"READ_AVERAGE_3YR": "reading_avg_3yr",
|
||||
"MAT_AVERAGE_3YR": "maths_avg_3yr",
|
||||
}
|
||||
|
||||
# Numeric columns that need parsing.
# Each column is listed exactly once so consumers do not process it twice.
NUMERIC_COLUMNS = [
    # Core metrics
    "rwm_expected_pct",
    "rwm_high_pct",
    "reading_progress",
    "writing_progress",
    "maths_progress",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "reading_avg_score",
    "maths_avg_score",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    # GPS & Science
    "gps_expected_pct",
    "gps_high_pct",
    "gps_avg_score",
    "science_expected_pct",
    # School context
    "total_pupils",
    "eligible_pupils",
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Gender breakdown
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged performance
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-year averages
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
]
|
||||
|
||||
# School type code to user-friendly name mapping.
# Several codes deliberately share one friendly name; each code is listed
# exactly once.
SCHOOL_TYPE_MAP = {
    # Academies
    "AC": "Academy",
    "ACC": "Academy",
    "ACCS": "Academy",
    "ACS": "Academy (Sponsor Led)",
    # Community Schools
    "CY": "Community",
    "CYS": "Community",
    # Voluntary Schools
    "VA": "Voluntary Aided",
    "VC": "Voluntary Controlled",
    # Foundation Schools
    "FD": "Foundation",
    "F": "Foundation",
    "FDS": "Foundation",
    # Free Schools
    "FS": "Free School",
}
|
||||
|
||||
# Special values to treat as null (includes the empty string)
NULL_VALUES = ["SUPP", "NE", "NA", "NP", "NEW", "LOW", ""]
|
||||
|
||||
# KS2 Metric definitions - single source of truth
|
||||
# Used by both backend API and frontend
|
||||
@@ -103,42 +134,42 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "RWM %",
|
||||
"description": "% meeting expected standard in reading, writing and maths",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
"reading_expected_pct": {
|
||||
"name": "Reading Expected %",
|
||||
"short_name": "Reading %",
|
||||
"description": "% meeting expected standard in reading",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
"writing_expected_pct": {
|
||||
"name": "Writing Expected %",
|
||||
"short_name": "Writing %",
|
||||
"description": "% meeting expected standard in writing",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
"maths_expected_pct": {
|
||||
"name": "Maths Expected %",
|
||||
"short_name": "Maths %",
|
||||
"description": "% meeting expected standard in maths",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
"gps_expected_pct": {
|
||||
"name": "GPS Expected %",
|
||||
"short_name": "GPS %",
|
||||
"description": "% meeting expected standard in grammar, punctuation & spelling",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
"science_expected_pct": {
|
||||
"name": "Science Expected %",
|
||||
"short_name": "Science %",
|
||||
"description": "% meeting expected standard in science",
|
||||
"type": "percentage",
|
||||
"category": "expected"
|
||||
"category": "expected",
|
||||
},
|
||||
# Higher Standard
|
||||
"rwm_high_pct": {
|
||||
@@ -146,35 +177,35 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "RWM Higher %",
|
||||
"description": "% achieving higher standard in RWM combined",
|
||||
"type": "percentage",
|
||||
"category": "higher"
|
||||
"category": "higher",
|
||||
},
|
||||
"reading_high_pct": {
|
||||
"name": "Reading Higher %",
|
||||
"short_name": "Reading Higher %",
|
||||
"description": "% achieving higher standard in reading",
|
||||
"type": "percentage",
|
||||
"category": "higher"
|
||||
"category": "higher",
|
||||
},
|
||||
"writing_high_pct": {
|
||||
"name": "Writing Higher %",
|
||||
"short_name": "Writing Higher %",
|
||||
"description": "% achieving greater depth in writing",
|
||||
"type": "percentage",
|
||||
"category": "higher"
|
||||
"category": "higher",
|
||||
},
|
||||
"maths_high_pct": {
|
||||
"name": "Maths Higher %",
|
||||
"short_name": "Maths Higher %",
|
||||
"description": "% achieving higher standard in maths",
|
||||
"type": "percentage",
|
||||
"category": "higher"
|
||||
"category": "higher",
|
||||
},
|
||||
"gps_high_pct": {
|
||||
"name": "GPS Higher %",
|
||||
"short_name": "GPS Higher %",
|
||||
"description": "% achieving higher standard in GPS",
|
||||
"type": "percentage",
|
||||
"category": "higher"
|
||||
"category": "higher",
|
||||
},
|
||||
# Progress Scores
|
||||
"reading_progress": {
|
||||
@@ -182,21 +213,21 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "Reading Progress",
|
||||
"description": "Progress in reading from KS1 to KS2",
|
||||
"type": "score",
|
||||
"category": "progress"
|
||||
"category": "progress",
|
||||
},
|
||||
"writing_progress": {
|
||||
"name": "Writing Progress",
|
||||
"short_name": "Writing Progress",
|
||||
"description": "Progress in writing from KS1 to KS2",
|
||||
"type": "score",
|
||||
"category": "progress"
|
||||
"category": "progress",
|
||||
},
|
||||
"maths_progress": {
|
||||
"name": "Maths Progress",
|
||||
"short_name": "Maths Progress",
|
||||
"description": "Progress in maths from KS1 to KS2",
|
||||
"type": "score",
|
||||
"category": "progress"
|
||||
"category": "progress",
|
||||
},
|
||||
# Average Scores
|
||||
"reading_avg_score": {
|
||||
@@ -204,21 +235,21 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "Reading Avg",
|
||||
"description": "Average scaled score in reading",
|
||||
"type": "score",
|
||||
"category": "average"
|
||||
"category": "average",
|
||||
},
|
||||
"maths_avg_score": {
|
||||
"name": "Maths Average Score",
|
||||
"short_name": "Maths Avg",
|
||||
"description": "Average scaled score in maths",
|
||||
"type": "score",
|
||||
"category": "average"
|
||||
"category": "average",
|
||||
},
|
||||
"gps_avg_score": {
|
||||
"name": "GPS Average Score",
|
||||
"short_name": "GPS Avg",
|
||||
"description": "Average scaled score in GPS",
|
||||
"type": "score",
|
||||
"category": "average"
|
||||
"category": "average",
|
||||
},
|
||||
# Gender Performance
|
||||
"rwm_expected_boys_pct": {
|
||||
@@ -226,28 +257,28 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "Boys RWM %",
|
||||
"description": "% of boys meeting expected standard",
|
||||
"type": "percentage",
|
||||
"category": "gender"
|
||||
"category": "gender",
|
||||
},
|
||||
"rwm_expected_girls_pct": {
|
||||
"name": "RWM Expected % (Girls)",
|
||||
"short_name": "Girls RWM %",
|
||||
"description": "% of girls meeting expected standard",
|
||||
"type": "percentage",
|
||||
"category": "gender"
|
||||
"category": "gender",
|
||||
},
|
||||
"rwm_high_boys_pct": {
|
||||
"name": "RWM Higher % (Boys)",
|
||||
"short_name": "Boys Higher %",
|
||||
"description": "% of boys at higher standard",
|
||||
"type": "percentage",
|
||||
"category": "gender"
|
||||
"category": "gender",
|
||||
},
|
||||
"rwm_high_girls_pct": {
|
||||
"name": "RWM Higher % (Girls)",
|
||||
"short_name": "Girls Higher %",
|
||||
"description": "% of girls at higher standard",
|
||||
"type": "percentage",
|
||||
"category": "gender"
|
||||
"category": "gender",
|
||||
},
|
||||
# Disadvantaged Performance
|
||||
"rwm_expected_disadvantaged_pct": {
|
||||
@@ -255,21 +286,21 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "Disadvantaged %",
|
||||
"description": "% of disadvantaged pupils meeting expected",
|
||||
"type": "percentage",
|
||||
"category": "equity"
|
||||
"category": "equity",
|
||||
},
|
||||
"rwm_expected_non_disadvantaged_pct": {
|
||||
"name": "RWM Expected % (Non-Disadvantaged)",
|
||||
"short_name": "Non-Disadv %",
|
||||
"description": "% of non-disadvantaged pupils meeting expected",
|
||||
"type": "percentage",
|
||||
"category": "equity"
|
||||
"category": "equity",
|
||||
},
|
||||
"disadvantaged_gap": {
|
||||
"name": "Disadvantaged Gap",
|
||||
"short_name": "Disadv Gap",
|
||||
"description": "Gap between disadvantaged and national non-disadvantaged",
|
||||
"type": "score",
|
||||
"category": "equity"
|
||||
"category": "equity",
|
||||
},
|
||||
# School Context
|
||||
"disadvantaged_pct": {
|
||||
@@ -277,28 +308,28 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "% Disadvantaged",
|
||||
"description": "% of pupils eligible for free school meals or looked after",
|
||||
"type": "percentage",
|
||||
"category": "context"
|
||||
"category": "context",
|
||||
},
|
||||
"eal_pct": {
|
||||
"name": "% EAL Pupils",
|
||||
"short_name": "% EAL",
|
||||
"description": "% of pupils with English as additional language",
|
||||
"type": "percentage",
|
||||
"category": "context"
|
||||
"category": "context",
|
||||
},
|
||||
"sen_support_pct": {
|
||||
"name": "% SEN Support",
|
||||
"short_name": "% SEN",
|
||||
"description": "% of pupils with SEN support",
|
||||
"type": "percentage",
|
||||
"category": "context"
|
||||
"category": "context",
|
||||
},
|
||||
"stability_pct": {
|
||||
"name": "% Pupil Stability",
|
||||
"short_name": "% Stable",
|
||||
"description": "% of non-mobile pupils (stayed at school)",
|
||||
"type": "percentage",
|
||||
"category": "context"
|
||||
"category": "context",
|
||||
},
|
||||
# 3-Year Averages
|
||||
"rwm_expected_3yr_pct": {
|
||||
@@ -306,122 +337,261 @@ METRIC_DEFINITIONS = {
|
||||
"short_name": "RWM 3yr %",
|
||||
"description": "3-year average % meeting expected",
|
||||
"type": "percentage",
|
||||
"category": "trends"
|
||||
"category": "trends",
|
||||
},
|
||||
"reading_avg_3yr": {
|
||||
"name": "Reading Score (3-Year Avg)",
|
||||
"short_name": "Reading 3yr",
|
||||
"description": "3-year average reading score",
|
||||
"type": "score",
|
||||
"category": "trends"
|
||||
"category": "trends",
|
||||
},
|
||||
"maths_avg_3yr": {
|
||||
"name": "Maths Score (3-Year Avg)",
|
||||
"short_name": "Maths 3yr",
|
||||
"description": "3-year average maths score",
|
||||
"type": "score",
|
||||
"category": "trends"
|
||||
"category": "trends",
|
||||
},
|
||||
}
|
||||
|
||||
# Ranking columns to include in rankings response
|
||||
RANKING_COLUMNS = [
|
||||
"urn", "school_name", "local_authority", "school_type", "address", "year", "total_pupils",
|
||||
"urn",
|
||||
"school_name",
|
||||
"local_authority",
|
||||
"school_type",
|
||||
"address",
|
||||
"year",
|
||||
"total_pupils",
|
||||
# Core expected
|
||||
"rwm_expected_pct", "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
|
||||
"gps_expected_pct", "science_expected_pct",
|
||||
"rwm_expected_pct",
|
||||
"reading_expected_pct",
|
||||
"writing_expected_pct",
|
||||
"maths_expected_pct",
|
||||
"gps_expected_pct",
|
||||
"science_expected_pct",
|
||||
# Core higher
|
||||
"rwm_high_pct", "reading_high_pct", "writing_high_pct", "maths_high_pct", "gps_high_pct",
|
||||
"rwm_high_pct",
|
||||
"reading_high_pct",
|
||||
"writing_high_pct",
|
||||
"maths_high_pct",
|
||||
"gps_high_pct",
|
||||
# Progress & averages
|
||||
"reading_progress", "writing_progress", "maths_progress",
|
||||
"reading_avg_score", "maths_avg_score", "gps_avg_score",
|
||||
"reading_progress",
|
||||
"writing_progress",
|
||||
"maths_progress",
|
||||
"reading_avg_score",
|
||||
"maths_avg_score",
|
||||
"gps_avg_score",
|
||||
# Gender
|
||||
"rwm_expected_boys_pct", "rwm_expected_girls_pct", "rwm_high_boys_pct", "rwm_high_girls_pct",
|
||||
"rwm_expected_boys_pct",
|
||||
"rwm_expected_girls_pct",
|
||||
"rwm_high_boys_pct",
|
||||
"rwm_high_girls_pct",
|
||||
# Equity
|
||||
"rwm_expected_disadvantaged_pct", "rwm_expected_non_disadvantaged_pct", "disadvantaged_gap",
|
||||
"rwm_expected_disadvantaged_pct",
|
||||
"rwm_expected_non_disadvantaged_pct",
|
||||
"disadvantaged_gap",
|
||||
# Context
|
||||
"disadvantaged_pct", "eal_pct", "sen_support_pct", "stability_pct",
|
||||
"disadvantaged_pct",
|
||||
"eal_pct",
|
||||
"sen_support_pct",
|
||||
"stability_pct",
|
||||
# 3-year
|
||||
"rwm_expected_3yr_pct", "reading_avg_3yr", "maths_avg_3yr",
|
||||
"rwm_expected_3yr_pct",
|
||||
"reading_avg_3yr",
|
||||
"maths_avg_3yr",
|
||||
]
|
||||
|
||||
# School listing columns
|
||||
SCHOOL_COLUMNS = ["urn", "school_name", "local_authority", "school_type", "address", "town", "postcode"]
|
||||
SCHOOL_COLUMNS = [
|
||||
"urn",
|
||||
"school_name",
|
||||
"local_authority",
|
||||
"school_type",
|
||||
"address",
|
||||
"town",
|
||||
"postcode",
|
||||
"religious_denomination",
|
||||
"age_range",
|
||||
"latitude",
|
||||
"longitude",
|
||||
]
|
||||
|
||||
# Local Authority code to name mapping (used as a fallback when the LANAME column is missing)
|
||||
# Source: https://www.gov.uk/government/publications/local-authority-codes
|
||||
LA_CODE_TO_NAME = {
|
||||
# Inner London
|
||||
201: "City of London", 202: "Camden", 203: "Greenwich", 204: "Hackney",
|
||||
205: "Hammersmith and Fulham", 206: "Islington", 207: "Kensington and Chelsea",
|
||||
208: "Lambeth", 209: "Lewisham", 210: "Southwark", 211: "Tower Hamlets",
|
||||
212: "Wandsworth", 213: "Westminster",
|
||||
201: "City of London",
|
||||
202: "Camden",
|
||||
203: "Greenwich",
|
||||
204: "Hackney",
|
||||
205: "Hammersmith and Fulham",
|
||||
206: "Islington",
|
||||
207: "Kensington and Chelsea",
|
||||
208: "Lambeth",
|
||||
209: "Lewisham",
|
||||
210: "Southwark",
|
||||
211: "Tower Hamlets",
|
||||
212: "Wandsworth",
|
||||
213: "Westminster",
|
||||
# Outer London
|
||||
301: "Barking and Dagenham", 302: "Barnet", 303: "Bexley", 304: "Brent",
|
||||
305: "Bromley", 306: "Croydon", 307: "Ealing", 308: "Enfield", 309: "Haringey",
|
||||
310: "Harrow", 311: "Havering", 312: "Hillingdon", 313: "Hounslow",
|
||||
314: "Kingston upon Thames", 315: "Merton", 316: "Newham", 317: "Redbridge",
|
||||
318: "Richmond upon Thames", 319: "Sutton", 320: "Waltham Forest",
|
||||
301: "Barking and Dagenham",
|
||||
302: "Barnet",
|
||||
303: "Bexley",
|
||||
304: "Brent",
|
||||
305: "Bromley",
|
||||
306: "Croydon",
|
||||
307: "Ealing",
|
||||
308: "Enfield",
|
||||
309: "Haringey",
|
||||
310: "Harrow",
|
||||
311: "Havering",
|
||||
312: "Hillingdon",
|
||||
313: "Hounslow",
|
||||
314: "Kingston upon Thames",
|
||||
315: "Merton",
|
||||
316: "Newham",
|
||||
317: "Redbridge",
|
||||
318: "Richmond upon Thames",
|
||||
319: "Sutton",
|
||||
320: "Waltham Forest",
|
||||
# West Midlands
|
||||
330: "Birmingham", 331: "Coventry", 332: "Dudley", 333: "Sandwell",
|
||||
334: "Solihull", 335: "Walsall", 336: "Wolverhampton",
|
||||
330: "Birmingham",
|
||||
331: "Coventry",
|
||||
332: "Dudley",
|
||||
333: "Sandwell",
|
||||
334: "Solihull",
|
||||
335: "Walsall",
|
||||
336: "Wolverhampton",
|
||||
# Merseyside
|
||||
340: "Knowsley", 341: "Liverpool", 342: "St. Helens", 343: "Sefton", 344: "Wirral",
|
||||
340: "Knowsley",
|
||||
341: "Liverpool",
|
||||
342: "St. Helens",
|
||||
343: "Sefton",
|
||||
344: "Wirral",
|
||||
# Greater Manchester
|
||||
350: "Bolton", 351: "Bury", 352: "Manchester", 353: "Oldham", 354: "Rochdale",
|
||||
355: "Salford", 356: "Stockport", 357: "Tameside", 358: "Trafford", 359: "Wigan",
|
||||
350: "Bolton",
|
||||
351: "Bury",
|
||||
352: "Manchester",
|
||||
353: "Oldham",
|
||||
354: "Rochdale",
|
||||
355: "Salford",
|
||||
356: "Stockport",
|
||||
357: "Tameside",
|
||||
358: "Trafford",
|
||||
359: "Wigan",
|
||||
# South Yorkshire
|
||||
370: "Barnsley", 371: "Doncaster", 372: "Rotherham", 373: "Sheffield",
|
||||
370: "Barnsley",
|
||||
371: "Doncaster",
|
||||
372: "Rotherham",
|
||||
373: "Sheffield",
|
||||
# West Yorkshire
|
||||
380: "Bradford", 381: "Calderdale", 382: "Kirklees", 383: "Leeds", 384: "Wakefield",
|
||||
380: "Bradford",
|
||||
381: "Calderdale",
|
||||
382: "Kirklees",
|
||||
383: "Leeds",
|
||||
384: "Wakefield",
|
||||
# Tyne and Wear
|
||||
390: "Gateshead", 391: "Newcastle upon Tyne", 392: "North Tyneside",
|
||||
393: "South Tyneside", 394: "Sunderland",
|
||||
390: "Gateshead",
|
||||
391: "Newcastle upon Tyne",
|
||||
392: "North Tyneside",
|
||||
393: "South Tyneside",
|
||||
394: "Sunderland",
|
||||
# Isles of Scilly
|
||||
420: "Isles of Scilly",
|
||||
# Unitary authorities (800+)
|
||||
800: "Bath and North East Somerset", 801: "Bristol, City of", 802: "North Somerset",
|
||||
803: "South Gloucestershire", 805: "Hartlepool", 806: "Middlesbrough",
|
||||
807: "Redcar and Cleveland", 808: "Stockton-on-Tees",
|
||||
810: "Kingston Upon Hull, City of", 811: "East Riding of Yorkshire",
|
||||
812: "North East Lincolnshire", 813: "North Lincolnshire",
|
||||
815: "North Yorkshire", 816: "York",
|
||||
820: "Bedford", 821: "Central Bedfordshire", 822: "Luton",
|
||||
823: "West Northamptonshire", 824: "North Northamptonshire",
|
||||
825: "Buckinghamshire", 826: "Milton Keynes",
|
||||
830: "Derbyshire", 831: "Derby",
|
||||
835: "Dorset", 836: "Bournemouth, Christchurch and Poole",
|
||||
837: "Poole", 838: "Bournemouth", # Historic codes (merged into 836)
|
||||
839: "Durham", 840: "Darlington",
|
||||
841: "East Sussex", 845: "Brighton and Hove",
|
||||
846: "Hampshire", 850: "Portsmouth", 851: "Southampton", 852: "Isle of Wight",
|
||||
855: "Leicestershire", 856: "Leicester", 857: "Rutland",
|
||||
860: "Staffordshire", 861: "Stoke-on-Trent",
|
||||
865: "Wiltshire", 866: "Swindon",
|
||||
867: "Bracknell Forest", 868: "Windsor and Maidenhead", 869: "West Berkshire",
|
||||
870: "Reading", 871: "Slough", 872: "Wokingham",
|
||||
873: "Cambridgeshire", 874: "Peterborough",
|
||||
876: "Halton", 877: "Warrington",
|
||||
878: "Devon", 879: "Plymouth", 880: "Torbay",
|
||||
881: "Essex", 882: "Southend-on-Sea", 883: "Thurrock",
|
||||
884: "Herefordshire", 885: "Worcestershire",
|
||||
886: "Kent", 887: "Medway",
|
||||
888: "Lancashire", 889: "Blackburn with Darwen", 890: "Blackpool",
|
||||
891: "Nottinghamshire", 892: "Nottingham",
|
||||
893: "Shropshire", 894: "Telford and Wrekin",
|
||||
895: "Cheshire East", 896: "Cheshire West and Chester",
|
||||
800: "Bath and North East Somerset",
|
||||
801: "Bristol, City of",
|
||||
802: "North Somerset",
|
||||
803: "South Gloucestershire",
|
||||
805: "Hartlepool",
|
||||
806: "Middlesbrough",
|
||||
807: "Redcar and Cleveland",
|
||||
808: "Stockton-on-Tees",
|
||||
810: "Kingston Upon Hull, City of",
|
||||
811: "East Riding of Yorkshire",
|
||||
812: "North East Lincolnshire",
|
||||
813: "North Lincolnshire",
|
||||
815: "North Yorkshire",
|
||||
816: "York",
|
||||
820: "Bedford",
|
||||
821: "Central Bedfordshire",
|
||||
822: "Luton",
|
||||
823: "West Northamptonshire",
|
||||
824: "North Northamptonshire",
|
||||
825: "Buckinghamshire",
|
||||
826: "Milton Keynes",
|
||||
830: "Derbyshire",
|
||||
831: "Derby",
|
||||
835: "Dorset",
|
||||
836: "Bournemouth, Christchurch and Poole",
|
||||
837: "Poole",
|
||||
838: "Bournemouth", # Historic codes (merged into 836)
|
||||
839: "Durham",
|
||||
840: "Darlington",
|
||||
841: "East Sussex",
|
||||
845: "Brighton and Hove",
|
||||
846: "Hampshire",
|
||||
850: "Portsmouth",
|
||||
851: "Southampton",
|
||||
852: "Isle of Wight",
|
||||
855: "Leicestershire",
|
||||
856: "Leicester",
|
||||
857: "Rutland",
|
||||
860: "Staffordshire",
|
||||
861: "Stoke-on-Trent",
|
||||
865: "Wiltshire",
|
||||
866: "Swindon",
|
||||
867: "Bracknell Forest",
|
||||
868: "Windsor and Maidenhead",
|
||||
869: "West Berkshire",
|
||||
870: "Reading",
|
||||
871: "Slough",
|
||||
872: "Wokingham",
|
||||
873: "Cambridgeshire",
|
||||
874: "Peterborough",
|
||||
876: "Halton",
|
||||
877: "Warrington",
|
||||
878: "Devon",
|
||||
879: "Plymouth",
|
||||
880: "Torbay",
|
||||
881: "Essex",
|
||||
882: "Southend-on-Sea",
|
||||
883: "Thurrock",
|
||||
884: "Herefordshire",
|
||||
885: "Worcestershire",
|
||||
886: "Kent",
|
||||
887: "Medway",
|
||||
888: "Lancashire",
|
||||
889: "Blackburn with Darwen",
|
||||
890: "Blackpool",
|
||||
891: "Nottinghamshire",
|
||||
892: "Nottingham",
|
||||
893: "Shropshire",
|
||||
894: "Telford and Wrekin",
|
||||
895: "Cheshire East",
|
||||
896: "Cheshire West and Chester",
|
||||
# County councils (900+)
|
||||
908: "Cornwall", 909: "Cumbria",
|
||||
916: "Gloucestershire", 919: "Hertfordshire",
|
||||
921: "Norfolk", 925: "Lincolnshire",
|
||||
908: "Cornwall",
|
||||
909: "Cumbria",
|
||||
916: "Gloucestershire",
|
||||
919: "Hertfordshire",
|
||||
921: "Norfolk",
|
||||
925: "Lincolnshire",
|
||||
926: "Northamptonshire", # Historic (split into 823/824 in 2021)
|
||||
928: "Northumberland", 929: "Oxfordshire",
|
||||
931: "Somerset", 933: "Suffolk", 935: "Surrey",
|
||||
936: "Warwickshire", 937: "West Sussex",
|
||||
928: "Northumberland",
|
||||
929: "Oxfordshire",
|
||||
931: "Somerset",
|
||||
933: "Suffolk",
|
||||
935: "Surrey",
|
||||
936: "Warwickshire",
|
||||
937: "West Sussex",
|
||||
# New authorities (2023 reorganization)
|
||||
938: "Westmorland and Furness", 940: "Cumberland",
|
||||
938: "Westmorland and Furness",
|
||||
940: "Cumberland",
|
||||
941: "North Yorkshire", # New unitary
|
||||
942: "Somerset", # New unitary (replaced 931)
|
||||
943: "Buckinghamshire", # New unitary (2020, replacing 825 in some datasets)
|
||||
}
|
||||
|
||||
|
||||
114
claude.md
Normal file
114
claude.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# SchoolCompare.co.uk - Project Context
|
||||
|
||||
## Overview
|
||||
|
||||
SchoolCompare is a web application for comparing primary school (KS2) performance data for schools in England. It allows users to:
|
||||
- Search and browse schools by name, location (postcode), or local authority
|
||||
- Compare multiple schools side-by-side with charts and tables
|
||||
- View school rankings by various KS2 metrics
|
||||
- See historical performance trends across years
|
||||
|
||||
## Architecture
|
||||
|
||||
### Backend (Python/FastAPI)
|
||||
- **Framework**: FastAPI with uvicorn
|
||||
- **Database**: PostgreSQL with SQLAlchemy ORM
|
||||
- **Data Source**: UK Government "Compare School Performance" CSV downloads
|
||||
|
||||
Key files:
|
||||
- `backend/app.py` - Main FastAPI application, API routes
|
||||
- `backend/config.py` - Configuration via pydantic-settings (env vars, .env file)
|
||||
- `backend/database.py` - SQLAlchemy engine, session management
|
||||
- `backend/models.py` - Database models (School, SchoolResult)
|
||||
- `backend/data_loader.py` - Data queries, geocoding, legacy DataFrame compatibility
|
||||
- `backend/schemas.py` - Column mappings, metric definitions, LA code mappings
|
||||
|
||||
### Frontend (Vanilla JS)
|
||||
- Single-page application with hash-based routing
|
||||
- Chart.js for data visualization
|
||||
- No build step required
|
||||
|
||||
Key files:
|
||||
- `frontend/index.html` - Main HTML structure
|
||||
- `frontend/app.js` - All application logic, API calls, rendering
|
||||
- `frontend/styles.css` - Styling (CSS variables, responsive design)
|
||||
|
||||
### Database Schema
|
||||
|
||||
```
|
||||
schools school_results
|
||||
├── id (PK) ├── id (PK)
|
||||
├── urn (unique, indexed) ├── school_id (FK → schools.id)
|
||||
├── school_name ├── year (indexed)
|
||||
├── local_authority ├── rwm_expected_pct
|
||||
├── school_type ├── reading_expected_pct
|
||||
├── postcode ├── ... (all KS2 metrics)
|
||||
├── latitude, longitude └── unique(school_id, year)
|
||||
└── results → SchoolResult[]
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Environment variables (or `.env` file):
|
||||
- `DATABASE_URL` - PostgreSQL connection string (default: `postgresql://schoolcompare:schoolcompare@localhost:5432/schoolcompare`)
|
||||
- `HOST`, `PORT` - Server binding (default: `0.0.0.0:80`)
|
||||
- `ALLOWED_ORIGINS` - CORS origins
|
||||
|
||||
## Running Locally
|
||||
|
||||
1. Start PostgreSQL:
|
||||
```bash
|
||||
docker compose up -d db
|
||||
```
|
||||
|
||||
2. Run migration to import CSV data:
|
||||
```bash
|
||||
python scripts/migrate_csv_to_db.py --drop
|
||||
# Add --geocode to geocode postcodes (slower, adds lat/long)
|
||||
```
|
||||
|
||||
3. Start the app:
|
||||
```bash
|
||||
uvicorn backend.app:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
## Docker Deployment
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
This starts:
|
||||
- `db` - PostgreSQL 16 with persistent volume
|
||||
- `app` - FastAPI application on port 80
|
||||
|
||||
## Data
|
||||
|
||||
- Source: UK Government Compare School Performance downloads
|
||||
- Location: `data/` directory with year folders (e.g., `2023-2024/england_ks2final.csv`)
|
||||
- The `scripts/download_data.py` script can fetch data from the government website
|
||||
|
||||
## Key Features
|
||||
|
||||
- **Location Search**: Enter postcode to find nearby schools (uses postcodes.io API)
|
||||
- **Multi-school Comparison**: Select multiple schools, view metrics across years
|
||||
- **Rankings**: Top schools by any KS2 metric, filterable by local authority
|
||||
- **Variability Analysis**: Shows standard deviation of scores across years
|
||||
|
||||
## API Endpoints
|
||||
|
||||
- `GET /api/schools` - List/search schools (supports pagination, location search)
|
||||
- `GET /api/schools/{urn}` - School details with all yearly data
|
||||
- `GET /api/compare?urns=123,456` - Compare multiple schools
|
||||
- `GET /api/rankings` - School rankings by metric
|
||||
- `GET /api/filters` - Available filter options (LAs, types, years)
|
||||
- `GET /api/metrics` - Metric definitions (single source of truth)
|
||||
- `GET /api/data-info` - Database stats
|
||||
|
||||
## Recent Changes
|
||||
|
||||
- Migrated from CSV file storage to PostgreSQL database
|
||||
- Added location-based search using postcode geocoding
|
||||
- Added local authority filter to rankings
|
||||
- Improved frontend with featured schools, loading states, API caching
|
||||
|
||||
2710
frontend/app.js
2710
frontend/app.js
File diff suppressed because it is too large
Load Diff
6
frontend/favicon.svg
Normal file
6
frontend/favicon.svg
Normal file
@@ -0,0 +1,6 @@
|
||||
<svg viewBox="0 0 40 40" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="40" height="40" rx="8" fill="#1a1612"/>
|
||||
<circle cx="20" cy="20" r="14" stroke="#e07256" stroke-width="2"/>
|
||||
<path d="M20 8L20 32M12 14L28 14M10 20L30 20M12 26L28 26" stroke="#e07256" stroke-width="1.5" stroke-linecap="round"/>
|
||||
<circle cx="20" cy="20" r="3" fill="#e07256"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 374 B |
@@ -4,18 +4,72 @@
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>SchoolCompare | Compare Primary School Performance</title>
|
||||
|
||||
<!-- Primary Meta Tags -->
|
||||
<meta name="description" content="Compare primary school KS2 performance across England. Search, filter and compare Reading, Writing and Maths results for thousands of schools.">
|
||||
<meta name="keywords" content="school comparison, KS2 results, primary school performance, England schools, SATs results">
|
||||
<meta name="author" content="SchoolCompare">
|
||||
<meta name="robots" content="index, follow">
|
||||
|
||||
<!-- Favicon -->
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg">
|
||||
|
||||
<!-- Canonical -->
|
||||
<link rel="canonical" href="https://schoolcompare.co.uk/">
|
||||
|
||||
<!-- Open Graph / Facebook -->
|
||||
<meta property="og:type" content="website">
|
||||
<meta property="og:url" content="https://schoolcompare.co.uk/">
|
||||
<meta property="og:title" content="SchoolCompare | Compare Primary School Performance">
|
||||
<meta property="og:description" content="Compare primary school KS2 performance across England. Search and compare Reading, Writing and Maths results.">
|
||||
<meta property="og:site_name" content="SchoolCompare">
|
||||
|
||||
<!-- Twitter -->
|
||||
<meta name="twitter:card" content="summary">
|
||||
<meta name="twitter:url" content="https://schoolcompare.co.uk/">
|
||||
<meta name="twitter:title" content="SchoolCompare | Compare Primary School Performance">
|
||||
<meta name="twitter:description" content="Compare primary school KS2 performance across England.">
|
||||
|
||||
<!-- JSON-LD Structured Data -->
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "WebApplication",
|
||||
"name": "SchoolCompare",
|
||||
"url": "https://schoolcompare.co.uk",
|
||||
"description": "Compare primary school KS2 performance across England",
|
||||
"applicationCategory": "EducationalApplication",
|
||||
"operatingSystem": "Web",
|
||||
"offers": {
|
||||
"@type": "Offer",
|
||||
"price": "0",
|
||||
"priceCurrency": "GBP"
|
||||
},
|
||||
"author": {
|
||||
"@type": "Organization",
|
||||
"name": "SchoolCompare",
|
||||
"url": "https://schoolcompare.co.uk"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700&family=Playfair+Display:wght@600;700&display=swap" rel="stylesheet">
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<!-- Leaflet Map Library -->
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css" integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY=" crossorigin="">
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js" integrity="sha256-20nQCchB9co0qIjJZRGuk2/Z9VM+kNiyxNV1lvTlZBo=" crossorigin=""></script>
|
||||
<link rel="stylesheet" href="/static/styles.css">
|
||||
<!-- Cookie Consent Banner -->
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/silktide/consent-manager@main/silktide-consent-manager.css">
|
||||
</head>
|
||||
<body>
|
||||
<div class="noise-overlay"></div>
|
||||
|
||||
<header class="header">
|
||||
<div class="header-content">
|
||||
<div class="logo">
|
||||
<a href="/" class="logo">
|
||||
<div class="logo-icon">
|
||||
<svg viewBox="0 0 40 40" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<circle cx="20" cy="20" r="18" stroke="currentColor" stroke-width="2"/>
|
||||
@@ -27,9 +81,9 @@
|
||||
<span class="logo-title">SchoolCompare</span>
|
||||
<span class="logo-subtitle">schoolcompare.co.uk</span>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
<nav class="nav">
|
||||
<a href="/" class="nav-link active" data-view="dashboard">Dashboard</a>
|
||||
<a href="/" class="nav-link active" data-view="home">Home</a>
|
||||
<a href="/compare" class="nav-link" data-view="compare">Compare</a>
|
||||
<a href="/rankings" class="nav-link" data-view="rankings">Rankings</a>
|
||||
</nav>
|
||||
@@ -37,44 +91,66 @@
|
||||
</header>
|
||||
|
||||
<main class="main">
|
||||
<!-- Dashboard View -->
|
||||
<section id="dashboard-view" class="view active">
|
||||
<!-- Home View -->
|
||||
<section id="home-view" class="view active">
|
||||
<div class="hero">
|
||||
<h1 class="hero-title">Compare Primary School Performance</h1>
|
||||
<p class="hero-subtitle">Search and compare KS2 results across England's primary schools</p>
|
||||
</div>
|
||||
|
||||
<div class="search-section">
|
||||
<div class="search-container">
|
||||
<input type="text" id="school-search" class="search-input" placeholder="Search primary schools by name...">
|
||||
<div class="search-icon">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<div class="search-mode-toggle">
|
||||
<button class="search-mode-btn active" data-mode="name">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16">
|
||||
<circle cx="11" cy="11" r="8"/>
|
||||
<path d="M21 21l-4.35-4.35"/>
|
||||
</svg>
|
||||
Find by Name
|
||||
</button>
|
||||
<button class="search-mode-btn" data-mode="location">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" width="16" height="16">
|
||||
<path d="M21 10c0 7-9 13-9 13s-9-6-9-13a9 9 0 0 1 18 0z"/>
|
||||
<circle cx="12" cy="10" r="3"/>
|
||||
</svg>
|
||||
Find by Location
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div id="name-search-panel" class="search-panel active">
|
||||
<div class="search-container">
|
||||
<input type="text" id="school-search" class="search-input" placeholder="Search primary schools by name...">
|
||||
<div class="search-icon">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<circle cx="11" cy="11" r="8"/>
|
||||
<path d="M21 21l-4.35-4.35"/>
|
||||
</svg>
|
||||
</div>
|
||||
</div>
|
||||
<div class="filter-row">
|
||||
<select id="local-authority-filter" class="filter-select">
|
||||
<option value="">All Areas</option>
|
||||
</select>
|
||||
<select id="type-filter" class="filter-select">
|
||||
<option value="">All School Types</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="location-search">
|
||||
|
||||
<div id="location-search-panel" class="search-panel">
|
||||
<div class="location-input-group">
|
||||
<input type="text" id="postcode-search" class="search-input postcode-input" placeholder="Enter postcode (e.g. SW18 4TF)">
|
||||
<input type="text" id="postcode-search" class="search-input postcode-input" placeholder="Enter postcode...">
|
||||
<select id="radius-select" class="filter-select radius-select">
|
||||
<option value="0.5" selected>1/2 mile</option>
|
||||
<option value="1">1 mile</option>
|
||||
<option value="2">2 miles</option>
|
||||
<option value="5" selected>5 miles</option>
|
||||
<option value="10">10 miles</option>
|
||||
<option value="20">20 miles</option>
|
||||
</select>
|
||||
<button id="location-search-btn" class="btn btn-primary location-btn">Find Nearby</button>
|
||||
<button id="clear-location-btn" class="btn location-clear-btn" style="display: none;">Clear</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="filter-row">
|
||||
<select id="local-authority-filter" class="filter-select">
|
||||
<option value="">All Areas</option>
|
||||
</select>
|
||||
<select id="type-filter" class="filter-select">
|
||||
<option value="">All School Types</option>
|
||||
</select>
|
||||
<div class="filter-row">
|
||||
<select id="type-filter-location" class="filter-select">
|
||||
<option value="">All School Types</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -90,6 +166,11 @@
|
||||
<p class="section-subtitle">Select schools to compare their KS2 performance over time</p>
|
||||
</div>
|
||||
|
||||
<div class="compare-search-section">
|
||||
<input type="text" id="compare-search" class="search-input" placeholder="Add a school to compare...">
|
||||
<div id="compare-results" class="compare-results"></div>
|
||||
</div>
|
||||
|
||||
<div class="selected-schools" id="selected-schools">
|
||||
<div class="empty-selection">
|
||||
<div class="empty-icon">
|
||||
@@ -104,11 +185,6 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="compare-search-section">
|
||||
<input type="text" id="compare-search" class="search-input" placeholder="Add a school to compare...">
|
||||
<div id="compare-results" class="compare-results"></div>
|
||||
</div>
|
||||
|
||||
<div class="charts-section" id="charts-section" style="display: none;">
|
||||
<div class="metric-selector">
|
||||
<label>Select KS2 Metric:</label>
|
||||
@@ -251,27 +327,134 @@
|
||||
</svg>
|
||||
</button>
|
||||
<div class="modal-header">
|
||||
<button class="btn btn-primary modal-compare-btn" id="add-to-compare">Add to Compare</button>
|
||||
<h2 id="modal-school-name"></h2>
|
||||
<div class="modal-meta" id="modal-meta"></div>
|
||||
<div class="modal-details" id="modal-details"></div>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="modal-chart-container">
|
||||
<canvas id="school-detail-chart"></canvas>
|
||||
</div>
|
||||
<div class="modal-stats" id="modal-stats"></div>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-primary" id="add-to-compare">Add to Compare</button>
|
||||
<div class="modal-map-container" id="modal-map-container">
|
||||
<h4>Location</h4>
|
||||
<div class="modal-map" id="modal-map"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<footer class="footer">
|
||||
<p>Data source: <a href="https://www.compare-school-performance.service.gov.uk/" target="_blank">UK Government - Compare School Performance</a></p>
|
||||
<p class="footer-note">Primary school (KS2) data for England. Data from 2019-2020, 2020-2021, 2021-2022 unavailable due to COVID-19 disruption.</p>
|
||||
<div class="footer-content">
|
||||
<div class="footer-contact">
|
||||
<h3>Contact Us</h3>
|
||||
<p>Have questions, feedback, or suggestions? We'd love to hear from you.</p>
|
||||
<form action="https://formsubmit.co/contact@schoolcompare.co.uk" method="POST" class="contact-form">
|
||||
<input type="hidden" name="_subject" value="SchoolCompare Contact Form">
|
||||
<input type="hidden" name="_captcha" value="false">
|
||||
<input type="text" name="_honey" style="display:none">
|
||||
<div class="form-row">
|
||||
<input type="text" name="name" placeholder="Your Name" required class="form-input">
|
||||
<input type="email" name="email" placeholder="Your Email" required class="form-input">
|
||||
</div>
|
||||
<textarea name="message" placeholder="Your Message" required class="form-input form-textarea"></textarea>
|
||||
<button type="submit" class="btn btn-primary">Send Message</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="footer-source">
|
||||
<p>Data source: <a href="https://www.compare-school-performance.service.gov.uk/" target="_blank">UK Government - Compare School Performance</a></p>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
<script src="/static/app.js"></script>
|
||||
|
||||
<!-- Google Analytics (loaded conditionally after consent) -->
|
||||
<script>
|
||||
var GA_MEASUREMENT_ID = null;
|
||||
var analyticsConsentGiven = false;
|
||||
|
||||
/**
 * Inject the Google Analytics (gtag.js) loader and queue the initial
 * 'js' and 'config' commands.
 *
 * Does nothing when GA_MEASUREMENT_ID has not been set yet or when a
 * previous call already loaded the script (tracked via window.gaLoaded).
 */
function loadGoogleAnalytics() {
    if (!GA_MEASUREMENT_ID || window.gaLoaded) {
        return;
    }
    window.gaLoaded = true;

    // Load gtag.js for the configured measurement ID
    var gaScript = document.createElement('script');
    gaScript.src = 'https://www.googletagmanager.com/gtag/js?id=' + GA_MEASUREMENT_ID;
    gaScript.async = true;
    document.head.appendChild(gaScript);

    // Create the shared command queue (reusing one that already exists) and
    // expose gtag globally; each call queues its `arguments` object.
    window.dataLayer = window.dataLayer || [];
    window.gtag = function gtag() {
        dataLayer.push(arguments);
    };

    window.gtag('js', new Date());
    window.gtag('config', GA_MEASUREMENT_ID);
}
|
||||
|
||||
// Fetch GA ID from server config, then load GA if consent already given
|
||||
fetch('/api/config')
|
||||
.then(function(response) { return response.json(); })
|
||||
.then(function(config) {
|
||||
if (config.ga_measurement_id) {
|
||||
GA_MEASUREMENT_ID = config.ga_measurement_id;
|
||||
// If consent was already given before config loaded, load GA now
|
||||
if (analyticsConsentGiven) {
|
||||
loadGoogleAnalytics();
|
||||
}
|
||||
}
|
||||
})
|
||||
.catch(function(err) { console.warn('Failed to load config:', err); });
|
||||
</script>
|
||||
|
||||
<!-- Cookie Consent Banner -->
|
||||
<script src="https://cdn.jsdelivr.net/gh/silktide/consent-manager@main/silktide-consent-manager.js"></script>
|
||||
<script>
|
||||
window.silktideConsentManager.init({
|
||||
consentTypes: [
|
||||
{
|
||||
id: 'necessary',
|
||||
label: 'Necessary',
|
||||
description: 'Essential cookies required for the website to function properly.',
|
||||
required: true,
|
||||
defaultValue: true
|
||||
},
|
||||
{
|
||||
id: 'analytics',
|
||||
label: 'Analytics',
|
||||
description: 'Help us understand how visitors use our site so we can improve it.',
|
||||
required: false,
|
||||
defaultValue: false
|
||||
}
|
||||
],
|
||||
text: {
|
||||
title: 'Cookie Preferences',
|
||||
description: 'We use cookies to improve your experience. Analytics cookies help us understand how you use the site.',
|
||||
acceptAll: 'Accept All',
|
||||
rejectAll: 'Reject All',
|
||||
save: 'Save Preferences'
|
||||
},
|
||||
onConsentChange: function(consent) {
|
||||
if (consent.analytics) {
|
||||
analyticsConsentGiven = true;
|
||||
loadGoogleAnalytics();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Check existing consent state after initialization
|
||||
(function() {
|
||||
var manager = window.silktideConsentManager.getInstance();
|
||||
if (manager) {
|
||||
var analyticsConsent = manager.getConsentChoice('analytics');
|
||||
if (analyticsConsent === true) {
|
||||
analyticsConsentGiven = true;
|
||||
loadGoogleAnalytics();
|
||||
}
|
||||
}
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
8
frontend/robots.txt
Normal file
8
frontend/robots.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
User-agent: *
|
||||
Allow: /
|
||||
Allow: /compare
|
||||
Allow: /rankings
|
||||
|
||||
Disallow: /api/
|
||||
|
||||
Sitemap: https://schoolcompare.co.uk/sitemap.xml
|
||||
18
frontend/sitemap.xml
Normal file
18
frontend/sitemap.xml
Normal file
@@ -0,0 +1,18 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://schoolcompare.co.uk/</loc>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>1.0</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://schoolcompare.co.uk/compare</loc>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://schoolcompare.co.uk/rankings</loc>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
</urlset>
|
||||
@@ -96,6 +96,8 @@ body {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
text-decoration: none;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.logo-icon {
|
||||
@@ -198,6 +200,62 @@ body {
|
||||
margin: 2rem auto 3rem;
|
||||
}
|
||||
|
||||
/* Search Mode Toggle */
|
||||
.search-mode-toggle {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 0;
|
||||
margin-bottom: 1.5rem;
|
||||
background: var(--bg-card);
|
||||
border: 2px solid var(--border-color);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 4px;
|
||||
max-width: 400px;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
.search-mode-btn {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.75rem 1.25rem;
|
||||
font-size: 0.95rem;
|
||||
font-family: inherit;
|
||||
font-weight: 500;
|
||||
border: none;
|
||||
border-radius: var(--radius-md);
|
||||
background: transparent;
|
||||
color: var(--text-muted);
|
||||
cursor: pointer;
|
||||
transition: var(--transition);
|
||||
}
|
||||
|
||||
.search-mode-btn:hover {
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.search-mode-btn.active {
|
||||
background: var(--accent-coral);
|
||||
color: white;
|
||||
box-shadow: 0 2px 8px rgba(224, 114, 86, 0.3);
|
||||
}
|
||||
|
||||
.search-mode-btn svg {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Search Panels */
|
||||
.search-panel {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.search-panel.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.search-container {
|
||||
position: relative;
|
||||
margin-bottom: 1rem;
|
||||
@@ -258,16 +316,13 @@ body {
|
||||
}
|
||||
|
||||
/* Location Search */
|
||||
.location-search {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.location-input-group {
|
||||
margin-bottom: 1rem;
|
||||
display: flex;
|
||||
gap: 0.75rem;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
flex-wrap: wrap;
|
||||
flex-wrap: nowrap;
|
||||
}
|
||||
|
||||
.postcode-input {
|
||||
@@ -275,7 +330,10 @@ body {
|
||||
max-width: 180px;
|
||||
padding: 0.6rem 1rem;
|
||||
font-size: 0.95rem;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.postcode-input::placeholder {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
.radius-select {
|
||||
@@ -375,6 +433,8 @@ body {
|
||||
}
|
||||
|
||||
.school-card {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: var(--bg-card);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: var(--radius-lg);
|
||||
@@ -383,6 +443,7 @@ body {
|
||||
transition: var(--transition);
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
z-index: 0;
|
||||
}
|
||||
|
||||
.school-card::before {
|
||||
@@ -435,18 +496,40 @@ body {
|
||||
color: var(--accent-teal);
|
||||
}
|
||||
|
||||
.school-tag.faith {
|
||||
background: rgba(138, 43, 226, 0.1);
|
||||
color: #8a2be2;
|
||||
}
|
||||
|
||||
.school-address {
|
||||
font-size: 0.85rem;
|
||||
color: var(--text-muted);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.school-details {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1rem;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
.age-range {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
padding: 0.2rem 0.5rem;
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius-sm);
|
||||
}
|
||||
|
||||
.school-stats {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 0.75rem;
|
||||
grid-template-columns: repeat(3, 1fr);
|
||||
gap: 0.5rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid var(--border-color);
|
||||
margin-top: auto;
|
||||
}
|
||||
|
||||
.stat {
|
||||
@@ -454,9 +537,13 @@ body {
|
||||
}
|
||||
|
||||
.stat-value {
|
||||
font-size: 1.25rem;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 700;
|
||||
color: var(--text-primary);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
|
||||
.stat-value.positive {
|
||||
@@ -467,11 +554,121 @@ body {
|
||||
color: var(--accent-coral);
|
||||
}
|
||||
|
||||
/* Trend indicators */
|
||||
.trend-indicator {
|
||||
font-size: 0.75rem;
|
||||
cursor: help;
|
||||
}
|
||||
|
||||
.trend-up {
|
||||
color: var(--accent-teal);
|
||||
}
|
||||
|
||||
.trend-down {
|
||||
color: var(--accent-coral);
|
||||
}
|
||||
|
||||
.trend-stable {
|
||||
color: var(--text-muted);
|
||||
font-size: 0.6rem;
|
||||
}
|
||||
|
||||
.stat-label {
|
||||
font-size: 0.7rem;
|
||||
font-size: 0.65rem;
|
||||
color: var(--text-muted);
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
/* School Card Map */
|
||||
.school-map {
|
||||
height: 150px;
|
||||
margin-top: 1rem;
|
||||
border-radius: var(--radius-md);
|
||||
overflow: hidden;
|
||||
cursor: pointer;
|
||||
border: 1px solid var(--border-color);
|
||||
transition: var(--transition);
|
||||
}
|
||||
|
||||
.school-map:hover {
|
||||
border-color: var(--accent-coral);
|
||||
box-shadow: var(--shadow-small);
|
||||
}
|
||||
|
||||
/* Fullscreen Map Modal */
|
||||
.map-modal-overlay {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background: rgba(0, 0, 0, 0.8);
|
||||
z-index: 2000;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 1rem;
|
||||
animation: fadeIn 0.2s ease;
|
||||
}
|
||||
|
||||
.map-modal {
|
||||
background: var(--bg-card);
|
||||
border-radius: var(--radius-lg);
|
||||
width: 100%;
|
||||
max-width: 900px;
|
||||
max-height: 90vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
overflow: hidden;
|
||||
box-shadow: var(--shadow-large);
|
||||
animation: slideUp 0.3s ease;
|
||||
}
|
||||
|
||||
.map-modal-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 1rem 1.5rem;
|
||||
border-bottom: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
.map-modal-header h3 {
|
||||
font-family: 'Playfair Display', Georgia, serif;
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.map-modal-close {
|
||||
background: none;
|
||||
border: none;
|
||||
font-size: 1.75rem;
|
||||
color: var(--text-muted);
|
||||
cursor: pointer;
|
||||
padding: 0.25rem 0.5rem;
|
||||
line-height: 1;
|
||||
transition: var(--transition);
|
||||
}
|
||||
|
||||
.map-modal-close:hover {
|
||||
color: var(--accent-coral);
|
||||
}
|
||||
|
||||
.map-modal-content {
|
||||
height: 500px;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.map-modal {
|
||||
max-height: 80vh;
|
||||
}
|
||||
|
||||
.map-modal-content {
|
||||
height: 400px;
|
||||
}
|
||||
}
|
||||
|
||||
/* Section Titles */
|
||||
@@ -796,7 +993,7 @@ body {
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
z-index: 200;
|
||||
z-index: 2000;
|
||||
display: none;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
@@ -862,6 +1059,7 @@ body {
|
||||
}
|
||||
|
||||
.modal-header {
|
||||
position: relative;
|
||||
padding: 2rem 2rem 1rem;
|
||||
border-bottom: 1px solid var(--border-color);
|
||||
}
|
||||
@@ -872,7 +1070,7 @@ body {
|
||||
font-weight: 700;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.5rem;
|
||||
padding-right: 3rem;
|
||||
padding-right: 10rem;
|
||||
}
|
||||
|
||||
.modal-meta {
|
||||
@@ -881,6 +1079,26 @@ body {
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.modal-details {
|
||||
margin-top: 0.75rem;
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.modal-details .modal-address {
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.modal-compare-btn {
|
||||
position: absolute;
|
||||
top: 1rem;
|
||||
right: 4rem;
|
||||
}
|
||||
|
||||
.modal-details .modal-age-range {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.modal-body {
|
||||
padding: 2rem;
|
||||
}
|
||||
@@ -936,13 +1154,25 @@ body {
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.modal-footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid var(--border-color);
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
.modal-map-container {
|
||||
margin-top: 2rem;
|
||||
}
|
||||
|
||||
.modal-map-container h4 {
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.modal-map {
|
||||
height: 200px;
|
||||
border-radius: var(--radius-md);
|
||||
overflow: hidden;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
|
||||
/* Buttons */
|
||||
.btn {
|
||||
padding: 0.75rem 1.5rem;
|
||||
@@ -967,14 +1197,82 @@ body {
|
||||
|
||||
/* Footer */
|
||||
.footer {
|
||||
text-align: center;
|
||||
padding: 2rem;
|
||||
margin-top: 3rem;
|
||||
border-top: 1px solid var(--border-color);
|
||||
color: var(--text-muted);
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
.footer-content {
|
||||
max-width: 600px;
|
||||
margin: 0 auto;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.footer-contact {
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.footer-contact h3 {
|
||||
font-family: 'Playfair Display', serif;
|
||||
font-size: 1.25rem;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.footer-contact > p {
|
||||
color: var(--text-muted);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.contact-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.contact-form .form-row {
|
||||
display: flex;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.contact-form .form-input {
|
||||
flex: 1;
|
||||
padding: 0.75rem 1rem;
|
||||
font-family: inherit;
|
||||
font-size: 0.9rem;
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: var(--radius-md);
|
||||
background: var(--bg-card);
|
||||
color: var(--text-primary);
|
||||
transition: var(--transition);
|
||||
}
|
||||
|
||||
.contact-form .form-input:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent-teal);
|
||||
box-shadow: 0 0 0 3px rgba(45, 106, 100, 0.1);
|
||||
}
|
||||
|
||||
.contact-form .form-input::placeholder {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.contact-form .form-textarea {
|
||||
min-height: 100px;
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
.contact-form .btn {
|
||||
align-self: flex-start;
|
||||
}
|
||||
|
||||
.footer-source {
|
||||
text-align: center;
|
||||
padding-top: 1.5rem;
|
||||
border-top: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
.footer a {
|
||||
color: var(--accent-teal);
|
||||
text-decoration: none;
|
||||
@@ -984,9 +1282,14 @@ body {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.footer-note {
|
||||
margin-top: 0.5rem;
|
||||
font-size: 0.75rem;
|
||||
@media (max-width: 768px) {
|
||||
.contact-form .form-row {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.contact-form .btn {
|
||||
align-self: stretch;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loading State */
|
||||
@@ -1164,10 +1467,280 @@ body {
|
||||
margin: 1rem;
|
||||
max-height: calc(100vh - 2rem);
|
||||
}
|
||||
|
||||
|
||||
.modal-header {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.modal-header h2 {
|
||||
padding-right: 0;
|
||||
order: 1;
|
||||
}
|
||||
|
||||
.modal-meta {
|
||||
order: 2;
|
||||
}
|
||||
|
||||
.modal-details {
|
||||
order: 3;
|
||||
}
|
||||
|
||||
.modal-compare-btn {
|
||||
position: static;
|
||||
order: 4;
|
||||
margin-top: 1rem;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.modal-body {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.modal-chart-container {
|
||||
margin-bottom: 1.5rem;
|
||||
padding: 0.5rem;
|
||||
background: var(--bg-secondary);
|
||||
border-radius: var(--radius-md);
|
||||
}
|
||||
|
||||
.modal-chart-container canvas {
|
||||
max-height: 280px;
|
||||
}
|
||||
|
||||
.modal-stats-grid {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.modal-stat {
|
||||
padding: 0.75rem 0.5rem;
|
||||
}
|
||||
|
||||
.modal-stat-value {
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
.modal-stat-label {
|
||||
font-size: 0.65rem;
|
||||
}
|
||||
|
||||
.rankings-controls {
|
||||
flex-direction: column;
|
||||
align-items: stretch;
|
||||
}
|
||||
}
|
||||
|
||||
/* Extra small screens */
|
||||
@media (max-width: 480px) {
|
||||
.modal-content {
|
||||
margin: 0.5rem;
|
||||
max-height: calc(100vh - 1rem);
|
||||
}
|
||||
|
||||
.modal-header {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.modal-header h2 {
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
.modal-chart-container canvas {
|
||||
max-height: 260px;
|
||||
}
|
||||
|
||||
.modal-stats-grid {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
}
|
||||
|
||||
.modal-stat-value {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.modal-map {
|
||||
height: 160px;
|
||||
}
|
||||
}
|
||||
|
||||
/* =============================================================================
|
||||
TOOLTIP SYSTEM
|
||||
============================================================================= */
|
||||
|
||||
/* Info Icon Trigger */
|
||||
.info-trigger {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 0;
|
||||
margin-left: 0.25rem;
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: help;
|
||||
color: var(--text-muted);
|
||||
opacity: 0.6;
|
||||
transition: var(--transition);
|
||||
vertical-align: middle;
|
||||
border-radius: 50%;
|
||||
}
|
||||
|
||||
.info-trigger:hover,
|
||||
.info-trigger:focus {
|
||||
color: var(--accent-teal);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.info-trigger:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 0 2px var(--accent-teal);
|
||||
}
|
||||
|
||||
.info-trigger:focus:not(:focus-visible) {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.info-trigger:focus-visible {
|
||||
box-shadow: 0 0 0 2px var(--accent-teal);
|
||||
}
|
||||
|
||||
/* Info Icon SVG */
|
||||
.info-icon {
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.modal-stat-label .info-icon {
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
}
|
||||
|
||||
/* Tooltip Container */
|
||||
.tooltip {
|
||||
position: absolute;
|
||||
z-index: 3000;
|
||||
min-width: 200px;
|
||||
max-width: 280px;
|
||||
padding: 0.75rem 1rem;
|
||||
background: var(--bg-accent);
|
||||
color: var(--text-inverse);
|
||||
border-radius: var(--radius-md);
|
||||
box-shadow: var(--shadow-medium);
|
||||
font-family: 'DM Sans', sans-serif;
|
||||
font-size: 0.8125rem;
|
||||
line-height: 1.5;
|
||||
text-transform: none;
|
||||
letter-spacing: normal;
|
||||
text-align: left;
|
||||
opacity: 0;
|
||||
visibility: hidden;
|
||||
transition: opacity 150ms ease, visibility 150ms ease;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.tooltip.visible {
|
||||
opacity: 1;
|
||||
visibility: visible;
|
||||
pointer-events: auto;
|
||||
}
|
||||
|
||||
/* Tooltip Arrow - Top Placement (arrow points down) */
|
||||
.tooltip[data-placement="top"]::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
border: 8px solid transparent;
|
||||
border-top-color: var(--bg-accent);
|
||||
}
|
||||
|
||||
/* Tooltip Arrow - Bottom Placement (arrow points up) */
|
||||
.tooltip[data-placement="bottom"]::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
bottom: 100%;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
border: 8px solid transparent;
|
||||
border-bottom-color: var(--bg-accent);
|
||||
}
|
||||
|
||||
/* Tooltip Title */
|
||||
.tooltip-title {
|
||||
font-weight: 600;
|
||||
margin-bottom: 0.25rem;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* Tooltip Note (for context like national average) */
|
||||
.tooltip-note {
|
||||
margin-top: 0.5rem;
|
||||
padding-top: 0.5rem;
|
||||
border-top: 1px solid rgba(250, 247, 242, 0.2);
|
||||
font-size: 0.75rem;
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
/* Warning Trigger Button */
|
||||
.warning-trigger {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 0;
|
||||
margin-left: 0.5rem;
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: help;
|
||||
color: var(--accent-coral);
|
||||
opacity: 0.8;
|
||||
transition: var(--transition);
|
||||
vertical-align: middle;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.warning-trigger:hover,
|
||||
.warning-trigger:focus {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.warning-trigger:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 0 2px var(--accent-coral);
|
||||
}
|
||||
|
||||
.warning-trigger:focus:not(:focus-visible) {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.warning-trigger:focus-visible {
|
||||
box-shadow: 0 0 0 2px var(--accent-coral);
|
||||
}
|
||||
|
||||
/* Warning Icon SVG */
|
||||
.warning-icon {
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Warning Tooltip Styling */
|
||||
.tooltip.tooltip-warning {
|
||||
background: #8b4513;
|
||||
border-left: 3px solid var(--accent-coral);
|
||||
}
|
||||
|
||||
.tooltip.tooltip-warning[data-placement="top"]::after {
|
||||
border-top-color: #8b4513;
|
||||
}
|
||||
|
||||
.tooltip.tooltip-warning[data-placement="bottom"]::after {
|
||||
border-bottom-color: #8b4513;
|
||||
}
|
||||
|
||||
/* Label wrapper for inline icon */
|
||||
.stat-label-with-info {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
@@ -8,4 +8,6 @@ requests==2.31.0
|
||||
sqlalchemy==2.0.25
|
||||
psycopg2-binary==2.9.9
|
||||
alembic==1.13.1
|
||||
slowapi==0.1.9
|
||||
secure==0.3.0
|
||||
|
||||
|
||||
184
scripts/geocode_schools.py
Executable file
184
scripts/geocode_schools.py
Executable file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Geocode all school postcodes and update the database.
|
||||
|
||||
This script should be run as a weekly cron job to ensure all schools
|
||||
have up-to-date latitude/longitude coordinates.
|
||||
|
||||
Usage:
|
||||
python scripts/geocode_schools.py [--force]
|
||||
|
||||
Options:
|
||||
--force Re-geocode all postcodes, even if already geocoded
|
||||
|
||||
Crontab example (run every Sunday at 2am):
|
||||
0 2 * * 0 cd /path/to/school_compare && /path/to/venv/bin/python scripts/geocode_schools.py >> /var/log/geocode_schools.log 2>&1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from backend.database import SessionLocal
|
||||
from backend.models import School
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Postcodes are stripped, upper-cased and de-duplicated, then posted to
    the bulk lookup endpoint in batches of 100. A batch that fails (network
    error, non-200 status, unexpected payload) is reported on stdout and
    skipped so that the remaining batches are still attempted.

    Args:
        postcodes: Raw postcode values. Entries that are empty, not strings,
            or shorter than 5 characters after stripping are ignored.

    Returns:
        Dict of upper-cased postcode -> (latitude, longitude) for every
        postcode the API returned a coordinate pair for.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # A set removes duplicates; sorting keeps the batch contents (and the
    # progress output) the same from one run to the next.
    valid_postcodes = sorted({
        p.strip().upper()
        for p in postcodes
        if p and isinstance(p, str) and len(p.strip()) >= 5
    })

    if not valid_postcodes:
        return results

    batch_size = 100
    total_batches = (len(valid_postcodes) + batch_size - 1) // batch_size

    for i, batch_start in enumerate(range(0, len(valid_postcodes), batch_size)):
        batch = valid_postcodes[batch_start : batch_start + batch_size]
        print(f" Geocoding batch {i + 1}/{total_batches} ({len(batch)} postcodes)...")

        try:
            response = requests.post(
                "https://api.postcodes.io/postcodes",
                json={"postcodes": batch},
                timeout=30,
            )
            if response.status_code != 200:
                print(f" Warning: API returned status {response.status_code}")
                continue

            data = response.json()
            # "result" may be missing or null; treat both as "no matches".
            for item in data.get("result") or []:
                if not item or not item.get("result"):
                    continue
                pc = item["query"].upper()
                lat = item["result"].get("latitude")
                lon = item["result"].get("longitude")
                # Compare against None explicitly: 0.0 is a valid coordinate
                # (e.g. a longitude on the Greenwich meridian) and must not
                # be dropped by a truthiness test.
                if lat is not None and lon is not None:
                    results[pc] = (lat, lon)
        except Exception as e:
            # Deliberately broad: geocoding is best-effort per batch, so one
            # bad batch must not abort the whole job.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
|
||||
|
||||
def geocode_schools(force: bool = False) -> None:
    """
    Geocode all schools in the database.

    Selects the schools to process, looks their postcodes up with
    geocode_postcodes_bulk(), writes the returned latitude/longitude onto
    each matching School row, commits once, and prints a coverage summary.

    Args:
        force: If True, re-geocode all postcodes even if already geocoded

    Raises:
        Exception: Any error raised while querying, geocoding or committing
            is re-raised after the session has been rolled back.
    """
    print(f"\n{'='*60}")
    print(f"School Geocoding Job - {datetime.now().isoformat()}")
    print(f"{'='*60}\n")

    db = SessionLocal()

    try:
        # Get schools that need geocoding
        if force:
            # Every school that has a postcode, regardless of existing coordinates
            schools = db.query(School).filter(School.postcode.isnot(None)).all()
            print(f"Force mode: Processing all {len(schools)} schools with postcodes")
        else:
            # Only schools with a postcode that lack a latitude or a longitude
            schools = db.query(School).filter(
                School.postcode.isnot(None),
                (School.latitude.is_(None)) | (School.longitude.is_(None))
            ).all()
            print(f"Found {len(schools)} schools without coordinates")

        if not schools:
            # Early exit; the `finally` clause below still closes the session
            print("No schools to geocode. Exiting.")
            return

        # Extract unique postcodes
        postcodes = list(set(
            s.postcode.strip().upper()
            for s in schools
            if s.postcode
        ))
        print(f"Unique postcodes to geocode: {len(postcodes)}")

        # Geocode in bulk
        print("\nGeocoding postcodes...")
        geocoded = geocode_postcodes_bulk(postcodes)
        print(f"Successfully geocoded: {len(geocoded)} postcodes")

        # Update database
        print("\nUpdating database...")
        updated_count = 0
        failed_count = 0

        for school in schools:
            if not school.postcode:
                continue

            # Normalise the same way as the lookup keys built above
            pc_upper = school.postcode.strip().upper()
            coords = geocoded.get(pc_upper)

            if coords:
                school.latitude = coords[0]
                school.longitude = coords[1]
                updated_count += 1
            else:
                # NOTE: incremented once per school, so this is a count of
                # schools even though the message below says "postcodes".
                failed_count += 1

        # Single commit for all updated rows
        db.commit()

        print(f"\nResults:")
        print(f" - Updated: {updated_count} schools")
        print(f" - Failed (invalid/not found): {failed_count} postcodes")

        # Summary stats
        total_with_coords = db.query(School).filter(
            School.latitude.isnot(None),
            School.longitude.isnot(None)
        ).count()
        total_schools = db.query(School).count()

        print(f"\nDatabase summary:")
        print(f" - Total schools: {total_schools}")
        print(f" - Schools with coordinates: {total_with_coords}")
        # `schools` was non-empty to get here, so the table has at least one row
        print(f" - Coverage: {100*total_with_coords/total_schools:.1f}%")

    except Exception as e:
        print(f"Error during geocoding: {e}")
        # Discard any uncommitted coordinate changes before propagating
        db.rollback()
        raise
    finally:
        db.close()
        # NOTE(review): the closing banner is assumed to belong to this
        # `finally` clause (it follows db.close() with no blank line), so it
        # is printed on success, early exit and failure alike - confirm.
        print(f"\n{'='*60}")
        print(f"Geocoding job completed - {datetime.now().isoformat()}")
        print(f"{'='*60}\n")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read ``--force`` and run the geocoding job."""
    cli = argparse.ArgumentParser(
        description="Geocode school postcodes and update database",
    )
    cli.add_argument(
        "--force",
        help="Re-geocode all postcodes, even if already geocoded",
        action="store_true",
    )
    options = cli.parse_args()

    geocode_schools(force=options.force)


if __name__ == "__main__":
    main()
|
||||
442
scripts/migrate_csv_to_db.py
Normal file
442
scripts/migrate_csv_to_db.py
Normal file
@@ -0,0 +1,442 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to import CSV data into PostgreSQL database.
|
||||
|
||||
Usage:
|
||||
python scripts/migrate_csv_to_db.py [--drop] [--geocode]
|
||||
|
||||
Options:
|
||||
--drop Drop existing tables before migration
|
||||
--geocode Geocode postcodes (requires network access)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from typing import Dict, Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import requests
|
||||
|
||||
from backend.config import settings
|
||||
from backend.database import Base, engine, get_db_session
|
||||
from backend.models import (
|
||||
RESULT_FIELD_MAPPING,
|
||||
SCHOOL_FIELD_MAPPING,
|
||||
School,
|
||||
SchoolResult,
|
||||
)
|
||||
from backend.schemas import (
|
||||
COLUMN_MAPPINGS,
|
||||
LA_CODE_TO_NAME,
|
||||
NULL_VALUES,
|
||||
NUMERIC_COLUMNS,
|
||||
SCHOOL_TYPE_MAP,
|
||||
)
|
||||
|
||||
|
||||
def parse_numeric(value) -> Optional[float]:
    """Parse a numeric value, handling special cases.

    Args:
        value: A raw cell value: a number, a string (optionally carrying a
            ``%`` sign), or a missing value.

    Returns:
        The value as a finite ``float``, or ``None`` when the value is
        missing (per ``pd.isna``), is listed in ``NULL_VALUES``, is empty,
        cannot be parsed, or is not a finite number.
    """
    if pd.isna(value):
        return None

    if isinstance(value, (int, float)):
        number = float(value)
    else:
        str_val = str(value).strip().upper()
        if str_val in NULL_VALUES or str_val == "":
            return None
        try:
            # Remove percentage signs if present
            number = float(str_val.replace("%", ""))
        except ValueError:
            return None

    # float() happily parses "NAN", "INF" and "INFINITY", and numeric inputs
    # may already be infinite; none of these is a usable measurement, so
    # every non-finite result is reported as missing.
    return number if np.isfinite(number) else None
|
||||
|
||||
|
||||
def extract_year_from_folder(folder_name: str) -> Optional[int]:
    """Return the year encoded in a data folder name, or None if there is none.

    A range such as '2023-2024' yields its second year (2024). Otherwise the
    first four-digit run found in the name is used.
    """
    # (pattern, capture group holding the year), tried in priority order:
    # a year range wins over a lone year.
    year_patterns = (
        (r"(\d{4})-(\d{4})", 2),
        (r"(\d{4})", 1),
    )
    for pattern, year_group in year_patterns:
        found = re.search(pattern, folder_name)
        if found is not None:
            return int(found.group(year_group))
    return None
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, tuple]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Postcodes are stripped, upper-cased and de-duplicated, then posted to
    the bulk lookup endpoint in batches of 100. A batch that fails (network
    error, non-200 status, unexpected payload) is reported on stdout and
    skipped so that the remaining batches are still attempted.

    Args:
        postcodes: Raw postcode values. Entries that are empty, not strings,
            or shorter than 5 characters after stripping are ignored.

    Returns:
        Dict of upper-cased postcode -> (latitude, longitude) for every
        postcode the API returned a coordinate pair for.
    """
    results: Dict[str, tuple] = {}

    # A set removes duplicates; sorting keeps the batch contents (and the
    # progress output) the same from one run to the next.
    valid_postcodes = sorted({
        p.strip().upper()
        for p in postcodes
        if p and isinstance(p, str) and len(p.strip()) >= 5
    })

    if not valid_postcodes:
        return results

    batch_size = 100
    total_batches = (len(valid_postcodes) + batch_size - 1) // batch_size

    for i, batch_start in enumerate(range(0, len(valid_postcodes), batch_size)):
        batch = valid_postcodes[batch_start : batch_start + batch_size]
        print(
            f" Geocoding batch {i + 1}/{total_batches} ({len(batch)} postcodes)..."
        )

        try:
            response = requests.post(
                "https://api.postcodes.io/postcodes",
                json={"postcodes": batch},
                timeout=30,
            )
            if response.status_code != 200:
                # Surface rejected batches instead of dropping them silently.
                print(f" Warning: API returned status {response.status_code}")
                continue

            data = response.json()
            # "result" may be missing or null; treat both as "no matches".
            for item in data.get("result") or []:
                if not item or not item.get("result"):
                    continue
                pc = item["query"].upper()
                lat = item["result"].get("latitude")
                lon = item["result"].get("longitude")
                # Compare against None explicitly: 0.0 is a valid coordinate
                # (e.g. a longitude on the Greenwich meridian) and must not
                # be dropped by a truthiness test.
                if lat is not None and lon is not None:
                    results[pc] = (lat, lon)
        except Exception as e:
            # Deliberately broad: geocoding is best-effort per batch, so one
            # bad batch must not abort the whole migration.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
|
||||
|
||||
def load_csv_data(data_dir: Path) -> pd.DataFrame:
    """Read the KS2 results CSV from each year folder into one DataFrame.

    Every sub-directory of ``data_dir`` whose name yields a year and which
    contains ``england_ks2final.csv`` contributes one frame. Each frame has
    its columns renamed via COLUMN_MAPPINGS and gains ``year``,
    ``local_authority`` / ``local_authority_code`` (when a source column is
    available), ``school_type`` (when ``school_type_code`` is present) and a
    combined ``address`` column. Files that fail to parse are reported and
    skipped.

    Returns the concatenation of all frames, or an empty DataFrame when
    nothing was loaded.
    """
    address_columns = ["address1", "address2", "town", "postcode"]
    la_name_candidates = ["LANAME", "LA (name)", "LA_NAME", "LA NAME"]

    def combine_address(record):
        # Join the non-missing, non-blank address parts with ", ".
        parts = [record.get(name) for name in address_columns]
        return ", ".join(
            str(part) for part in parts if pd.notna(part) and str(part).strip()
        )

    frames = []

    for year_dir in sorted(data_dir.iterdir()):
        if not year_dir.is_dir():
            continue

        year = extract_year_from_folder(year_dir.name)
        if not year:
            continue

        # Only the KS2 results file is of interest.
        source = year_dir / "england_ks2final.csv"
        if not source.exists():
            continue

        print(f" Loading {source.name} (year {year})...")

        try:
            frame = pd.read_csv(source, encoding="latin-1", low_memory=False)
        except Exception as e:
            print(f" Error loading {source}: {e}")
            continue

        frame.rename(columns=COLUMN_MAPPINGS, inplace=True)
        frame["year"] = year

        # Local authority name: prefer an explicit name column, otherwise
        # translate the numeric LEA code (falling back to the raw code text).
        la_source = next(
            (name for name in la_name_candidates if name in frame.columns), None
        )
        has_lea = "LEA" in frame.columns

        if la_source and la_source != "local_authority":
            frame["local_authority"] = frame[la_source]
        elif has_lea:
            frame["local_authority_code"] = pd.to_numeric(
                frame["LEA"], errors="coerce"
            )
            mapped_names = frame["local_authority_code"].map(LA_CODE_TO_NAME)
            frame["local_authority"] = mapped_names.fillna(frame["LEA"].astype(str))

        # The LEA code is stored whenever it is available.
        if has_lea:
            frame["local_authority_code"] = pd.to_numeric(
                frame["LEA"], errors="coerce"
            )

        # School type: mapped label, or the raw code when it has no mapping.
        if "school_type_code" in frame.columns:
            type_codes = frame["school_type_code"]
            frame["school_type"] = type_codes.map(SCHOOL_TYPE_MAP).fillna(type_codes)

        # Make sure every address part exists before combining them.
        for name in address_columns:
            if name not in frame.columns:
                frame[name] = None

        frame["address"] = frame.apply(combine_address, axis=1)

        frames.append(frame)
        print(f" Loaded {len(frame)} records")

    if not frames:
        return pd.DataFrame()

    combined = pd.concat(frames, ignore_index=True)
    print(f"\nTotal records loaded: {len(combined)}")
    print(f"Unique schools: {combined['urn'].nunique()}")
    print(f"Years: {sorted(combined['year'].unique())}")
    return combined
|
||||
|
||||
|
||||
# Numeric SchoolResult columns, in the order they are passed to the model.
# Each name is both the DataFrame column and the SchoolResult keyword.
_RESULT_NUMERIC_FIELDS = (
    "total_pupils",
    "eligible_pupils",
    # Expected Standard
    "rwm_expected_pct",
    "reading_expected_pct",
    "writing_expected_pct",
    "maths_expected_pct",
    "gps_expected_pct",
    "science_expected_pct",
    # Higher Standard
    "rwm_high_pct",
    "reading_high_pct",
    "writing_high_pct",
    "maths_high_pct",
    "gps_high_pct",
    # Progress
    "reading_progress",
    "writing_progress",
    "maths_progress",
    # Averages
    "reading_avg_score",
    "maths_avg_score",
    "gps_avg_score",
    # Context
    "disadvantaged_pct",
    "eal_pct",
    "sen_support_pct",
    "sen_ehcp_pct",
    "stability_pct",
    # Gender
    "rwm_expected_boys_pct",
    "rwm_expected_girls_pct",
    "rwm_high_boys_pct",
    "rwm_high_girls_pct",
    # Disadvantaged
    "rwm_expected_disadvantaged_pct",
    "rwm_expected_non_disadvantaged_pct",
    "disadvantaged_gap",
    # 3-Year
    "rwm_expected_3yr_pct",
    "reading_avg_3yr",
    "maths_avg_3yr",
)


def _to_int_or_none(value) -> Optional[int]:
    """Parse values such as 12345, "12345.0" or " 12 " to int; None if missing/invalid."""
    if pd.isna(value):
        return None
    try:
        return int(float(str(value).strip()))
    except (ValueError, TypeError, OverflowError):
        return None


def _value_or_default(row, column: str, default=None):
    """Return ``row[column]``, or *default* when the column is absent or the value is NaN/None."""
    value = row.get(column)
    return value if pd.notna(value) else default


def migrate_data(df: pd.DataFrame, geocode: bool = False) -> None:
    """Migrate DataFrame data to database.

    Rows whose ``urn`` cannot be read as a number are dropped. One ``School``
    is created per URN from the rows sorted by year descending (per column,
    the first non-null value in that order is used), then one
    ``SchoolResult`` is created for every remaining row that has a usable
    year. Everything is committed at the end.

    Args:
        df: Combined yearly data; must contain ``urn`` and ``year`` columns.
        geocode: When True and a ``postcode`` column exists, look up
            coordinates for the schools' postcodes and store them.
    """
    # Clean URN column - convert to integer, drop invalid values
    df = df.copy()
    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Group by URN to get unique schools (use latest year's data)
    school_data = (
        df.sort_values("year", ascending=False).groupby("urn").first().reset_index()
    )
    print(f"\nMigrating {len(school_data)} unique schools...")

    # Geocode if requested. Only the postcodes on the de-duplicated school
    # records are ever looked up below, so only those are sent to the API
    # (historic postcodes from older yearly rows would be wasted requests).
    geocoded = {}
    if geocode and "postcode" in school_data.columns:
        print("\nGeocoding postcodes...")
        postcodes = school_data["postcode"].dropna().unique().tolist()
        geocoded = geocode_postcodes_bulk(postcodes)
        print(f" Successfully geocoded {len(geocoded)} postcodes")

    with get_db_session() as db:
        # Create schools
        urn_to_school_id = {}
        schools_created = 0

        for _, row in school_data.iterrows():
            urn = _to_int_or_none(row.get("urn"))
            if not urn:
                continue

            # Skip if we've already added this URN (handles duplicates in source data)
            if urn in urn_to_school_id:
                continue

            # Coordinates are keyed by the stripped, upper-cased postcode.
            lat, lon = None, None
            postcode = row.get("postcode")
            if postcode and pd.notna(postcode):
                coords = geocoded.get(str(postcode).strip().upper())
                if coords:
                    lat, lon = coords

            school = School(
                urn=urn,
                school_name=_value_or_default(row, "school_name", "Unknown"),
                local_authority=_value_or_default(row, "local_authority"),
                local_authority_code=_to_int_or_none(row.get("local_authority_code")),
                school_type=_value_or_default(row, "school_type"),
                school_type_code=_value_or_default(row, "school_type_code"),
                religious_denomination=_value_or_default(row, "religious_denomination"),
                age_range=_value_or_default(row, "age_range"),
                address1=_value_or_default(row, "address1"),
                address2=_value_or_default(row, "address2"),
                town=_value_or_default(row, "town"),
                postcode=_value_or_default(row, "postcode"),
                latitude=lat,
                longitude=lon,
            )
            db.add(school)
            db.flush()  # Get the ID
            urn_to_school_id[urn] = school.id
            schools_created += 1

            if schools_created % 1000 == 0:
                print(f" Created {schools_created} schools...")

        print(f" Created {schools_created} schools")

        # Create results
        print(f"\nMigrating {len(df)} yearly results...")
        results_created = 0

        for _, row in df.iterrows():
            urn = _to_int_or_none(row.get("urn"))
            if not urn or urn not in urn_to_school_id:
                continue

            year = _to_int_or_none(row.get("year"))
            if not year:
                continue

            result = SchoolResult(
                school_id=urn_to_school_id[urn],
                year=year,
                **{
                    field: parse_numeric(row.get(field))
                    for field in _RESULT_NUMERIC_FIELDS
                },
            )
            db.add(result)
            results_created += 1

            # Flush periodically so pending rows do not pile up in the session.
            if results_created % 10000 == 0:
                print(f" Created {results_created} results...")
                db.flush()

        print(f" Created {results_created} results")

        # Commit all changes
        db.commit()
        print("\nMigration complete!")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the CSV -> PostgreSQL migration.

    Flags:
        --drop     drop the existing tables before recreating them
        --geocode  geocode school postcodes during the migration

    Returns 0 on success, or 1 when no CSV data was found.
    """
    cli = argparse.ArgumentParser(
        description="Migrate CSV data to PostgreSQL database"
    )
    cli.add_argument(
        "--drop", action="store_true", help="Drop existing tables before migration"
    )
    cli.add_argument("--geocode", action="store_true", help="Geocode postcodes")
    options = cli.parse_args()

    banner = "=" * 60
    print(banner)
    print("School Data Migration: CSV -> PostgreSQL")
    print(banner)
    # Only the part after the last '@' is shown, which leaves out whatever
    # precedes it in the URL.
    print(f"\nDatabase: {settings.database_url.split('@')[-1]}")
    print(f"Data directory: {settings.data_dir}")

    if options.drop:
        print("\n⚠️ Dropping existing tables...")
        Base.metadata.drop_all(bind=engine)

    print("\nCreating tables...")
    Base.metadata.create_all(bind=engine)

    print("\nLoading CSV data...")
    loaded = load_csv_data(settings.data_dir)

    if loaded.empty:
        print("No data found to migrate!")
        return 1

    migrate_data(loaded, geocode=options.geocode)
    return 0
|
||||
|
||||
|
||||
# Run the migration when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user