location search beta 1
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m3s

This commit is contained in:
Tudor Sitaru
2026-01-06 16:59:25 +00:00
parent 7684ceb9c0
commit bd3640d50f
6 changed files with 484 additions and 35 deletions

View File

@@ -5,6 +5,7 @@ Uses real data from UK Government Compare School Performance downloads.
"""
from contextlib import asynccontextmanager
import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
@@ -13,7 +14,7 @@ from typing import Optional
from .config import settings
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
from .data_loader import load_school_data, clear_cache
from .data_loader import load_school_data, clear_cache, geocode_single_postcode, geocode_postcodes_bulk, haversine_distance
from .utils import clean_for_json
@@ -54,11 +55,25 @@ async def root():
return FileResponse(settings.frontend_dir / "index.html")
@app.get("/compare")
async def serve_compare():
"""Serve the frontend for /compare route (SPA routing)."""
return FileResponse(settings.frontend_dir / "index.html")
@app.get("/rankings")
async def serve_rankings():
"""Serve the frontend for /rankings route (SPA routing)."""
return FileResponse(settings.frontend_dir / "index.html")
@app.get("/api/schools")
async def get_schools(
search: Optional[str] = Query(None, description="Search by school name"),
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
school_type: Optional[str] = Query(None, description="Filter by school type"),
postcode: Optional[str] = Query(None, description="Search near postcode"),
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
page: int = Query(1, ge=1, description="Page number"),
page_size: int = Query(None, ge=1, le=100, description="Results per page"),
):
@@ -66,6 +81,7 @@ async def get_schools(
Get list of unique primary schools with pagination.
Returns paginated results with total count for efficient loading.
Supports location-based search using postcode.
"""
df = load_school_data()
@@ -80,9 +96,45 @@ async def get_schools(
latest_year = df.groupby('urn')['year'].max().reset_index()
df_latest = df.merge(latest_year, on=['urn', 'year'])
available_cols = [c for c in SCHOOL_COLUMNS if c in df_latest.columns]
# Include lat/long in columns for location search
location_cols = ['latitude', 'longitude']
available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
# Location-based search
search_coords = None
if postcode:
coords = geocode_single_postcode(postcode)
if coords:
search_coords = coords
schools_df = schools_df.copy()
# Geocode school postcodes on-demand if not already cached
if 'postcode' in schools_df.columns:
unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
geocoded = geocode_postcodes_bulk(unique_postcodes)
# Add lat/long from geocoded data
schools_df['latitude'] = schools_df['postcode'].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
)
schools_df['longitude'] = schools_df['postcode'].apply(
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
)
# Filter by distance
def calc_distance(row):
if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
return float('inf')
return haversine_distance(
search_coords[0], search_coords[1],
row['latitude'], row['longitude']
)
schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
schools_df = schools_df[schools_df['distance'] <= radius]
schools_df = schools_df.sort_values('distance')
# Apply filters
if search:
search_lower = search.lower()
@@ -103,12 +155,18 @@ async def get_schools(
end_idx = start_idx + page_size
schools_df = schools_df.iloc[start_idx:end_idx]
# Remove internal columns before sending
output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
if 'distance' in schools_df.columns:
output_cols.append('distance')
return {
"schools": clean_for_json(schools_df),
"schools": clean_for_json(schools_df[output_cols]),
"total": total,
"page": page,
"page_size": page_size,
"total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
"search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
}

View File

@@ -8,7 +8,8 @@ import numpy as np
from pathlib import Path
from functools import lru_cache
import re
from typing import Optional
import requests
from typing import Optional, Dict, Tuple
from .config import settings
from .schemas import (
@@ -19,6 +20,78 @@ from .schemas import (
LA_CODE_TO_NAME,
)
# Cache for postcode geocoding
_postcode_cache: Dict[str, Tuple[float, float]] = {}
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using the postcodes.io API.

    Args:
        postcodes: Raw postcode values; non-strings (e.g. NaN from pandas),
            blanks and entries shorter than 5 characters after stripping
            are silently dropped.

    Returns:
        Dict mapping normalised (stripped, upper-cased) postcode ->
        (latitude, longitude). Postcodes that fail to geocode are absent.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # Normalise, drop junk and deduplicate in one pass.
    valid_postcodes = list({
        p.strip().upper()
        for p in postcodes
        if isinstance(p, str) and len(p.strip()) >= 5
    })

    if not valid_postcodes:
        return results

    # postcodes.io accepts at most 100 postcodes per bulk request.
    batch_size = 100
    for i in range(0, len(valid_postcodes), batch_size):
        batch = valid_postcodes[i:i + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30
            )
            if response.status_code == 200:
                data = response.json()
                for item in data.get('result', []):
                    if item and item.get('result'):
                        pc = item['query'].upper()
                        lat = item['result'].get('latitude')
                        lon = item['result'].get('longitude')
                        # Compare against None explicitly: longitude 0.0 is a
                        # valid coordinate on the Greenwich meridian (which
                        # crosses the UK) and must not be discarded as falsy.
                        if lat is not None and lon is not None:
                            results[pc] = (lat, lon)
        except Exception as e:
            # Best-effort: a failed batch is reported and skipped so the
            # remaining batches can still be geocoded.
            print(f" Warning: Geocoding batch failed: {e}")
    return results
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
    """
    Geocode a single postcode using the postcodes.io API.

    Successful lookups are memoised in the module-level ``_postcode_cache``
    so repeated searches for the same postcode skip the network round-trip.

    Returns:
        (latitude, longitude) on success; None for empty input, unknown
        postcodes, or any network/parse failure (best-effort lookup).
    """
    if not postcode:
        return None
    postcode = postcode.strip().upper()

    # Serve repeat lookups straight from the in-process cache.
    if postcode in _postcode_cache:
        return _postcode_cache[postcode]

    try:
        response = requests.get(
            f'https://api.postcodes.io/postcodes/{postcode}',
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            if data.get('result'):
                lat = data['result'].get('latitude')
                lon = data['result'].get('longitude')
                # Compare against None explicitly: 0.0 is a legitimate
                # longitude on the Greenwich meridian and must not be
                # treated as falsy.
                if lat is not None and lon is not None:
                    _postcode_cache[postcode] = (lat, lon)
                    return (lat, lon)
    except Exception:
        # Best-effort: any network/parse failure means "not found".
        pass
    return None
def extract_year_from_folder(folder_name: str) -> Optional[int]:
"""Extract the end year from folder name like '2023-2024' -> 2024."""
@@ -151,6 +224,10 @@ def load_year_data(year_folder: Path, year: int) -> Optional[pd.DataFrame]:
if col in df.columns:
df[col] = parse_numeric_vectorized(df[col])
# Initialize lat/long columns
df['latitude'] = None
df['longitude'] = None
print(f" Loaded {len(df)} schools for year {year}")
return df
@@ -184,6 +261,10 @@ def load_school_data() -> pd.DataFrame:
print(f"\nTotal records loaded: {len(result)}")
print(f"Unique schools: {result['urn'].nunique()}")
print(f"Years: {sorted(result['year'].unique())}")
# Note: Geocoding is done lazily when location search is used
# This keeps startup fast
return result
else:
print("No data files found. Creating empty DataFrame.")
@@ -194,3 +275,24 @@ def clear_cache():
"""Clear the data cache to force reload."""
load_school_data.cache_clear()
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Great-circle distance in miles between two (latitude, longitude) points.

    Uses the haversine formula with an Earth radius of 3956 miles.
    """
    from math import radians, cos, sin, asin, sqrt

    # Work in radians throughout.
    phi1, lam1, phi2, lam2 = (radians(v) for v in (lat1, lon1, lat2, lon2))

    # Haversine of the central angle between the two points.
    half_chord = (
        sin((phi2 - phi1) / 2) ** 2
        + cos(phi1) * cos(phi2) * sin((lam2 - lam1) / 2) ** 2
    )
    central_angle = 2 * asin(sqrt(half_chord))

    earth_radius_miles = 3956
    return central_angle * earth_radius_miles