location search beta 1
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m3s
This commit is contained in:
@@ -5,6 +5,7 @@ Uses real data from UK Government Compare School Performance downloads.
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
import pandas as pd
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
@@ -13,7 +14,7 @@ from typing import Optional
|
||||
|
||||
from .config import settings
|
||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||
from .data_loader import load_school_data, clear_cache
|
||||
from .data_loader import load_school_data, clear_cache, geocode_single_postcode, geocode_postcodes_bulk, haversine_distance
|
||||
from .utils import clean_for_json
|
||||
|
||||
|
||||
@@ -54,11 +55,25 @@ async def root():
|
||||
return FileResponse(settings.frontend_dir / "index.html")
|
||||
|
||||
|
||||
@app.get("/compare")
async def serve_compare():
    """Return the SPA entry page so client-side routing can render /compare."""
    index_page = settings.frontend_dir / "index.html"
    return FileResponse(index_page)
|
||||
|
||||
|
||||
@app.get("/rankings")
async def serve_rankings():
    """Return the SPA entry page so client-side routing can render /rankings."""
    index_page = settings.frontend_dir / "index.html"
    return FileResponse(index_page)
|
||||
|
||||
|
||||
@app.get("/api/schools")
|
||||
async def get_schools(
|
||||
search: Optional[str] = Query(None, description="Search by school name"),
|
||||
local_authority: Optional[str] = Query(None, description="Filter by local authority"),
|
||||
school_type: Optional[str] = Query(None, description="Filter by school type"),
|
||||
postcode: Optional[str] = Query(None, description="Search near postcode"),
|
||||
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
|
||||
page: int = Query(1, ge=1, description="Page number"),
|
||||
page_size: int = Query(None, ge=1, le=100, description="Results per page"),
|
||||
):
|
||||
@@ -66,6 +81,7 @@ async def get_schools(
|
||||
Get list of unique primary schools with pagination.
|
||||
|
||||
Returns paginated results with total count for efficient loading.
|
||||
Supports location-based search using postcode.
|
||||
"""
|
||||
df = load_school_data()
|
||||
|
||||
@@ -80,9 +96,45 @@ async def get_schools(
|
||||
latest_year = df.groupby('urn')['year'].max().reset_index()
|
||||
df_latest = df.merge(latest_year, on=['urn', 'year'])
|
||||
|
||||
available_cols = [c for c in SCHOOL_COLUMNS if c in df_latest.columns]
|
||||
# Include lat/long in columns for location search
|
||||
location_cols = ['latitude', 'longitude']
|
||||
available_cols = [c for c in SCHOOL_COLUMNS + location_cols if c in df_latest.columns]
|
||||
schools_df = df_latest[available_cols].drop_duplicates(subset=['urn'])
|
||||
|
||||
# Location-based search
|
||||
search_coords = None
|
||||
if postcode:
|
||||
coords = geocode_single_postcode(postcode)
|
||||
if coords:
|
||||
search_coords = coords
|
||||
schools_df = schools_df.copy()
|
||||
|
||||
# Geocode school postcodes on-demand if not already cached
|
||||
if 'postcode' in schools_df.columns:
|
||||
unique_postcodes = schools_df['postcode'].dropna().unique().tolist()
|
||||
geocoded = geocode_postcodes_bulk(unique_postcodes)
|
||||
|
||||
# Add lat/long from geocoded data
|
||||
schools_df['latitude'] = schools_df['postcode'].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0] if pd.notna(pc) else None
|
||||
)
|
||||
schools_df['longitude'] = schools_df['postcode'].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1] if pd.notna(pc) else None
|
||||
)
|
||||
|
||||
# Filter by distance
|
||||
def calc_distance(row):
|
||||
if pd.isna(row.get('latitude')) or pd.isna(row.get('longitude')):
|
||||
return float('inf')
|
||||
return haversine_distance(
|
||||
search_coords[0], search_coords[1],
|
||||
row['latitude'], row['longitude']
|
||||
)
|
||||
|
||||
schools_df['distance'] = schools_df.apply(calc_distance, axis=1)
|
||||
schools_df = schools_df[schools_df['distance'] <= radius]
|
||||
schools_df = schools_df.sort_values('distance')
|
||||
|
||||
# Apply filters
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
@@ -103,12 +155,18 @@ async def get_schools(
|
||||
end_idx = start_idx + page_size
|
||||
schools_df = schools_df.iloc[start_idx:end_idx]
|
||||
|
||||
# Remove internal columns before sending
|
||||
output_cols = [c for c in schools_df.columns if c not in ['latitude', 'longitude']]
|
||||
if 'distance' in schools_df.columns:
|
||||
output_cols.append('distance')
|
||||
|
||||
return {
|
||||
"schools": clean_for_json(schools_df),
|
||||
"schools": clean_for_json(schools_df[output_cols]),
|
||||
"total": total,
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
"total_pages": (total + page_size - 1) // page_size if page_size > 0 else 0,
|
||||
"search_location": {"postcode": postcode, "radius": radius} if search_coords else None,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,8 @@ import numpy as np
|
||||
from pathlib import Path
|
||||
from functools import lru_cache
|
||||
import re
|
||||
from typing import Optional
|
||||
import requests
|
||||
from typing import Optional, Dict, Tuple
|
||||
|
||||
from .config import settings
|
||||
from .schemas import (
|
||||
@@ -19,6 +20,78 @@ from .schemas import (
|
||||
LA_CODE_TO_NAME,
|
||||
)
|
||||
|
||||
# Cache for postcode geocoding
|
||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using the postcodes.io API.

    Args:
        postcodes: Raw postcode strings; entries that are None, non-string,
            or shorter than 5 characters after stripping are discarded.

    Returns:
        Dict mapping normalized (stripped, upper-cased) postcode to
        (latitude, longitude). Postcodes that fail to geocode are omitted.
        Successful lookups are also stored in the module-level
        _postcode_cache so later calls (bulk or single) can reuse them.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # Normalize, drop invalid entries, and deduplicate.
    # NOTE(review): assumes 5 chars is the minimum valid UK postcode length.
    valid_postcodes = list({
        p.strip().upper()
        for p in postcodes
        if p and isinstance(p, str) and len(p.strip()) >= 5
    })

    if not valid_postcodes:
        return results

    # Serve already-geocoded postcodes from the module cache (keeps this
    # function consistent with geocode_single_postcode) and only query the
    # API for the remainder.
    pending = []
    for pc in valid_postcodes:
        if pc in _postcode_cache:
            results[pc] = _postcode_cache[pc]
        else:
            pending.append(pc)

    # postcodes.io allows max 100 postcodes per request
    batch_size = 100
    for i in range(0, len(pending), batch_size):
        batch = pending[i:i + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30
            )
            if response.status_code == 200:
                data = response.json()
                for item in data.get('result', []):
                    if item and item.get('result'):
                        pc = item['query'].upper()
                        lat = item['result'].get('latitude')
                        lon = item['result'].get('longitude')
                        # `is not None` rather than truthiness: longitude 0.0
                        # (the Greenwich meridian runs through London) is a
                        # valid UK coordinate and must not be dropped.
                        if lat is not None and lon is not None:
                            results[pc] = (lat, lon)
                            _postcode_cache[pc] = (lat, lon)
        except Exception as e:
            # Best-effort: a failed batch just leaves its postcodes ungeocoded.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
|
||||
|
||||
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
    """
    Geocode a single UK postcode using the postcodes.io API.

    Args:
        postcode: Raw postcode string; normalized to stripped upper-case
            before lookup.

    Returns:
        (latitude, longitude) on success, or None for empty input, an
        unknown postcode, or any request/parse failure. Successful lookups
        are memoized in the module-level _postcode_cache; failures are not,
        so they will be retried on the next call.
    """
    if not postcode:
        return None

    postcode = postcode.strip().upper()

    # Check cache first
    if postcode in _postcode_cache:
        return _postcode_cache[postcode]

    try:
        response = requests.get(
            f'https://api.postcodes.io/postcodes/{postcode}',
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            if data.get('result'):
                lat = data['result'].get('latitude')
                lon = data['result'].get('longitude')
                # `is not None` rather than truthiness: longitude 0.0 (the
                # Greenwich meridian runs through London) is a valid UK
                # coordinate and must not be treated as missing.
                if lat is not None and lon is not None:
                    _postcode_cache[postcode] = (lat, lon)
                    return (lat, lon)
    except Exception:
        # Best-effort: geocoding failure degrades to "no location"; any
        # network or JSON error is deliberately swallowed.
        pass

    return None
|
||||
|
||||
|
||||
def extract_year_from_folder(folder_name: str) -> Optional[int]:
|
||||
"""Extract the end year from folder name like '2023-2024' -> 2024."""
|
||||
@@ -151,6 +224,10 @@ def load_year_data(year_folder: Path, year: int) -> Optional[pd.DataFrame]:
|
||||
if col in df.columns:
|
||||
df[col] = parse_numeric_vectorized(df[col])
|
||||
|
||||
# Initialize lat/long columns
|
||||
df['latitude'] = None
|
||||
df['longitude'] = None
|
||||
|
||||
print(f" Loaded {len(df)} schools for year {year}")
|
||||
return df
|
||||
|
||||
@@ -184,6 +261,10 @@ def load_school_data() -> pd.DataFrame:
|
||||
print(f"\nTotal records loaded: {len(result)}")
|
||||
print(f"Unique schools: {result['urn'].nunique()}")
|
||||
print(f"Years: {sorted(result['year'].unique())}")
|
||||
|
||||
# Note: Geocoding is done lazily when location search is used
|
||||
# This keeps startup fast
|
||||
|
||||
return result
|
||||
else:
|
||||
print("No data files found. Creating empty DataFrame.")
|
||||
@@ -194,3 +275,24 @@ def clear_cache():
|
||||
"""Clear the data cache to force reload."""
|
||||
load_school_data.cache_clear()
|
||||
|
||||
|
||||
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Great-circle distance in miles between two (latitude, longitude) points,
    computed with the haversine formula on a spherical Earth.
    """
    from math import radians, cos, sin, asin, sqrt

    # Earth's radius in miles
    EARTH_RADIUS_MILES = 3956

    # Work in radians throughout.
    phi1, lam1, phi2, lam2 = (radians(v) for v in (lat1, lon1, lat2, lon2))

    # Haversine formula: a is the squared half-chord length between the points.
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    a = sin(half_dphi) ** 2 + cos(phi1) * cos(phi2) * sin(half_dlam) ** 2

    # Central angle, then arc length.
    return 2 * asin(sqrt(a)) * EARTH_RADIUS_MILES
|
||||
|
||||
|
||||
Reference in New Issue
Block a user