diff --git a/backend/app.py b/backend/app.py
index d0f5dcd..49368b2 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -8,6 +8,7 @@ import re
 from contextlib import asynccontextmanager
 from typing import Optional
 
+import numpy as np
 import pandas as pd
 from fastapi import FastAPI, HTTPException, Query, Request, Depends, Header
 from fastapi.middleware.cors import CORSMiddleware
@@ -21,7 +22,6 @@ from starlette.middleware.base import BaseHTTPMiddleware
 from .config import settings
 from .data_loader import (
     clear_cache,
-    haversine_distance,
     load_school_data,
     geocode_single_postcode,
 )
@@ -276,17 +276,36 @@ async def get_schools(
         schools_df = schools_df.copy()
 
         # Filter by distance using pre-geocoded lat/long from database
-        def calc_distance(row):
-            if pd.isna(row["latitude"]) or pd.isna(row["longitude"]):
-                return float("inf")
-            return haversine_distance(
-                search_coords[0],
-                search_coords[1],
-                row["latitude"],
-                row["longitude"],
-            )
+        # Use vectorized haversine calculation for better performance.
+        # Coerce both columns to float arrays first: None / object-dtype / pd.NA
+        # entries become NaN instead of raising inside the NumPy ufuncs.
+        lat1, lon1 = search_coords
+        lat2 = pd.to_numeric(schools_df["latitude"], errors="coerce").to_numpy(
+            dtype=float, na_value=np.nan
+        )
+        lon2 = pd.to_numeric(schools_df["longitude"], errors="coerce").to_numpy(
+            dtype=float, na_value=np.nan
+        )
 
-        schools_df["distance"] = schools_df.apply(calc_distance, axis=1)
+        # Vectorized haversine formula
+        earth_radius_miles = 3959
+        lat1_rad = np.radians(lat1)
+        lat2_rad = np.radians(lat2)
+        dlat = np.radians(lat2 - lat1)
+        dlon = np.radians(lon2 - lon1)
+
+        a = (
+            np.sin(dlat / 2) ** 2
+            + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
+        )
+        # Rounding can push `a` marginally outside [0, 1]; clip so sqrt stays real
+        a = np.clip(a, 0.0, 1.0)
+        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
+        distances = earth_radius_miles * c
+
+        # Rows with missing coordinates get an infinite distance so the radius filter drops them
+        missing = np.isnan(lat2) | np.isnan(lon2)
+        schools_df["distance"] = np.where(missing, np.inf, distances)
         schools_df = schools_df[schools_df["distance"] <= radius]
         schools_df = schools_df.sort_values("distance")
 