location search beta 1
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m3s

This commit is contained in:
Tudor Sitaru
2026-01-06 16:59:25 +00:00
parent 7684ceb9c0
commit bd3640d50f
6 changed files with 484 additions and 35 deletions

View File

@@ -8,7 +8,8 @@ import numpy as np
from pathlib import Path
from functools import lru_cache
import re
from typing import Optional
import requests
from typing import Optional, Dict, Tuple
from .config import settings
from .schemas import (
@@ -19,6 +20,78 @@ from .schemas import (
LA_CODE_TO_NAME,
)
# Cache for postcode geocoding: maps a normalised (stripped, upper-cased)
# postcode to its (latitude, longitude) pair. It is read and written by
# geocode_single_postcode; nothing in the visible code evicts entries.
_postcode_cache: Dict[str, Tuple[float, float]] = {}
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Entries that are not strings, or that are shorter than 5 characters once
    stripped, are ignored. The remaining postcodes are stripped, upper-cased
    and de-duplicated before being sent to the API in batches.

    Args:
        postcodes: Raw postcode values; may contain None or other
            non-string entries. None is treated as an empty input.

    Returns:
        Dict of normalised postcode -> (latitude, longitude). Postcodes the
        API could not resolve, and batches whose request failed, are simply
        absent from the result.
    """
    results: Dict[str, Tuple[float, float]] = {}
    if postcodes is None:
        return results

    # Remove invalid postcodes and deduplicate (dict.fromkeys keeps the
    # first-seen order, so batch composition is deterministic).
    valid_postcodes = list(dict.fromkeys(
        p.strip().upper()
        for p in postcodes
        if isinstance(p, str) and len(p.strip()) >= 5
    ))
    if not valid_postcodes:
        return results

    # postcodes.io allows max 100 postcodes per request
    batch_size = 100
    for start in range(0, len(valid_postcodes), batch_size):
        batch = valid_postcodes[start:start + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30,
            )
            if response.status_code != 200:
                print(f" Warning: Geocoding batch returned HTTP {response.status_code}")
                continue
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: a failed batch is reported and skipped so the
            # remaining batches are still attempted.
            print(f" Warning: Geocoding batch failed: {e}")
            continue

        items = data.get('result') if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue

        for item in items:
            if not isinstance(item, dict):
                continue
            query = item.get('query')
            match = item.get('result')
            # An unresolved postcode comes back with a null 'result'.
            if not isinstance(query, str) or not isinstance(match, dict):
                continue
            lat = match.get('latitude')
            lon = match.get('longitude')
            # Compare against None explicitly: 0.0 is a valid coordinate
            # (e.g. a longitude on the Greenwich meridian).
            if lat is not None and lon is not None:
                results[query.upper()] = (lat, lon)

    return results
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
    """Geocode a single postcode using postcodes.io API.

    The postcode is stripped and upper-cased before use. Successful lookups
    are stored in the module-level ``_postcode_cache`` and served from there
    on later calls.

    Args:
        postcode: Postcode to look up. Empty, whitespace-only, None or
            non-string values yield None without any network call.

    Returns:
        (latitude, longitude) on success, or None when the postcode is
        invalid, unknown to the API, or the request fails.
    """
    from urllib.parse import quote

    if not postcode or not isinstance(postcode, str):
        return None

    postcode = postcode.strip().upper()
    if not postcode:
        # Whitespace-only input would otherwise hit the collection endpoint.
        return None

    # Check cache first
    cached = _postcode_cache.get(postcode)
    if cached is not None:
        return cached

    try:
        # Percent-encode the postcode so spaces and characters such as '/'
        # or '?' in user input cannot change the request path.
        response = requests.get(
            f"https://api.postcodes.io/postcodes/{quote(postcode, safe='')}",
            timeout=10,
        )
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Best effort: report the failure and let the caller see "not found".
        print(f" Warning: Geocoding failed for {postcode}: {e}")
        return None

    match = data.get('result') if isinstance(data, dict) else None
    if not isinstance(match, dict):
        return None

    lat = match.get('latitude')
    lon = match.get('longitude')
    # Compare against None explicitly: 0.0 is a valid coordinate.
    if lat is None or lon is None:
        return None

    _postcode_cache[postcode] = (lat, lon)
    return (lat, lon)
def extract_year_from_folder(folder_name: str) -> Optional[int]:
"""Extract the end year from folder name like '2023-2024' -> 2024."""
@@ -151,6 +224,10 @@ def load_year_data(year_folder: Path, year: int) -> Optional[pd.DataFrame]:
if col in df.columns:
df[col] = parse_numeric_vectorized(df[col])
# Initialize lat/long columns
df['latitude'] = None
df['longitude'] = None
print(f" Loaded {len(df)} schools for year {year}")
return df
@@ -184,6 +261,10 @@ def load_school_data() -> pd.DataFrame:
print(f"\nTotal records loaded: {len(result)}")
print(f"Unique schools: {result['urn'].nunique()}")
print(f"Years: {sorted(result['year'].unique())}")
# Note: Geocoding is done lazily when location search is used
# This keeps startup fast
return result
else:
print("No data files found. Creating empty DataFrame.")
@@ -194,3 +275,24 @@ def clear_cache():
"""Clear the data cache to force reload."""
load_school_data.cache_clear()
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great circle distance between two points on Earth (in miles).

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        Distance in miles, computed with the haversine formula on a sphere
        of radius 3956 miles.
    """
    from math import radians, cos, sin, asin, sqrt

    # Convert to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise a
    # "math domain error"; clamp it back into the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))

    # Earth's radius in miles
    r = 3956
    return c * r