location search beta 1
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m3s
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 1m3s
This commit is contained in:
@@ -8,7 +8,8 @@ import numpy as np
|
||||
from pathlib import Path
|
||||
from functools import lru_cache
|
||||
import re
|
||||
from typing import Optional
|
||||
import requests
|
||||
from typing import Optional, Dict, Tuple
|
||||
|
||||
from .config import settings
|
||||
from .schemas import (
|
||||
@@ -19,6 +20,78 @@ from .schemas import (
|
||||
LA_CODE_TO_NAME,
|
||||
)
|
||||
|
||||
# Cache for postcode geocoding
|
||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Entries that are not strings, or that are shorter than 5 characters once
    stripped, are ignored. The remaining postcodes are upper-cased,
    de-duplicated and sent in batches of at most 100 per request.

    This is best-effort: a batch whose request fails or whose body is not
    valid JSON is reported with a printed warning and skipped, and a batch
    answered with a non-200 status is skipped silently, so the returned
    mapping may cover only part of the input.

    Returns dict of postcode -> (latitude, longitude), keyed by the
    upper-cased query string echoed back by the API.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # Remove invalid postcodes and deduplicate. dict.fromkeys keeps first-seen
    # order, so the batches are deterministic (a set would shuffle them).
    valid_postcodes = list(dict.fromkeys(
        p.strip().upper()
        for p in postcodes
        if isinstance(p, str) and len(p.strip()) >= 5
    ))

    if not valid_postcodes:
        return results

    # postcodes.io allows max 100 postcodes per request
    batch_size = 100
    for start in range(0, len(valid_postcodes), batch_size):
        batch = valid_postcodes[start:start + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30,
            )
            if response.status_code != 200:
                continue
            # response.json() raises a ValueError subclass on a malformed body.
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f" Warning: Geocoding batch failed: {e}")
            continue

        # Guard the payload shape explicitly instead of relying on a blanket
        # exception handler: "result" may be null or missing.
        items = data.get('result') if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue

        for item in items:
            if not isinstance(item, dict):
                continue
            query = item.get('query')
            match = item.get('result')  # null when the postcode is unknown
            if not isinstance(query, str) or not isinstance(match, dict):
                continue
            lat = match.get('latitude')
            lon = match.get('longitude')
            # Compare against None: 0.0 is a valid coordinate but is falsy.
            if lat is not None and lon is not None:
                results[query.upper()] = (lat, lon)

    return results
|
||||
|
||||
|
||||
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
    """
    Geocode a single postcode using postcodes.io API.

    The postcode is stripped and upper-cased, then looked up in the
    module-level ``_postcode_cache`` before any request is made. Successful
    lookups are stored in that cache; failures are not cached.

    Returns (latitude, longitude), or None when the input is empty or not a
    string, the API does not answer with status 200, the response carries no
    coordinates, or the request fails (a warning is printed in that case).
    """
    from urllib.parse import quote

    # Reject missing / non-string values before calling str methods on them.
    if not postcode or not isinstance(postcode, str):
        return None

    postcode = postcode.strip().upper()
    if not postcode:
        # Whitespace-only input: nothing to look up.
        return None

    # Check cache first
    if postcode in _postcode_cache:
        return _postcode_cache[postcode]

    # Percent-encode the value so spaces, '/', '?' or '#' in the input cannot
    # change the request path.
    encoded = quote(postcode, safe='')
    try:
        response = requests.get(
            f'https://api.postcodes.io/postcodes/{encoded}',
            timeout=10,
        )
        if response.status_code != 200:
            return None
        # response.json() raises a ValueError subclass on a malformed body.
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f" Warning: Geocoding failed for {postcode}: {e}")
        return None

    match = data.get('result') if isinstance(data, dict) else None
    if not isinstance(match, dict):
        return None

    lat = match.get('latitude')
    lon = match.get('longitude')
    # Compare against None: 0.0 is a valid coordinate but is falsy.
    if lat is None or lon is None:
        return None

    _postcode_cache[postcode] = (lat, lon)
    return (lat, lon)
|
||||
|
||||
|
||||
def extract_year_from_folder(folder_name: str) -> Optional[int]:
|
||||
"""Extract the end year from folder name like '2023-2024' -> 2024."""
|
||||
@@ -151,6 +224,10 @@ def load_year_data(year_folder: Path, year: int) -> Optional[pd.DataFrame]:
|
||||
if col in df.columns:
|
||||
df[col] = parse_numeric_vectorized(df[col])
|
||||
|
||||
# Initialize lat/long columns
|
||||
df['latitude'] = None
|
||||
df['longitude'] = None
|
||||
|
||||
print(f" Loaded {len(df)} schools for year {year}")
|
||||
return df
|
||||
|
||||
@@ -184,6 +261,10 @@ def load_school_data() -> pd.DataFrame:
|
||||
print(f"\nTotal records loaded: {len(result)}")
|
||||
print(f"Unique schools: {result['urn'].nunique()}")
|
||||
print(f"Years: {sorted(result['year'].unique())}")
|
||||
|
||||
# Note: Geocoding is done lazily when location search is used
|
||||
# This keeps startup fast
|
||||
|
||||
return result
|
||||
else:
|
||||
print("No data files found. Creating empty DataFrame.")
|
||||
@@ -194,3 +275,24 @@ def clear_cache():
|
||||
"""Clear the data cache to force reload."""
|
||||
load_school_data.cache_clear()
|
||||
|
||||
|
||||
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great circle distance between two points on Earth (in miles).

    All four arguments are coordinates in decimal degrees. The result uses
    the haversine formula with an Earth radius of 3956 miles.
    """
    from math import radians, cos, sin, asin, sqrt

    # Convert to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (e.g. for antipodal
    # points), which would make asin() raise a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))

    # Earth's radius in miles
    r = 3956

    return c * r
|
||||
|
||||
|
||||
Reference in New Issue
Block a user