moving geocoding to a background task
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 57s
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 57s
This commit is contained in:
@@ -21,10 +21,9 @@ from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from .config import settings
|
||||
from .data_loader import (
|
||||
clear_cache,
|
||||
geocode_postcodes_bulk,
|
||||
geocode_single_postcode,
|
||||
haversine_distance,
|
||||
load_school_data,
|
||||
geocode_single_postcode,
|
||||
)
|
||||
from .data_loader import get_data_info as get_db_info
|
||||
from .database import init_db
|
||||
@@ -256,7 +255,7 @@ async def get_schools(
|
||||
]
|
||||
schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"])
|
||||
|
||||
# Location-based search
|
||||
# Location-based search (uses pre-geocoded data from database)
|
||||
search_coords = None
|
||||
if postcode:
|
||||
coords = geocode_single_postcode(postcode)
|
||||
@@ -264,24 +263,7 @@ async def get_schools(
|
||||
search_coords = coords
|
||||
schools_df = schools_df.copy()
|
||||
|
||||
# Geocode school postcodes on-demand if not already cached
|
||||
if "postcode" in schools_df.columns:
|
||||
unique_postcodes = schools_df["postcode"].dropna().unique().tolist()
|
||||
geocoded = geocode_postcodes_bulk(unique_postcodes)
|
||||
|
||||
# Add lat/long from geocoded data
|
||||
schools_df["latitude"] = schools_df["postcode"].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[0]
|
||||
if pd.notna(pc)
|
||||
else None
|
||||
)
|
||||
schools_df["longitude"] = schools_df["postcode"].apply(
|
||||
lambda pc: geocoded.get(str(pc).strip().upper(), (None, None))[1]
|
||||
if pd.notna(pc)
|
||||
else None
|
||||
)
|
||||
|
||||
# Filter by distance
|
||||
# Filter by distance using pre-geocoded lat/long from database
|
||||
def calc_distance(row):
|
||||
if pd.isna(row.get("latitude")) or pd.isna(row.get("longitude")):
|
||||
return float("inf")
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
"""
|
||||
Data loading module that queries from PostgreSQL database.
|
||||
Provides efficient queries with caching and lazy loading.
|
||||
|
||||
Note: School geocoding is handled by a separate cron job (scripts/geocode_schools.py).
|
||||
Only user search postcodes are geocoded on-demand via geocode_single_postcode().
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
@@ -15,58 +18,10 @@ from .config import settings
|
||||
from .database import SessionLocal, get_db_session
|
||||
from .models import School, SchoolResult
|
||||
|
||||
# Cache for postcode geocoding
|
||||
# Cache for user search postcode geocoding (not for school data)
|
||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||
|
||||
|
||||
def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using postcodes.io API.

    Each postcode is normalised (whitespace-stripped, upper-cased) before
    lookup. Entries already present in the module-level ``_postcode_cache``
    are answered from it; the remainder are sent to the API in batches and
    every successful lookup is written back to the cache.

    Args:
        postcodes: Iterable of candidate postcodes. Values that are falsy or
            not strings are skipped, as are uncached postcodes shorter than
            5 characters after normalisation.

    Returns:
        Dict of normalised postcode -> (latitude, longitude). Postcodes the
        API could not resolve, or whose batch request failed, are omitted.

    Failures are best-effort: a failed batch prints a warning and the
    remaining batches are still attempted; no exception is propagated.
    """
    results: Dict[str, Tuple[float, float]] = {}

    # Check cache first
    uncached = []
    for pc in postcodes:
        if not pc or not isinstance(pc, str):
            continue
        pc_upper = pc.strip().upper()
        if pc_upper in _postcode_cache:
            results[pc_upper] = _postcode_cache[pc_upper]
        elif len(pc_upper) >= 5:
            uncached.append(pc_upper)

    if not uncached:
        return results

    # De-duplicate while keeping first-seen order so batches are deterministic.
    uncached = list(dict.fromkeys(uncached))

    # postcodes.io allows max 100 postcodes per request
    batch_size = 100
    for i in range(0, len(uncached), batch_size):
        batch = uncached[i:i + batch_size]
        try:
            response = requests.post(
                'https://api.postcodes.io/postcodes',
                json={'postcodes': batch},
                timeout=30
            )
            if response.status_code != 200:
                # Surface HTTP failures instead of dropping the batch silently.
                print(f" Warning: Geocoding batch failed: HTTP {response.status_code}")
                continue

            data = response.json()
            # 'result' may be present but null; treat that as an empty list.
            for item in data.get('result') or []:
                if not item or not item.get('result'):
                    continue
                pc = item['query'].upper()
                lat = item['result'].get('latitude')
                lon = item['result'].get('longitude')
                # Compare against None rather than truthiness: 0.0 is a valid
                # coordinate (e.g. longitude on the prime meridian).
                if lat is not None and lon is not None:
                    results[pc] = (lat, lon)
                    _postcode_cache[pc] = (lat, lon)
        except Exception as e:
            # Deliberately broad: geocoding is best-effort and one bad batch
            # must not abort the remaining batches.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
|
||||
|
||||
def geocode_single_postcode(postcode: str) -> Optional[Tuple[float, float]]:
|
||||
"""Geocode a single postcode using postcodes.io API."""
|
||||
if not postcode:
|
||||
|
||||
Reference in New Issue
Block a user