moving geocoding to a background task
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 57s
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 57s
This commit is contained in:
184
scripts/geocode_schools.py
Executable file
184
scripts/geocode_schools.py
Executable file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Geocode all school postcodes and update the database.
|
||||
|
||||
This script should be run as a weekly cron job to ensure all schools
|
||||
have up-to-date latitude/longitude coordinates.
|
||||
|
||||
Usage:
|
||||
python scripts/geocode_schools.py [--force]
|
||||
|
||||
Options:
|
||||
--force Re-geocode all postcodes, even if already geocoded
|
||||
|
||||
Crontab example (run every Sunday at 2am):
|
||||
0 2 * * 0 cd /path/to/school_compare && /path/to/venv/bin/python scripts/geocode_schools.py >> /var/log/geocode_schools.log 2>&1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from backend.database import SessionLocal
|
||||
from backend.models import School
|
||||
|
||||
|
||||
def _extract_coordinates(payload: dict) -> Dict[str, Tuple[float, float]]:
    """Collect postcode -> (latitude, longitude) pairs from one bulk response body."""
    coordinates = {}
    # "result" may be present but null; treat that the same as an empty list.
    for item in payload.get("result") or []:
        if not item or not item.get("result"):
            # Postcode was not found (the per-item result is empty).
            continue
        lat = item["result"].get("latitude")
        lon = item["result"].get("longitude")
        # Compare against None rather than truthiness: 0.0 is a legitimate
        # coordinate (longitude 0 is the prime meridian) and must not be dropped.
        if lat is not None and lon is not None:
            coordinates[item["query"].upper()] = (lat, lon)
    return coordinates


def geocode_postcodes_bulk(postcodes: list) -> Dict[str, Tuple[float, float]]:
    """
    Geocode postcodes in bulk using the postcodes.io API.

    Entries that are not strings, are empty, or are shorter than five
    characters after stripping are ignored. The remaining postcodes are
    upper-cased, de-duplicated and sent in batches.

    Args:
        postcodes: Iterable of raw postcode values (may contain None or
            non-string entries).

    Returns:
        Dict mapping the upper-cased postcode to a (latitude, longitude)
        tuple. Postcodes that could not be geocoded are simply absent.
        A failed batch is reported with a warning and skipped; it never
        raises to the caller.
    """
    results = {}

    # sorted() gives deterministic batches, which keeps the log output stable.
    valid_postcodes = sorted({
        p.strip().upper()
        for p in postcodes
        if p and isinstance(p, str) and len(p.strip()) >= 5
    })

    if not valid_postcodes:
        return results

    # Number of postcodes sent per bulk request.
    batch_size = 100
    total_batches = (len(valid_postcodes) + batch_size - 1) // batch_size

    for i, batch_start in enumerate(range(0, len(valid_postcodes), batch_size)):
        batch = valid_postcodes[batch_start : batch_start + batch_size]
        print(f" Geocoding batch {i + 1}/{total_batches} ({len(batch)} postcodes)...")

        try:
            response = requests.post(
                "https://api.postcodes.io/postcodes",
                json={"postcodes": batch},
                timeout=30,
            )
            if response.status_code == 200:
                results.update(_extract_coordinates(response.json()))
            else:
                print(f" Warning: API returned status {response.status_code}")
        except Exception as e:
            # Deliberately broad: geocoding is best-effort per batch, so a
            # network error or malformed payload only loses this batch.
            print(f" Warning: Geocoding batch failed: {e}")

    return results
|
||||
|
||||
|
||||
def geocode_schools(force: bool = False) -> None:
    """
    Geocode schools in the database and store their coordinates.

    Selects the schools to process, geocodes their unique postcodes in
    bulk, writes latitude/longitude back onto each matched school, commits,
    and prints a summary of the run and of overall coverage.

    Args:
        force: If True, re-geocode every school that has a postcode.
            Otherwise only schools missing a latitude or longitude are
            processed.

    Raises:
        Exception: Any error raised while querying, geocoding or committing
            is printed, the session is rolled back, and the error is
            re-raised.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"School Geocoding Job - {datetime.now().isoformat()}")
    print(f"{banner}\n")

    db = SessionLocal()

    try:
        # Get schools that need geocoding
        with_postcode = db.query(School).filter(School.postcode.isnot(None))
        if force:
            schools = with_postcode.all()
            print(f"Force mode: Processing all {len(schools)} schools with postcodes")
        else:
            schools = with_postcode.filter(
                (School.latitude.is_(None)) | (School.longitude.is_(None))
            ).all()
            print(f"Found {len(schools)} schools without coordinates")

        if not schools:
            print("No schools to geocode. Exiting.")
            return

        # Normalise each postcode once so the key used for the lookup below
        # is the same one that was sent to the geocoder.
        targets = [
            (school, school.postcode.strip().upper())
            for school in schools
            if school.postcode
        ]

        # Extract unique postcodes
        postcodes = list({postcode for _, postcode in targets})
        print(f"Unique postcodes to geocode: {len(postcodes)}")

        # Geocode in bulk
        print("\nGeocoding postcodes...")
        geocoded = geocode_postcodes_bulk(postcodes)
        print(f"Successfully geocoded: {len(geocoded)} postcodes")

        # Update database
        print("\nUpdating database...")
        updated_count = 0
        failed_count = 0

        for school, postcode in targets:
            coords = geocoded.get(postcode)
            if coords is None:
                failed_count += 1
                continue
            school.latitude, school.longitude = coords
            updated_count += 1

        db.commit()

        print("\nResults:")
        print(f" - Updated: {updated_count} schools")
        # This counter is incremented per school, so label it as schools.
        print(f" - Failed (invalid/not found): {failed_count} schools")

        # Summary stats
        total_with_coords = db.query(School).filter(
            School.latitude.isnot(None),
            School.longitude.isnot(None),
        ).count()
        total_schools = db.query(School).count()

        # Guard the division so an empty table cannot abort the summary.
        coverage = 100 * total_with_coords / total_schools if total_schools else 0.0

        print("\nDatabase summary:")
        print(f" - Total schools: {total_schools}")
        print(f" - Schools with coordinates: {total_with_coords}")
        print(f" - Coverage: {coverage:.1f}%")

    except Exception as e:
        print(f"Error during geocoding: {e}")
        db.rollback()
        raise
    finally:
        # Runs on success, early return and failure alike.
        db.close()
        print(f"\n{banner}")
        print(f"Geocoding job completed - {datetime.now().isoformat()}")
        print(f"{banner}\n")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read the --force flag and run the geocoding job."""
    cli = argparse.ArgumentParser(
        description="Geocode school postcodes and update database",
    )
    # --force is a plain switch; it defaults to False when omitted.
    force_flag = {
        "action": "store_true",
        "help": "Re-geocode all postcodes, even if already geocoded",
    }
    cli.add_argument("--force", **force_flag)

    options = cli.parse_args()
    geocode_schools(force=options.force)


# Only run the job when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user