Introducing Postgresql for persistance
Some checks failed
Build and Push Docker Image / build-and-push (push) Failing after 32s

This commit is contained in:
Tudor Sitaru
2026-01-06 17:15:43 +00:00
parent bd3640d50f
commit 52fbade30c
6 changed files with 492 additions and 213 deletions

View File

@@ -14,17 +14,27 @@ from typing import Optional
from .config import settings
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
from .data_loader import load_school_data, clear_cache, geocode_single_postcode, geocode_postcodes_bulk, haversine_distance
from .data_loader import (
load_school_data, clear_cache, geocode_single_postcode,
geocode_postcodes_bulk, haversine_distance, get_data_info as get_db_info
)
from .database import init_db
from .utils import clean_for_json
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan - startup and shutdown events."""
# Startup: pre-load data
print("Starting up: Loading school data...")
load_school_data()
print("Data loaded successfully.")
# Startup: initialize database and pre-load data
print("Starting up: Initializing database...")
init_db() # Ensure tables exist
print("Loading school data from database...")
df = load_school_data()
if df.empty:
print("Warning: No data in database. Run the migration script to import data.")
else:
print("Data loaded successfully.")
yield # Application runs here
@@ -325,13 +335,24 @@ async def get_rankings(
@app.get("/api/data-info")
async def get_data_info():
"""Get information about loaded data."""
# Get info directly from database
db_info = get_db_info()
if db_info["total_schools"] == 0:
return {
"status": "no_data",
"message": "No data in database. Run the migration script: python scripts/migrate_csv_to_db.py",
"data_source": "PostgreSQL",
}
# Also get DataFrame-based stats for backwards compatibility
df = load_school_data()
if df.empty:
return {
"status": "no_data",
"message": "No data files found in data folder. Please download KS2 data from the government website.",
"data_folder": str(settings.data_dir),
"message": "No data available",
"data_source": "PostgreSQL",
}
years = [int(y) for y in sorted(df["year"].unique())]
@@ -340,6 +361,7 @@ async def get_data_info():
return {
"status": "loaded",
"data_source": "PostgreSQL",
"total_records": int(len(df)),
"unique_schools": int(df["urn"].nunique()),
"years_available": years,