diff --git a/backend/app.py b/backend/app.py index 45ec99a..49d368b 100644 --- a/backend/app.py +++ b/backend/app.py @@ -24,6 +24,7 @@ from .config import settings from .data_loader import ( clear_cache, load_school_data, + load_latest_school_data, geocode_single_postcode, get_supplementary_data, search_schools_typesense, @@ -223,6 +224,8 @@ async def lifespan(app: FastAPI): print("Warning: No data in marts. Run the annual EES pipeline to populate KS2 data.") else: print(f"Data loaded successfully: {len(df)} records.") + # Pre-compute the latest-year snapshot so the first search request is fast + await asyncio.to_thread(load_latest_school_data) try: _sitemap_xml = build_sitemap() n = _sitemap_xml.count("") @@ -321,44 +324,17 @@ async def get_schools( phase = sanitize_search_input(phase) postcode = validate_postcode(postcode) - df = load_school_data() + # Load the pre-computed latest-year snapshot (cached after first request / startup). + # This avoids rebuilding the expensive groupby + prev-year merge on every search. + df_latest = load_latest_school_data() - if df.empty: + if df_latest.empty: return {"schools": [], "total": 0, "page": page, "page_size": 0} # Use configured default if not specified if page_size is None: page_size = settings.default_page_size - # Schools with no performance data (special schools, PRUs, newly opened, etc.) - # have NULL year from the LEFT JOIN — keep them but skip the groupby/trend logic. - df_no_perf = df[df["year"].isna()].drop_duplicates(subset=["urn"]) - df = df[df["year"].notna()] - - # Get unique schools (latest year data for each) - latest_year = df.groupby("urn")["year"].max().reset_index() - df_latest = df.merge(latest_year, on=["urn", "year"]) - - # Calculate trend by comparing to previous year - # Get second-latest year for each school - df_sorted = df.sort_values(["urn", "year"], ascending=[True, False]) - df_prev = df_sorted.groupby("urn").nth(1).reset_index() - if not df_prev.empty and "rwm_expected_pct" in df_prev.columns: - prev_rwm = df_prev[["urn", "rwm_expected_pct"]].rename( - columns={"rwm_expected_pct": "prev_rwm_expected_pct"} - ) - if "attainment_8_score" in df_prev.columns: - prev_rwm = prev_rwm.merge( - df_prev[["urn", "attainment_8_score"]].rename( - columns={"attainment_8_score": "prev_attainment_8_score"} - ), - on="urn", how="outer" - ) - df_latest = df_latest.merge(prev_rwm, on="urn", how="left") - - # Merge back schools with no performance data - df_latest = pd.concat([df_latest, df_no_perf], ignore_index=True) - # Phase filter — uses PHASE_GROUPS so all-through/middle schools appear # in the correct phase(s) rather than being invisible to both filters. if phase: @@ -404,7 +380,8 @@ async def get_schools( # Location-based search (uses pre-geocoded data from database) search_coords = None if postcode: - coords = geocode_single_postcode(postcode) + # Offload the synchronous HTTP call to a thread so the event loop stays free + coords = await asyncio.to_thread(geocode_single_postcode, postcode) if coords: search_coords = coords schools_df = schools_df.copy() @@ -907,7 +884,8 @@ async def reload_data( Requires X-API-Key header with valid admin API key. """ clear_cache() - load_school_data() + await asyncio.to_thread(load_school_data) + await asyncio.to_thread(load_latest_school_data) return {"status": "reloaded"} diff --git a/backend/data_loader.py b/backend/data_loader.py index 6a8c341..41f698d 100644 --- a/backend/data_loader.py +++ b/backend/data_loader.py @@ -242,6 +242,8 @@ def load_school_data_as_dataframe() -> pd.DataFrame: # Cache for DataFrame _df_cache: Optional[pd.DataFrame] = None +# Pre-computed latest-year snapshot (one row per school, with prev-year trend columns) +_df_latest_cache: Optional[pd.DataFrame] = None def load_school_data() -> pd.DataFrame: @@ -260,10 +262,60 @@ def load_school_data() -> pd.DataFrame: return _df_cache +def load_latest_school_data() -> pd.DataFrame: + """Return a cached one-row-per-school DataFrame at the latest available year. + + The expensive groupby / merge / prev-year trend computation runs once at + startup (or after a cache clear) rather than on every search request. + Per-request filters (phase, gender, LA …) should be applied to the returned + DataFrame's copy; they must NOT modify the cached object. + """ + global _df_latest_cache + if _df_latest_cache is not None: + return _df_latest_cache + + df = load_school_data() + if df.empty: + return df + + # Schools that have no performance rows (PRUs, new schools, etc.) + df_no_perf = df[df["year"].isna()].drop_duplicates(subset=["urn"]) + df_with_perf = df[df["year"].notna()] + + # Reduce to the latest year per school + latest_year = df_with_perf.groupby("urn")["year"].max().reset_index() + df_latest = df_with_perf.merge(latest_year, on=["urn", "year"]) + + # Attach previous-year metrics for trend arrows (second-latest year per school) + df_sorted = df_with_perf.sort_values(["urn", "year"], ascending=[True, False]) + df_prev = df_sorted.groupby("urn").nth(1).reset_index() + if not df_prev.empty and "rwm_expected_pct" in df_prev.columns: + prev_rwm = df_prev[["urn", "rwm_expected_pct"]].rename( + columns={"rwm_expected_pct": "prev_rwm_expected_pct"} + ) + if "attainment_8_score" in df_prev.columns: + prev_rwm = prev_rwm.merge( + df_prev[["urn", "attainment_8_score"]].rename( + columns={"attainment_8_score": "prev_attainment_8_score"} + ), + on="urn", + how="outer", + ) + df_latest = df_latest.merge(prev_rwm, on="urn", how="left") + + # Merge back schools with no performance data + df_latest = pd.concat([df_latest, df_no_perf], ignore_index=True) + + print(f"Latest-snapshot cache built: {len(df_latest)} schools") + _df_latest_cache = df_latest + return _df_latest_cache + + def clear_cache(): """Clear all caches.""" - global _df_cache + global _df_cache, _df_latest_cache _df_cache = None + _df_latest_cache = None # ============================================================================= diff --git a/backend/database.py b/backend/database.py index e5ee0ad..435cac7 100644 --- a/backend/database.py +++ b/backend/database.py @@ -15,6 +15,7 @@ engine = create_engine( pool_size=10, max_overflow=20, pool_pre_ping=True, + pool_recycle=1800, # recycle connections every 30 min to avoid stale TCP echo=False, ) @@ -34,13 +35,9 @@ def get_db(): @contextmanager def get_db_session(): - """Context manager for non-FastAPI contexts.""" + """Context manager for non-FastAPI contexts (read-only).""" db = SessionLocal() try: yield db - db.commit() - except Exception: - db.rollback() - raise finally: db.close() diff --git a/nextjs-app/components/ComparisonChart.tsx b/nextjs-app/components/ComparisonChart.tsx index 6bb7521..3fa9af1 100644 --- a/nextjs-app/components/ComparisonChart.tsx +++ b/nextjs-app/components/ComparisonChart.tsx @@ -6,31 +6,11 @@ 'use client'; import { Line } from 'react-chartjs-2'; -import { - Chart as ChartJS, - CategoryScale, - LinearScale, - PointElement, - LineElement, - Title, - Tooltip, - Legend, - ChartOptions, -} from 'chart.js'; +import { ChartOptions } from 'chart.js'; +import '@/lib/chartSetup'; import type { ComparisonData } from '@/lib/types'; import { CHART_COLORS, formatAcademicYear } from '@/lib/utils'; -// Register Chart.js components -ChartJS.register( - CategoryScale, - LinearScale, - PointElement, - LineElement, - Title, - Tooltip, - Legend -); - interface ComparisonChartProps { comparisonData: Record; metric: string; diff --git a/nextjs-app/components/HomeView.tsx b/nextjs-app/components/HomeView.tsx index 3a017f7..5de09c1 100644 --- a/nextjs-app/components/HomeView.tsx +++ b/nextjs-app/components/HomeView.tsx @@ -74,6 +74,7 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp const [mapSchools, setMapSchools] = useState([]); const [isLoadingMap, setIsLoadingMap] = useState(false); const prevSearchParamsRef = useRef(searchParams.toString()); + const mapParamsRef = useRef(''); const [geoState, setGeoState] = useState<'idle' | 'requesting' | 'error'>('idle'); const [geoError, setGeoError] = useState(null); const [chipDays, setChipDays] = useState<(number | null)[]>(ADMISSIONS_CHIPS.map(() => null)); @@ -88,11 +89,12 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp || (!currentPhase && secondaryCount > primaryCount); const isMixedView = primaryCount > 0 && secondaryCount > 0 && !currentPhase; - // Reset pagination state when search params change + // Reset pagination and map cache when search params change useEffect(() => { const newParamsStr = searchParams.toString(); if (newParamsStr !== prevSearchParamsRef.current) { prevSearchParamsRef.current = newParamsStr; + mapParamsRef.current = ''; // allow map to re-fetch for new search setAllSchools(initialSchools.schools); setCurrentPage(initialSchools.page); setHasMore(initialSchools.total_pages > 1); @@ -105,9 +107,13 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp setSelectedMapSchool(null); }, [resultsView, searchParams]); - // Fetch all schools within radius when map view is active + // Fetch all schools within radius when map view is active. + // Guard with a ref so toggling back to map never re-fetches the same params. useEffect(() => { if (resultsView !== 'map' || !isLocationSearch) return; + const paramsKey = searchParams.toString(); + if (paramsKey === mapParamsRef.current) return; + mapParamsRef.current = paramsKey; setIsLoadingMap(true); const params: Record = {}; searchParams.forEach((value, key) => { params[key] = value; }); diff --git a/nextjs-app/components/PerformanceChart.tsx b/nextjs-app/components/PerformanceChart.tsx index 4ca0a0f..9e67498 100644 --- a/nextjs-app/components/PerformanceChart.tsx +++ b/nextjs-app/components/PerformanceChart.tsx @@ -6,24 +6,12 @@ 'use client'; import { Line } from 'react-chartjs-2'; -import { - Chart as ChartJS, - CategoryScale, - LinearScale, - PointElement, - LineElement, - Title, - Tooltip, - Legend, - ChartOptions, - ChartDataset, -} from 'chart.js'; +import { ChartOptions, ChartDataset } from 'chart.js'; +import '@/lib/chartSetup'; import type { SchoolResult } from '@/lib/types'; import { formatAcademicYear } from '@/lib/utils'; import styles from './PerformanceChart.module.css'; -ChartJS.register(CategoryScale, LinearScale, PointElement, LineElement, Title, Tooltip, Legend); - interface NationalByYear { year: number; primary: Record; diff --git a/nextjs-app/lib/chartSetup.ts b/nextjs-app/lib/chartSetup.ts new file mode 100644 index 0000000..5d801f5 --- /dev/null +++ b/nextjs-app/lib/chartSetup.ts @@ -0,0 +1,28 @@ +/** + * Single Chart.js registration point. + * Import this module (side-effect import) in any file that uses react-chartjs-2. + * Chart.js is idempotent about duplicate registrations, but importing the + * registration from one place ensures every primitive is only bundled once + * and the register() call is not duplicated at runtime. + */ + +import { + Chart as ChartJS, + CategoryScale, + LinearScale, + PointElement, + LineElement, + Title, + Tooltip, + Legend, +} from 'chart.js'; + +ChartJS.register( + CategoryScale, + LinearScale, + PointElement, + LineElement, + Title, + Tooltip, + Legend, +);