perf: resolve all P1–P5 performance issues from code review
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 21s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 50s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 12s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 21s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 50s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 12s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
P1 (backend/data_loader.py): Add load_latest_school_data() which pre-computes the one-row-per-school latest-year snapshot (groupby, prev-year trend merge) once at startup instead of on every /api/schools request. get_schools route now starts from the cached snapshot rather than rebuilding it. S3 (backend/app.py): Wrap synchronous geocode_single_postcode() call in asyncio.to_thread() so postcode lookups no longer block the uvicorn event loop. Admin reload endpoint also uses to_thread for both cache primes. P2 (nextjs-app/components/HomeView.tsx): Add mapParamsRef guard so switching back to map view does not re-fetch 500 schools when search params haven't changed. Reset ref on new searches so fresh data is always fetched. P3 (nextjs-app/lib/chartSetup.ts): Extract Chart.js registration into a shared side-effect module. ComparisonChart and PerformanceChart now import it instead of each calling ChartJS.register() independently. P4 (backend/database.py): Remove unnecessary db.commit() from the read-only get_db_session() context manager — saves a DB round-trip on every request. P5 (backend/database.py): Add pool_recycle=1800 to SQLAlchemy engine to prevent stale TCP connections from accumulating in long-running processes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+11
-33
@@ -24,6 +24,7 @@ from .config import settings
|
|||||||
from .data_loader import (
|
from .data_loader import (
|
||||||
clear_cache,
|
clear_cache,
|
||||||
load_school_data,
|
load_school_data,
|
||||||
|
load_latest_school_data,
|
||||||
geocode_single_postcode,
|
geocode_single_postcode,
|
||||||
get_supplementary_data,
|
get_supplementary_data,
|
||||||
search_schools_typesense,
|
search_schools_typesense,
|
||||||
@@ -223,6 +224,8 @@ async def lifespan(app: FastAPI):
|
|||||||
print("Warning: No data in marts. Run the annual EES pipeline to populate KS2 data.")
|
print("Warning: No data in marts. Run the annual EES pipeline to populate KS2 data.")
|
||||||
else:
|
else:
|
||||||
print(f"Data loaded successfully: {len(df)} records.")
|
print(f"Data loaded successfully: {len(df)} records.")
|
||||||
|
# Pre-compute the latest-year snapshot so the first search request is fast
|
||||||
|
await asyncio.to_thread(load_latest_school_data)
|
||||||
try:
|
try:
|
||||||
_sitemap_xml = build_sitemap()
|
_sitemap_xml = build_sitemap()
|
||||||
n = _sitemap_xml.count("<url>")
|
n = _sitemap_xml.count("<url>")
|
||||||
@@ -321,44 +324,17 @@ async def get_schools(
|
|||||||
phase = sanitize_search_input(phase)
|
phase = sanitize_search_input(phase)
|
||||||
postcode = validate_postcode(postcode)
|
postcode = validate_postcode(postcode)
|
||||||
|
|
||||||
df = load_school_data()
|
# Load the pre-computed latest-year snapshot (cached after first request / startup).
|
||||||
|
# This avoids rebuilding the expensive groupby + prev-year merge on every search.
|
||||||
|
df_latest = load_latest_school_data()
|
||||||
|
|
||||||
if df.empty:
|
if df_latest.empty:
|
||||||
return {"schools": [], "total": 0, "page": page, "page_size": 0}
|
return {"schools": [], "total": 0, "page": page, "page_size": 0}
|
||||||
|
|
||||||
# Use configured default if not specified
|
# Use configured default if not specified
|
||||||
if page_size is None:
|
if page_size is None:
|
||||||
page_size = settings.default_page_size
|
page_size = settings.default_page_size
|
||||||
|
|
||||||
# Schools with no performance data (special schools, PRUs, newly opened, etc.)
|
|
||||||
# have NULL year from the LEFT JOIN — keep them but skip the groupby/trend logic.
|
|
||||||
df_no_perf = df[df["year"].isna()].drop_duplicates(subset=["urn"])
|
|
||||||
df = df[df["year"].notna()]
|
|
||||||
|
|
||||||
# Get unique schools (latest year data for each)
|
|
||||||
latest_year = df.groupby("urn")["year"].max().reset_index()
|
|
||||||
df_latest = df.merge(latest_year, on=["urn", "year"])
|
|
||||||
|
|
||||||
# Calculate trend by comparing to previous year
|
|
||||||
# Get second-latest year for each school
|
|
||||||
df_sorted = df.sort_values(["urn", "year"], ascending=[True, False])
|
|
||||||
df_prev = df_sorted.groupby("urn").nth(1).reset_index()
|
|
||||||
if not df_prev.empty and "rwm_expected_pct" in df_prev.columns:
|
|
||||||
prev_rwm = df_prev[["urn", "rwm_expected_pct"]].rename(
|
|
||||||
columns={"rwm_expected_pct": "prev_rwm_expected_pct"}
|
|
||||||
)
|
|
||||||
if "attainment_8_score" in df_prev.columns:
|
|
||||||
prev_rwm = prev_rwm.merge(
|
|
||||||
df_prev[["urn", "attainment_8_score"]].rename(
|
|
||||||
columns={"attainment_8_score": "prev_attainment_8_score"}
|
|
||||||
),
|
|
||||||
on="urn", how="outer"
|
|
||||||
)
|
|
||||||
df_latest = df_latest.merge(prev_rwm, on="urn", how="left")
|
|
||||||
|
|
||||||
# Merge back schools with no performance data
|
|
||||||
df_latest = pd.concat([df_latest, df_no_perf], ignore_index=True)
|
|
||||||
|
|
||||||
# Phase filter — uses PHASE_GROUPS so all-through/middle schools appear
|
# Phase filter — uses PHASE_GROUPS so all-through/middle schools appear
|
||||||
# in the correct phase(s) rather than being invisible to both filters.
|
# in the correct phase(s) rather than being invisible to both filters.
|
||||||
if phase:
|
if phase:
|
||||||
@@ -404,7 +380,8 @@ async def get_schools(
|
|||||||
# Location-based search (uses pre-geocoded data from database)
|
# Location-based search (uses pre-geocoded data from database)
|
||||||
search_coords = None
|
search_coords = None
|
||||||
if postcode:
|
if postcode:
|
||||||
coords = geocode_single_postcode(postcode)
|
# Offload the synchronous HTTP call to a thread so the event loop stays free
|
||||||
|
coords = await asyncio.to_thread(geocode_single_postcode, postcode)
|
||||||
if coords:
|
if coords:
|
||||||
search_coords = coords
|
search_coords = coords
|
||||||
schools_df = schools_df.copy()
|
schools_df = schools_df.copy()
|
||||||
@@ -907,7 +884,8 @@ async def reload_data(
|
|||||||
Requires X-API-Key header with valid admin API key.
|
Requires X-API-Key header with valid admin API key.
|
||||||
"""
|
"""
|
||||||
clear_cache()
|
clear_cache()
|
||||||
load_school_data()
|
await asyncio.to_thread(load_school_data)
|
||||||
|
await asyncio.to_thread(load_latest_school_data)
|
||||||
return {"status": "reloaded"}
|
return {"status": "reloaded"}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+53
-1
@@ -242,6 +242,8 @@ def load_school_data_as_dataframe() -> pd.DataFrame:
|
|||||||
|
|
||||||
# Cache for DataFrame
|
# Cache for DataFrame
|
||||||
_df_cache: Optional[pd.DataFrame] = None
|
_df_cache: Optional[pd.DataFrame] = None
|
||||||
|
# Pre-computed latest-year snapshot (one row per school, with prev-year trend columns)
|
||||||
|
_df_latest_cache: Optional[pd.DataFrame] = None
|
||||||
|
|
||||||
|
|
||||||
def load_school_data() -> pd.DataFrame:
|
def load_school_data() -> pd.DataFrame:
|
||||||
@@ -260,10 +262,60 @@ def load_school_data() -> pd.DataFrame:
|
|||||||
return _df_cache
|
return _df_cache
|
||||||
|
|
||||||
|
|
||||||
|
def load_latest_school_data() -> pd.DataFrame:
|
||||||
|
"""Return a cached one-row-per-school DataFrame at the latest available year.
|
||||||
|
|
||||||
|
The expensive groupby / merge / prev-year trend computation runs once at
|
||||||
|
startup (or after a cache clear) rather than on every search request.
|
||||||
|
Per-request filters (phase, gender, LA …) should be applied to the returned
|
||||||
|
DataFrame's copy; they must NOT modify the cached object.
|
||||||
|
"""
|
||||||
|
global _df_latest_cache
|
||||||
|
if _df_latest_cache is not None:
|
||||||
|
return _df_latest_cache
|
||||||
|
|
||||||
|
df = load_school_data()
|
||||||
|
if df.empty:
|
||||||
|
return df
|
||||||
|
|
||||||
|
# Schools that have no performance rows (PRUs, new schools, etc.)
|
||||||
|
df_no_perf = df[df["year"].isna()].drop_duplicates(subset=["urn"])
|
||||||
|
df_with_perf = df[df["year"].notna()]
|
||||||
|
|
||||||
|
# Reduce to the latest year per school
|
||||||
|
latest_year = df_with_perf.groupby("urn")["year"].max().reset_index()
|
||||||
|
df_latest = df_with_perf.merge(latest_year, on=["urn", "year"])
|
||||||
|
|
||||||
|
# Attach previous-year metrics for trend arrows (second-latest year per school)
|
||||||
|
df_sorted = df_with_perf.sort_values(["urn", "year"], ascending=[True, False])
|
||||||
|
df_prev = df_sorted.groupby("urn").nth(1).reset_index()
|
||||||
|
if not df_prev.empty and "rwm_expected_pct" in df_prev.columns:
|
||||||
|
prev_rwm = df_prev[["urn", "rwm_expected_pct"]].rename(
|
||||||
|
columns={"rwm_expected_pct": "prev_rwm_expected_pct"}
|
||||||
|
)
|
||||||
|
if "attainment_8_score" in df_prev.columns:
|
||||||
|
prev_rwm = prev_rwm.merge(
|
||||||
|
df_prev[["urn", "attainment_8_score"]].rename(
|
||||||
|
columns={"attainment_8_score": "prev_attainment_8_score"}
|
||||||
|
),
|
||||||
|
on="urn",
|
||||||
|
how="outer",
|
||||||
|
)
|
||||||
|
df_latest = df_latest.merge(prev_rwm, on="urn", how="left")
|
||||||
|
|
||||||
|
# Merge back schools with no performance data
|
||||||
|
df_latest = pd.concat([df_latest, df_no_perf], ignore_index=True)
|
||||||
|
|
||||||
|
print(f"Latest-snapshot cache built: {len(df_latest)} schools")
|
||||||
|
_df_latest_cache = df_latest
|
||||||
|
return _df_latest_cache
|
||||||
|
|
||||||
|
|
||||||
def clear_cache():
|
def clear_cache():
|
||||||
"""Clear all caches."""
|
"""Clear all caches."""
|
||||||
global _df_cache
|
global _df_cache, _df_latest_cache
|
||||||
_df_cache = None
|
_df_cache = None
|
||||||
|
_df_latest_cache = None
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
+2
-5
@@ -15,6 +15,7 @@ engine = create_engine(
|
|||||||
pool_size=10,
|
pool_size=10,
|
||||||
max_overflow=20,
|
max_overflow=20,
|
||||||
pool_pre_ping=True,
|
pool_pre_ping=True,
|
||||||
|
pool_recycle=1800, # recycle connections every 30 min to avoid stale TCP
|
||||||
echo=False,
|
echo=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -34,13 +35,9 @@ def get_db():
|
|||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def get_db_session():
|
def get_db_session():
|
||||||
"""Context manager for non-FastAPI contexts."""
|
"""Context manager for non-FastAPI contexts (read-only)."""
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
yield db
|
yield db
|
||||||
db.commit()
|
|
||||||
except Exception:
|
|
||||||
db.rollback()
|
|
||||||
raise
|
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|||||||
@@ -6,31 +6,11 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { Line } from 'react-chartjs-2';
|
import { Line } from 'react-chartjs-2';
|
||||||
import {
|
import { ChartOptions } from 'chart.js';
|
||||||
Chart as ChartJS,
|
import '@/lib/chartSetup';
|
||||||
CategoryScale,
|
|
||||||
LinearScale,
|
|
||||||
PointElement,
|
|
||||||
LineElement,
|
|
||||||
Title,
|
|
||||||
Tooltip,
|
|
||||||
Legend,
|
|
||||||
ChartOptions,
|
|
||||||
} from 'chart.js';
|
|
||||||
import type { ComparisonData } from '@/lib/types';
|
import type { ComparisonData } from '@/lib/types';
|
||||||
import { CHART_COLORS, formatAcademicYear } from '@/lib/utils';
|
import { CHART_COLORS, formatAcademicYear } from '@/lib/utils';
|
||||||
|
|
||||||
// Register Chart.js components
|
|
||||||
ChartJS.register(
|
|
||||||
CategoryScale,
|
|
||||||
LinearScale,
|
|
||||||
PointElement,
|
|
||||||
LineElement,
|
|
||||||
Title,
|
|
||||||
Tooltip,
|
|
||||||
Legend
|
|
||||||
);
|
|
||||||
|
|
||||||
interface ComparisonChartProps {
|
interface ComparisonChartProps {
|
||||||
comparisonData: Record<string, ComparisonData>;
|
comparisonData: Record<string, ComparisonData>;
|
||||||
metric: string;
|
metric: string;
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp
|
|||||||
const [mapSchools, setMapSchools] = useState<School[]>([]);
|
const [mapSchools, setMapSchools] = useState<School[]>([]);
|
||||||
const [isLoadingMap, setIsLoadingMap] = useState(false);
|
const [isLoadingMap, setIsLoadingMap] = useState(false);
|
||||||
const prevSearchParamsRef = useRef(searchParams.toString());
|
const prevSearchParamsRef = useRef(searchParams.toString());
|
||||||
|
const mapParamsRef = useRef<string>('');
|
||||||
const [geoState, setGeoState] = useState<'idle' | 'requesting' | 'error'>('idle');
|
const [geoState, setGeoState] = useState<'idle' | 'requesting' | 'error'>('idle');
|
||||||
const [geoError, setGeoError] = useState<string | null>(null);
|
const [geoError, setGeoError] = useState<string | null>(null);
|
||||||
const [chipDays, setChipDays] = useState<(number | null)[]>(ADMISSIONS_CHIPS.map(() => null));
|
const [chipDays, setChipDays] = useState<(number | null)[]>(ADMISSIONS_CHIPS.map(() => null));
|
||||||
@@ -88,11 +89,12 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp
|
|||||||
|| (!currentPhase && secondaryCount > primaryCount);
|
|| (!currentPhase && secondaryCount > primaryCount);
|
||||||
const isMixedView = primaryCount > 0 && secondaryCount > 0 && !currentPhase;
|
const isMixedView = primaryCount > 0 && secondaryCount > 0 && !currentPhase;
|
||||||
|
|
||||||
// Reset pagination state when search params change
|
// Reset pagination and map cache when search params change
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const newParamsStr = searchParams.toString();
|
const newParamsStr = searchParams.toString();
|
||||||
if (newParamsStr !== prevSearchParamsRef.current) {
|
if (newParamsStr !== prevSearchParamsRef.current) {
|
||||||
prevSearchParamsRef.current = newParamsStr;
|
prevSearchParamsRef.current = newParamsStr;
|
||||||
|
mapParamsRef.current = ''; // allow map to re-fetch for new search
|
||||||
setAllSchools(initialSchools.schools);
|
setAllSchools(initialSchools.schools);
|
||||||
setCurrentPage(initialSchools.page);
|
setCurrentPage(initialSchools.page);
|
||||||
setHasMore(initialSchools.total_pages > 1);
|
setHasMore(initialSchools.total_pages > 1);
|
||||||
@@ -105,9 +107,13 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp
|
|||||||
setSelectedMapSchool(null);
|
setSelectedMapSchool(null);
|
||||||
}, [resultsView, searchParams]);
|
}, [resultsView, searchParams]);
|
||||||
|
|
||||||
// Fetch all schools within radius when map view is active
|
// Fetch all schools within radius when map view is active.
|
||||||
|
// Guard with a ref so toggling back to map never re-fetches the same params.
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (resultsView !== 'map' || !isLocationSearch) return;
|
if (resultsView !== 'map' || !isLocationSearch) return;
|
||||||
|
const paramsKey = searchParams.toString();
|
||||||
|
if (paramsKey === mapParamsRef.current) return;
|
||||||
|
mapParamsRef.current = paramsKey;
|
||||||
setIsLoadingMap(true);
|
setIsLoadingMap(true);
|
||||||
const params: Record<string, any> = {};
|
const params: Record<string, any> = {};
|
||||||
searchParams.forEach((value, key) => { params[key] = value; });
|
searchParams.forEach((value, key) => { params[key] = value; });
|
||||||
|
|||||||
@@ -6,24 +6,12 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { Line } from 'react-chartjs-2';
|
import { Line } from 'react-chartjs-2';
|
||||||
import {
|
import { ChartOptions, ChartDataset } from 'chart.js';
|
||||||
Chart as ChartJS,
|
import '@/lib/chartSetup';
|
||||||
CategoryScale,
|
|
||||||
LinearScale,
|
|
||||||
PointElement,
|
|
||||||
LineElement,
|
|
||||||
Title,
|
|
||||||
Tooltip,
|
|
||||||
Legend,
|
|
||||||
ChartOptions,
|
|
||||||
ChartDataset,
|
|
||||||
} from 'chart.js';
|
|
||||||
import type { SchoolResult } from '@/lib/types';
|
import type { SchoolResult } from '@/lib/types';
|
||||||
import { formatAcademicYear } from '@/lib/utils';
|
import { formatAcademicYear } from '@/lib/utils';
|
||||||
import styles from './PerformanceChart.module.css';
|
import styles from './PerformanceChart.module.css';
|
||||||
|
|
||||||
ChartJS.register(CategoryScale, LinearScale, PointElement, LineElement, Title, Tooltip, Legend);
|
|
||||||
|
|
||||||
interface NationalByYear {
|
interface NationalByYear {
|
||||||
year: number;
|
year: number;
|
||||||
primary: Record<string, number>;
|
primary: Record<string, number>;
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
/**
|
||||||
|
* Single Chart.js registration point.
|
||||||
|
* Import this module (side-effect import) in any file that uses react-chartjs-2.
|
||||||
|
* Chart.js is idempotent about duplicate registrations, but importing the
|
||||||
|
* registration from one place ensures every primitive is only bundled once
|
||||||
|
* and the register() call is not duplicated at runtime.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
Chart as ChartJS,
|
||||||
|
CategoryScale,
|
||||||
|
LinearScale,
|
||||||
|
PointElement,
|
||||||
|
LineElement,
|
||||||
|
Title,
|
||||||
|
Tooltip,
|
||||||
|
Legend,
|
||||||
|
} from 'chart.js';
|
||||||
|
|
||||||
|
ChartJS.register(
|
||||||
|
CategoryScale,
|
||||||
|
LinearScale,
|
||||||
|
PointElement,
|
||||||
|
LineElement,
|
||||||
|
Title,
|
||||||
|
Tooltip,
|
||||||
|
Legend,
|
||||||
|
);
|
||||||
Reference in New Issue
Block a user