feat: add secondary school support with KS4 data and metric tooltips
Some checks failed
Build and Push Docker Images / Build Frontend (Next.js) (push) Has been cancelled
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Has been cancelled
Build and Push Docker Images / Trigger Portainer Update (push) Has been cancelled
Build and Push Docker Images / Build Backend (FastAPI) (push) Has been cancelled

- Backend: replace INNER JOIN ks2 with UNION ALL (ks2 + ks4) so primary
  and secondary schools both appear in the main DataFrame
- Backend: add /api/national-averages endpoint computing means from live
  data, replacing the hardcoded NATIONAL_AVG constant on the frontend
- Backend: add phase filter param to /api/schools; return phases from
  /api/filters; fix hardcoded "phase": "Primary" in school detail endpoint
- Backend: add KS4 metric definitions (Attainment 8, Progress 8, EBacc,
  English & Maths pass rates) to METRIC_DEFINITIONS and RANKING_COLUMNS
- Frontend: SchoolDetailView is now phase-aware — secondary schools show
  a GCSE Results section (Att8, P8, E&M, EBacc) instead of SATs; phonics
  tab hidden for secondary; admissions says Year 7 instead of Year 3;
  history table shows KS4 columns; chart datasets switch for secondary
- Frontend: new MetricTooltip component (CSS-only ⓘ icon) backed by
  METRIC_EXPLANATIONS — added to RWM, GPS, SEN, EAL, IDACI, progress
  scores and all KS4 metrics throughout SchoolDetailView and SchoolCard
- Frontend: METRIC_EXPLANATIONS extended with KS4 terms (Attainment 8,
  Progress 8, EBacc) and previously missing terms (SEN, EHCP, EAL, IDACI)
- Frontend: SchoolCard expands "RWM" to "Reading, Writing & Maths" and
  shows Attainment 8 / English & Maths Grade 4+ for secondary schools
- Frontend: FilterBar adds Phase dropdown (Primary / Secondary / All-through)
- Frontend: HomeView hero copy updated; compact list shows phase-aware metric
- Global metadata updated to remove "primary only" framing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 14:59:40 +00:00
parent b0990e30ee
commit 5eff9af69c
16 changed files with 903 additions and 187 deletions

View File

@@ -1,6 +1,6 @@
"""
SchoolCompare.co.uk API
Serves primary school (KS2) performance data for comparing schools.
Serves primary and secondary school performance data for comparing schools.
Uses real data from UK Government Compare School Performance downloads.
"""
@@ -151,7 +151,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(
title="SchoolCompare API",
description="API for comparing primary school (KS2) performance data - schoolcompare.co.uk",
description="API for comparing primary and secondary school performance data - schoolcompare.co.uk",
version="2.0.0",
lifespan=lifespan,
# Disable docs in production for security
@@ -213,21 +213,23 @@ async def get_schools(
None, description="Filter by local authority", max_length=100
),
school_type: Optional[str] = Query(None, description="Filter by school type", max_length=100),
phase: Optional[str] = Query(None, description="Filter by phase: primary, secondary, all-through", max_length=50),
postcode: Optional[str] = Query(None, description="Search near postcode", max_length=10),
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
page: int = Query(1, ge=1, le=1000, description="Page number"),
page_size: int = Query(None, ge=1, le=100, description="Results per page"),
):
"""
Get list of unique primary schools with pagination.
Get list of schools with pagination.
Returns paginated results with total count for efficient loading.
Supports location-based search using postcode.
Supports location-based search using postcode and phase filtering.
"""
# Sanitize inputs
search = sanitize_search_input(search)
local_authority = sanitize_search_input(local_authority)
school_type = sanitize_search_input(school_type)
phase = sanitize_search_input(phase)
postcode = validate_postcode(postcode)
df = load_school_data()
@@ -253,9 +255,25 @@ async def get_schools(
)
df_latest = df_latest.merge(prev_rwm, on="urn", how="left")
# Phase filter
if phase:
phase_lower = phase.lower()
if phase_lower in ("primary", "secondary", "all-through", "all_through"):
# Map param values to GIAS phase strings (partial match)
phase_map = {
"primary": "primary",
"secondary": "secondary",
"all-through": "all-through",
"all_through": "all-through",
}
phase_substr = phase_map[phase_lower]
schools_df_phase_mask = df_latest["phase"].str.lower().str.contains(phase_substr, na=False)
df_latest = df_latest[schools_df_phase_mask]
# Include key result metrics for display on cards
location_cols = ["latitude", "longitude"]
result_cols = [
"phase",
"year",
"rwm_expected_pct",
"rwm_high_pct",
@@ -264,6 +282,8 @@ async def get_schools(
"writing_expected_pct",
"maths_expected_pct",
"total_pupils",
"attainment_8_score",
"english_maths_standard_pass_pct",
]
available_cols = [
c
@@ -364,7 +384,7 @@ async def get_schools(
@app.get("/api/schools/{urn}")
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_school_details(request: Request, urn: int):
"""Get detailed KS2 data for a specific primary school across all years."""
"""Get detailed performance data for a specific school across all years."""
# Validate URN range (UK school URNs are 6 digits)
if not (100000 <= urn <= 999999):
raise HTTPException(status_code=400, detail="Invalid URN format")
@@ -406,7 +426,7 @@ async def get_school_details(request: Request, urn: int):
"age_range": latest.get("age_range", ""),
"latitude": latest.get("latitude"),
"longitude": latest.get("longitude"),
"phase": "Primary",
"phase": latest.get("phase"),
# GIAS fields
"website": latest.get("website"),
"headteacher_name": latest.get("headteacher_name"),
@@ -433,7 +453,7 @@ async def compare_schools(
request: Request,
urns: str = Query(..., description="Comma-separated URNs", max_length=100)
):
"""Compare multiple primary schools side by side."""
"""Compare multiple schools side by side."""
df = load_school_data()
if df.empty:
@@ -487,10 +507,66 @@ async def get_filter_options(request: Request):
"years": [],
}
# Phases: return values from data, ordered sensibly
phases = sorted(df["phase"].dropna().unique().tolist()) if "phase" in df.columns else []
return {
"local_authorities": sorted(df["local_authority"].dropna().unique().tolist()),
"school_types": sorted(df["school_type"].dropna().unique().tolist()),
"years": sorted(df["year"].dropna().unique().tolist()),
"phases": phases,
}
@app.get("/api/national-averages")
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_national_averages(request: Request):
"""
Compute national average for each metric from the latest data year.
Returns separate averages for primary (KS2) and secondary (KS4) schools.
Values are derived from the loaded DataFrame so they automatically
stay current when new data is loaded.
"""
df = load_school_data()
if df.empty:
return {"primary": {}, "secondary": {}}
latest_year = int(df["year"].max())
df_latest = df[df["year"] == latest_year]
ks2_metrics = [
"rwm_expected_pct", "rwm_high_pct",
"reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
"reading_avg_score", "maths_avg_score", "gps_avg_score",
"reading_progress", "writing_progress", "maths_progress",
"overall_absence_pct", "persistent_absence_pct",
"disadvantaged_gap", "disadvantaged_pct", "sen_support_pct",
]
ks4_metrics = [
"attainment_8_score", "progress_8_score",
"english_maths_standard_pass_pct", "english_maths_strong_pass_pct",
"ebacc_entry_pct", "ebacc_standard_pass_pct", "ebacc_strong_pass_pct",
"ebacc_avg_score", "gcse_grade_91_pct",
]
def _means(sub_df, metric_list):
out = {}
for col in metric_list:
if col in sub_df.columns:
val = sub_df[col].dropna()
if len(val) > 0:
out[col] = round(float(val.mean()), 2)
return out
# Primary: schools where KS2 data is non-null
primary_df = df_latest[df_latest["rwm_expected_pct"].notna()]
# Secondary: schools where KS4 data is non-null
secondary_df = df_latest[df_latest["attainment_8_score"].notna()]
return {
"year": latest_year,
"primary": _means(primary_df, ks2_metrics),
"secondary": _means(secondary_df, ks4_metrics),
}
@@ -498,7 +574,7 @@ async def get_filter_options(request: Request):
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_available_metrics(request: Request):
"""
Get list of available KS2 performance metrics for primary schools.
Get list of available performance metrics for schools.
This is the single source of truth for metric definitions.
Frontend should consume this to avoid duplication.
@@ -517,7 +593,7 @@ async def get_available_metrics(request: Request):
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_rankings(
request: Request,
metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by", max_length=50),
metric: str = Query("rwm_expected_pct", description="Metric to rank by", max_length=50),
year: Optional[int] = Query(
None, description="Specific year (defaults to most recent)", ge=2000, le=2100
),
@@ -526,7 +602,7 @@ async def get_rankings(
None, description="Filter by local authority", max_length=100
),
):
"""Get primary school rankings by a specific KS2 metric."""
"""Get school rankings by a specific metric."""
# Sanitize local authority input
local_authority = sanitize_search_input(local_authority)