feat: add secondary school support with KS4 data and metric tooltips
Some checks failed
Build and Push Docker Images / Build Frontend (Next.js) (push) Has been cancelled
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Has been cancelled
Build and Push Docker Images / Trigger Portainer Update (push) Has been cancelled
Build and Push Docker Images / Build Backend (FastAPI) (push) Has been cancelled

- Backend: replace INNER JOIN ks2 with UNION ALL (ks2 + ks4) so primary
  and secondary schools both appear in the main DataFrame
- Backend: add /api/national-averages endpoint computing means from live
  data, replacing the hardcoded NATIONAL_AVG constant on the frontend
- Backend: add phase filter param to /api/schools; return phases from
  /api/filters; fix hardcoded "phase": "Primary" in school detail endpoint
- Backend: add KS4 metric definitions (Attainment 8, Progress 8, EBacc,
  English & Maths pass rates) to METRIC_DEFINITIONS and RANKING_COLUMNS
- Frontend: SchoolDetailView is now phase-aware — secondary schools show
  a GCSE Results section (Att8, P8, E&M, EBacc) instead of SATs; phonics
  tab hidden for secondary; admissions says Year 7 instead of Year 3;
  history table shows KS4 columns; chart datasets switch for secondary
- Frontend: new MetricTooltip component (CSS-only ⓘ icon) backed by
  METRIC_EXPLANATIONS — added to RWM, GPS, SEN, EAL, IDACI, progress
  scores and all KS4 metrics throughout SchoolDetailView and SchoolCard
- Frontend: METRIC_EXPLANATIONS extended with KS4 terms (Attainment 8,
  Progress 8, EBacc) and previously missing terms (SEN, EHCP, EAL, IDACI)
- Frontend: SchoolCard expands "RWM" to "Reading, Writing & Maths" and
  shows Attainment 8 / English & Maths Grade 4+ for secondary schools
- Frontend: FilterBar adds Phase dropdown (Primary / Secondary / All-through)
- Frontend: HomeView hero copy updated; compact list shows phase-aware metric
- Global metadata updated to remove "primary only" framing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 14:59:40 +00:00
parent b0990e30ee
commit 5eff9af69c
16 changed files with 903 additions and 187 deletions

View File

@@ -1,6 +1,6 @@
"""
SchoolCompare.co.uk API
Serves primary school (KS2) performance data for comparing schools.
Serves primary and secondary school performance data for comparing schools.
Uses real data from UK Government Compare School Performance downloads.
"""
@@ -151,7 +151,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(
title="SchoolCompare API",
description="API for comparing primary school (KS2) performance data - schoolcompare.co.uk",
description="API for comparing primary and secondary school performance data - schoolcompare.co.uk",
version="2.0.0",
lifespan=lifespan,
# Disable docs in production for security
@@ -213,21 +213,23 @@ async def get_schools(
None, description="Filter by local authority", max_length=100
),
school_type: Optional[str] = Query(None, description="Filter by school type", max_length=100),
phase: Optional[str] = Query(None, description="Filter by phase: primary, secondary, all-through", max_length=50),
postcode: Optional[str] = Query(None, description="Search near postcode", max_length=10),
radius: float = Query(5.0, ge=0.1, le=50, description="Search radius in miles"),
page: int = Query(1, ge=1, le=1000, description="Page number"),
page_size: int = Query(None, ge=1, le=100, description="Results per page"),
):
"""
Get list of unique primary schools with pagination.
Get list of schools with pagination.
Returns paginated results with total count for efficient loading.
Supports location-based search using postcode.
Supports location-based search using postcode and phase filtering.
"""
# Sanitize inputs
search = sanitize_search_input(search)
local_authority = sanitize_search_input(local_authority)
school_type = sanitize_search_input(school_type)
phase = sanitize_search_input(phase)
postcode = validate_postcode(postcode)
df = load_school_data()
@@ -253,9 +255,25 @@ async def get_schools(
)
df_latest = df_latest.merge(prev_rwm, on="urn", how="left")
# Phase filter
if phase:
phase_lower = phase.lower()
if phase_lower in ("primary", "secondary", "all-through", "all_through"):
# Map param values to GIAS phase strings (partial match)
phase_map = {
"primary": "primary",
"secondary": "secondary",
"all-through": "all-through",
"all_through": "all-through",
}
phase_substr = phase_map[phase_lower]
schools_df_phase_mask = df_latest["phase"].str.lower().str.contains(phase_substr, na=False)
df_latest = df_latest[schools_df_phase_mask]
# Include key result metrics for display on cards
location_cols = ["latitude", "longitude"]
result_cols = [
"phase",
"year",
"rwm_expected_pct",
"rwm_high_pct",
@@ -264,6 +282,8 @@ async def get_schools(
"writing_expected_pct",
"maths_expected_pct",
"total_pupils",
"attainment_8_score",
"english_maths_standard_pass_pct",
]
available_cols = [
c
@@ -364,7 +384,7 @@ async def get_schools(
@app.get("/api/schools/{urn}")
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_school_details(request: Request, urn: int):
"""Get detailed KS2 data for a specific primary school across all years."""
"""Get detailed performance data for a specific school across all years."""
# Validate URN range (UK school URNs are 6 digits)
if not (100000 <= urn <= 999999):
raise HTTPException(status_code=400, detail="Invalid URN format")
@@ -406,7 +426,7 @@ async def get_school_details(request: Request, urn: int):
"age_range": latest.get("age_range", ""),
"latitude": latest.get("latitude"),
"longitude": latest.get("longitude"),
"phase": "Primary",
"phase": latest.get("phase"),
# GIAS fields
"website": latest.get("website"),
"headteacher_name": latest.get("headteacher_name"),
@@ -433,7 +453,7 @@ async def compare_schools(
request: Request,
urns: str = Query(..., description="Comma-separated URNs", max_length=100)
):
"""Compare multiple primary schools side by side."""
"""Compare multiple schools side by side."""
df = load_school_data()
if df.empty:
@@ -487,10 +507,66 @@ async def get_filter_options(request: Request):
"years": [],
}
# Phases: return values from data, ordered sensibly
phases = sorted(df["phase"].dropna().unique().tolist()) if "phase" in df.columns else []
return {
"local_authorities": sorted(df["local_authority"].dropna().unique().tolist()),
"school_types": sorted(df["school_type"].dropna().unique().tolist()),
"years": sorted(df["year"].dropna().unique().tolist()),
"phases": phases,
}
@app.get("/api/national-averages")
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_national_averages(request: Request):
    """
    Compute the national average of each metric from the loaded data.

    Primary (KS2) and secondary (KS4) averages are computed separately, each
    from the most recent year in which that phase actually has results. The
    two phases are stored as separate rows with their own ``year``, so using
    one shared "latest year" would return an empty set for whichever phase
    lags behind the other.

    Values are derived from the loaded DataFrame, so they follow whatever
    data is currently loaded rather than a hardcoded constant.

    Returns:
        A dict with:
        - ``year``: most recent year present anywhere in the data, or None.
        - ``primary`` / ``secondary``: ``{metric: mean rounded to 2 dp}``;
          metrics with no non-null values are omitted.
        - ``primary_year`` / ``secondary_year``: the year each set of
          averages was computed from, or None when that phase has no data.
    """
    ks2_metrics = [
        "rwm_expected_pct", "rwm_high_pct",
        "reading_expected_pct", "writing_expected_pct", "maths_expected_pct",
        "reading_avg_score", "maths_avg_score", "gps_avg_score",
        "reading_progress", "writing_progress", "maths_progress",
        "overall_absence_pct", "persistent_absence_pct",
        "disadvantaged_gap", "disadvantaged_pct", "sen_support_pct",
    ]

    ks4_metrics = [
        "attainment_8_score", "progress_8_score",
        "english_maths_standard_pass_pct", "english_maths_strong_pass_pct",
        "ebacc_entry_pct", "ebacc_standard_pass_pct", "ebacc_strong_pass_pct",
        "ebacc_avg_score", "gcse_grade_91_pct",
    ]

    def _latest_year(frame):
        """Return the largest non-null ``year`` in *frame* as an int, or None."""
        if frame.empty or "year" not in frame.columns:
            return None
        years = frame["year"].dropna()
        # int() on a NaN max would raise, so bail out before converting.
        return int(years.max()) if not years.empty else None

    def _means(sub_df, metric_list):
        """Return ``{col: mean}`` for each listed column that has data."""
        out = {}
        for col in metric_list:
            if col not in sub_df.columns:
                continue
            values = sub_df[col].dropna()
            if not values.empty:
                out[col] = round(float(values.mean()), 2)
        return out

    def _phase_averages(frame, marker_col, metric_list):
        """Return ``(year, means)`` for rows where *marker_col* is non-null.

        *marker_col* is the headline metric used to decide that a row
        belongs to the phase (KS2 or KS4). Only rows from that phase's own
        latest year are averaged.
        """
        if marker_col not in frame.columns:
            return None, {}
        phase_rows = frame[frame[marker_col].notna()]
        phase_year = _latest_year(phase_rows)
        if phase_year is None:
            return None, {}
        latest_rows = phase_rows[phase_rows["year"] == phase_year]
        return phase_year, _means(latest_rows, metric_list)

    df = load_school_data()

    # Primary: rows where KS2 data is non-null.
    primary_year, primary = _phase_averages(df, "rwm_expected_pct", ks2_metrics)
    # Secondary: rows where KS4 data is non-null.
    secondary_year, secondary = _phase_averages(
        df, "attainment_8_score", ks4_metrics
    )

    # Same keys whether or not data is loaded, so clients need no special case.
    return {
        "year": _latest_year(df),
        "primary": primary,
        "secondary": secondary,
        "primary_year": primary_year,
        "secondary_year": secondary_year,
    }
@@ -498,7 +574,7 @@ async def get_filter_options(request: Request):
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_available_metrics(request: Request):
"""
Get list of available KS2 performance metrics for primary schools.
Get list of available performance metrics for schools.
This is the single source of truth for metric definitions.
Frontend should consume this to avoid duplication.
@@ -517,7 +593,7 @@ async def get_available_metrics(request: Request):
@limiter.limit(f"{settings.rate_limit_per_minute}/minute")
async def get_rankings(
request: Request,
metric: str = Query("rwm_expected_pct", description="KS2 metric to rank by", max_length=50),
metric: str = Query("rwm_expected_pct", description="Metric to rank by", max_length=50),
year: Optional[int] = Query(
None, description="Specific year (defaults to most recent)", ge=2000, le=2100
),
@@ -526,7 +602,7 @@ async def get_rankings(
None, description="Filter by local authority", max_length=100
),
):
"""Get primary school rankings by a specific KS2 metric."""
"""Get school rankings by a specific metric."""
# Sanitize local authority input
local_authority = sanitize_search_input(local_authority)

View File

@@ -113,6 +113,7 @@ def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> fl
# =============================================================================
_MAIN_QUERY = text("""
-- Branch 1: Primary schools (KS2 data; KS4 columns NULL)
SELECT
s.urn,
s.school_name,
@@ -137,11 +138,11 @@ _MAIN_QUERY = text("""
l.postcode,
l.latitude,
l.longitude,
-- KS2 performance
k.year,
k.source_urn,
k.total_pupils,
k.eligible_pupils,
-- KS2 columns
k.rwm_expected_pct,
k.rwm_high_pct,
k.reading_expected_pct,
@@ -175,11 +176,116 @@ _MAIN_QUERY = text("""
k.eal_pct,
k.sen_support_pct,
k.sen_ehcp_pct,
k.stability_pct
k.stability_pct,
-- KS4 columns (NULL for primary)
NULL::numeric AS attainment_8_score,
NULL::numeric AS progress_8_score,
NULL::numeric AS progress_8_lower_ci,
NULL::numeric AS progress_8_upper_ci,
NULL::numeric AS progress_8_english,
NULL::numeric AS progress_8_maths,
NULL::numeric AS progress_8_ebacc,
NULL::numeric AS progress_8_open,
NULL::numeric AS english_maths_strong_pass_pct,
NULL::numeric AS english_maths_standard_pass_pct,
NULL::numeric AS ebacc_entry_pct,
NULL::numeric AS ebacc_strong_pass_pct,
NULL::numeric AS ebacc_standard_pass_pct,
NULL::numeric AS ebacc_avg_score,
NULL::numeric AS gcse_grade_91_pct,
NULL::numeric AS prior_attainment_avg
FROM marts.dim_school s
JOIN marts.dim_location l ON s.urn = l.urn
JOIN marts.fact_ks2_performance k ON s.urn = k.urn
ORDER BY s.school_name, k.year
UNION ALL
-- Branch 2: Secondary schools (KS4 data; KS2 columns NULL)
SELECT
s.urn,
s.school_name,
s.phase,
s.school_type,
s.academy_trust_name AS trust_name,
s.academy_trust_uid AS trust_uid,
s.religious_character AS religious_denomination,
s.gender,
s.age_range,
s.capacity,
s.headteacher_name,
s.website,
s.ofsted_grade,
s.ofsted_date,
s.ofsted_framework,
l.local_authority_name AS local_authority,
l.local_authority_code,
l.address_line1 AS address1,
l.address_line2 AS address2,
l.town,
l.postcode,
l.latitude,
l.longitude,
k4.year,
k4.source_urn,
k4.total_pupils,
k4.eligible_pupils,
-- KS2 columns (NULL for secondary)
NULL::numeric AS rwm_expected_pct,
NULL::numeric AS rwm_high_pct,
NULL::numeric AS reading_expected_pct,
NULL::numeric AS reading_high_pct,
NULL::numeric AS reading_avg_score,
NULL::numeric AS reading_progress,
NULL::numeric AS writing_expected_pct,
NULL::numeric AS writing_high_pct,
NULL::numeric AS writing_progress,
NULL::numeric AS maths_expected_pct,
NULL::numeric AS maths_high_pct,
NULL::numeric AS maths_avg_score,
NULL::numeric AS maths_progress,
NULL::numeric AS gps_expected_pct,
NULL::numeric AS gps_high_pct,
NULL::numeric AS gps_avg_score,
NULL::numeric AS science_expected_pct,
NULL::numeric AS reading_absence_pct,
NULL::numeric AS writing_absence_pct,
NULL::numeric AS maths_absence_pct,
NULL::numeric AS gps_absence_pct,
NULL::numeric AS science_absence_pct,
NULL::numeric AS rwm_expected_boys_pct,
NULL::numeric AS rwm_high_boys_pct,
NULL::numeric AS rwm_expected_girls_pct,
NULL::numeric AS rwm_high_girls_pct,
NULL::numeric AS rwm_expected_disadvantaged_pct,
NULL::numeric AS rwm_expected_non_disadvantaged_pct,
NULL::numeric AS disadvantaged_gap,
NULL::numeric AS disadvantaged_pct,
NULL::numeric AS eal_pct,
k4.sen_support_pct,
k4.sen_ehcp_pct,
NULL::numeric AS stability_pct,
-- KS4 columns
k4.attainment_8_score,
k4.progress_8_score,
k4.progress_8_lower_ci,
k4.progress_8_upper_ci,
k4.progress_8_english,
k4.progress_8_maths,
k4.progress_8_ebacc,
k4.progress_8_open,
k4.english_maths_strong_pass_pct,
k4.english_maths_standard_pass_pct,
k4.ebacc_entry_pct,
k4.ebacc_strong_pass_pct,
k4.ebacc_standard_pass_pct,
k4.ebacc_avg_score,
k4.gcse_grade_91_pct,
k4.prior_attainment_avg
FROM marts.dim_school s
JOIN marts.dim_location l ON s.urn = l.urn
JOIN marts.fact_ks4_performance k4 ON s.urn = k4.urn
ORDER BY school_name, year
""")

View File

@@ -401,6 +401,70 @@ METRIC_DEFINITIONS = {
"type": "score",
"category": "trends",
},
# ── GCSE Performance (KS4) ────────────────────────────────────────────
"attainment_8_score": {
"name": "Attainment 8",
"short_name": "Att 8",
"description": "Average grade across a pupil's best 8 GCSEs including English and Maths",
"type": "score",
"category": "gcse",
},
"progress_8_score": {
"name": "Progress 8",
"short_name": "P8",
"description": "Progress from KS2 baseline to GCSE relative to similar pupils nationally (0 = national average)",
"type": "score",
"category": "gcse",
},
"english_maths_standard_pass_pct": {
"name": "English & Maths Grade 4+",
"short_name": "E&M 4+",
"description": "% of pupils achieving grade 4 (standard pass) or above in both English and Maths",
"type": "percentage",
"category": "gcse",
},
"english_maths_strong_pass_pct": {
"name": "English & Maths Grade 5+",
"short_name": "E&M 5+",
"description": "% of pupils achieving grade 5 (strong pass) or above in both English and Maths",
"type": "percentage",
"category": "gcse",
},
"ebacc_entry_pct": {
"name": "EBacc Entry %",
"short_name": "EBacc Entry",
"description": "% of pupils entered for the English Baccalaureate (English, Maths, Sciences, Languages, Humanities)",
"type": "percentage",
"category": "gcse",
},
"ebacc_standard_pass_pct": {
"name": "EBacc Grade 4+",
"short_name": "EBacc 4+",
"description": "% of pupils achieving grade 4+ across all EBacc subjects",
"type": "percentage",
"category": "gcse",
},
"ebacc_strong_pass_pct": {
"name": "EBacc Grade 5+",
"short_name": "EBacc 5+",
"description": "% of pupils achieving grade 5+ across all EBacc subjects",
"type": "percentage",
"category": "gcse",
},
"ebacc_avg_score": {
"name": "EBacc Average Score",
"short_name": "EBacc Avg",
"description": "Average points score across EBacc subjects",
"type": "score",
"category": "gcse",
},
"gcse_grade_91_pct": {
"name": "GCSE Grade 91 %",
"short_name": "GCSE 91",
"description": "% of GCSE entries achieving a grade 9 to 1",
"type": "percentage",
"category": "gcse",
},
}
# Ranking columns to include in rankings response
@@ -456,6 +520,16 @@ RANKING_COLUMNS = [
"rwm_expected_3yr_pct",
"reading_avg_3yr",
"maths_avg_3yr",
# GCSE (KS4)
"attainment_8_score",
"progress_8_score",
"english_maths_standard_pass_pct",
"english_maths_strong_pass_pct",
"ebacc_entry_pct",
"ebacc_standard_pass_pct",
"ebacc_strong_pass_pct",
"ebacc_avg_score",
"gcse_grade_91_pct",
]
# School listing columns