From 6e5249aa1ef8becd495ea64928d001ebbbed2016 Mon Sep 17 00:00:00 2001
From: Tudor
Date: Mon, 30 Mar 2026 14:07:30 +0100
Subject: [PATCH] refactor(phase): merge KS2+KS4 into fact_performance, fix all phase inconsistencies

Root cause: the UNION ALL query in data_loader.py produced two rows per
all-through school per year (one KS2, one KS4), with drop_duplicates()
silently discarding the KS4 row.

Fixes:
- New dbt mart `fact_performance`: FULL OUTER JOIN of fact_ks2_performance
  and fact_ks4_performance on (urn, year). One row per school per year.
  All-through schools have both KS2 and KS4 columns populated.
- data_loader.py: replace 175-line UNION ALL with a simple JOIN to
  fact_performance. No more duplicate rows or drop_duplicates needed.
- sync_typesense.py: single LATERAL JOIN to fact_performance instead of
  two separate KS2/KS4 joins.
- app.py: remove drop_duplicates (no longer needed); add PHASE_GROUPS
  constant so all-through/middle schools appear in primary and secondary
  filter results (were previously invisible to both); scope result_filters
  gender/admissions_policies to secondary schools only.
- HomeView.tsx: isSecondaryView is now majority-based (not "any secondary")
  and isMixedView shows both sort option sets for mixed result sets.
- school/[slug]/page.tsx: all-through schools route to SchoolDetailView
  (renders both SATs + GCSE sections) instead of SecondarySchoolDetailView
  (KS4-only). Dedicated SEO metadata for all-through schools.

Co-Authored-By: Claude Sonnet 4.6 --- backend/app.py | 39 ++-- backend/data_loader.py | 211 +++++------------- nextjs-app/app/school/[slug]/page.tsx | 35 ++- nextjs-app/components/HomeView.tsx | 21 +- pipeline/scripts/sync_typesense.py | 45 ++-- .../transform/models/marts/_marts_schema.yml | 11 + .../models/marts/fact_performance.sql | 81 +++++++ 7 files changed, 227 insertions(+), 216 deletions(-) create mode 100644 pipeline/transform/models/marts/fact_performance.sql diff --git a/backend/app.py b/backend/app.py index 5f4ba0e..cf23dff 100644 --- a/backend/app.py +++ b/backend/app.py @@ -35,6 +35,14 @@ from .utils import clean_for_json # Values to exclude from filter dropdowns (empty strings, non-applicable labels) EXCLUDED_FILTER_VALUES = {"", "Not applicable", "Does not apply"} +# Maps user-facing phase filter values to the GIAS PhaseOfEducation values they include. +# All-through schools appear in both primary and secondary results. +PHASE_GROUPS: dict[str, set[str]] = { + "primary": {"primary", "middle deemed primary", "all-through"}, + "secondary": {"secondary", "middle deemed secondary", "all-through", "16 plus"}, + "all-through": {"all-through"}, +} + BASE_URL = "https://schoolcompare.co.uk" MAX_SLUG_LENGTH = 60 @@ -343,20 +351,13 @@ async def get_schools( ) df_latest = df_latest.merge(prev_rwm, on="urn", how="left") - # Phase filter + # Phase filter — uses PHASE_GROUPS so all-through/middle schools appear + # in the correct phase(s) rather than being invisible to both filters. 
if phase: - phase_lower = phase.lower() - if phase_lower in ("primary", "secondary", "all-through", "all_through"): - # Map param values to GIAS phase strings (partial match) - phase_map = { - "primary": "primary", - "secondary": "secondary", - "all-through": "all-through", - "all_through": "all-through", - } - phase_substr = phase_map[phase_lower] - schools_df_phase_mask = df_latest["phase"].str.lower().str.contains(phase_substr, na=False) - df_latest = df_latest[schools_df_phase_mask] + phase_lower = phase.lower().replace("_", "-") + allowed = PHASE_GROUPS.get(phase_lower) + if allowed: + df_latest = df_latest[df_latest["phase"].str.lower().isin(allowed)] # Secondary-specific filters (after phase filter) if gender: @@ -389,7 +390,8 @@ async def get_schools( for c in SCHOOL_COLUMNS + location_cols + result_cols if c in df_latest.columns ] - schools_df = df_latest[available_cols].drop_duplicates(subset=["urn"]) + # fact_performance guarantees one row per (urn, year); df_latest has one row per urn. + schools_df = df_latest[available_cols] # Location-based search (uses pre-geocoded data from database) search_coords = None @@ -458,13 +460,16 @@ async def get_schools( schools_df["school_type"].str.lower() == school_type.lower() ] - # Compute result-scoped filter values (before pagination) + # Compute result-scoped filter values (before pagination). + # Gender and admissions are secondary-only filters — scope them to schools + # with KS4 data so they don't appear for purely primary result sets. 
+ _sec_mask = schools_df["attainment_8_score"].notna() if "attainment_8_score" in schools_df.columns else pd.Series(False, index=schools_df.index) result_filters = { "local_authorities": clean_filter_values(schools_df["local_authority"]) if "local_authority" in schools_df.columns else [], "school_types": clean_filter_values(schools_df["school_type"]) if "school_type" in schools_df.columns else [], "phases": clean_filter_values(schools_df["phase"]) if "phase" in schools_df.columns else [], - "genders": clean_filter_values(schools_df["gender"]) if "gender" in schools_df.columns else [], - "admissions_policies": clean_filter_values(schools_df["admissions_policy"]) if "admissions_policy" in schools_df.columns else [], + "genders": clean_filter_values(schools_df.loc[_sec_mask, "gender"]) if "gender" in schools_df.columns and _sec_mask.any() else [], + "admissions_policies": clean_filter_values(schools_df.loc[_sec_mask, "admissions_policy"]) if "admissions_policy" in schools_df.columns and _sec_mask.any() else [], } # Pagination diff --git a/backend/data_loader.py b/backend/data_loader.py index 9029e4c..c791a4b 100644 --- a/backend/data_loader.py +++ b/backend/data_loader.py @@ -109,11 +109,12 @@ def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> fl # ============================================================================= -# MAIN DATA LOAD — joins dim_school + dim_location + fact_ks2_performance +# MAIN DATA LOAD — joins dim_school + dim_location + fact_performance +# fact_performance is a merged KS2+KS4 table (one row per URN per year). +# All-through schools have both KS2 and KS4 columns populated in the same row. 
# ============================================================================= _MAIN_QUERY = text(""" - -- Branch 1: Primary schools (KS2 data; KS4 columns NULL) SELECT s.urn, s.school_name, @@ -139,155 +140,67 @@ _MAIN_QUERY = text(""" l.postcode, l.latitude, l.longitude, - k.year, - k.source_urn, - k.total_pupils, - k.eligible_pupils, - -- KS2 columns - k.rwm_expected_pct, - k.rwm_high_pct, - k.reading_expected_pct, - k.reading_high_pct, - k.reading_avg_score, - k.reading_progress, - k.writing_expected_pct, - k.writing_high_pct, - k.writing_progress, - k.maths_expected_pct, - k.maths_high_pct, - k.maths_avg_score, - k.maths_progress, - k.gps_expected_pct, - k.gps_high_pct, - k.gps_avg_score, - k.science_expected_pct, - k.reading_absence_pct, - k.writing_absence_pct, - k.maths_absence_pct, - k.gps_absence_pct, - k.science_absence_pct, - k.rwm_expected_boys_pct, - k.rwm_high_boys_pct, - k.rwm_expected_girls_pct, - k.rwm_high_girls_pct, - k.rwm_expected_disadvantaged_pct, - k.rwm_expected_non_disadvantaged_pct, - k.disadvantaged_gap, - k.disadvantaged_pct, - k.eal_pct, - k.sen_support_pct, - k.sen_ehcp_pct, - k.stability_pct, - -- KS4 columns (NULL for primary) - NULL::numeric AS attainment_8_score, - NULL::numeric AS progress_8_score, - NULL::numeric AS progress_8_lower_ci, - NULL::numeric AS progress_8_upper_ci, - NULL::numeric AS progress_8_english, - NULL::numeric AS progress_8_maths, - NULL::numeric AS progress_8_ebacc, - NULL::numeric AS progress_8_open, - NULL::numeric AS english_maths_strong_pass_pct, - NULL::numeric AS english_maths_standard_pass_pct, - NULL::numeric AS ebacc_entry_pct, - NULL::numeric AS ebacc_strong_pass_pct, - NULL::numeric AS ebacc_standard_pass_pct, - NULL::numeric AS ebacc_avg_score, - NULL::numeric AS gcse_grade_91_pct, - NULL::numeric AS prior_attainment_avg + p.year, + p.source_urn, + p.total_pupils, + p.eligible_pupils, + -- KS2 columns (NULL for pure secondary schools) + p.rwm_expected_pct, + p.rwm_high_pct, + 
p.reading_expected_pct, + p.reading_high_pct, + p.reading_avg_score, + p.reading_progress, + p.writing_expected_pct, + p.writing_high_pct, + p.writing_progress, + p.maths_expected_pct, + p.maths_high_pct, + p.maths_avg_score, + p.maths_progress, + p.gps_expected_pct, + p.gps_high_pct, + p.gps_avg_score, + p.science_expected_pct, + p.reading_absence_pct, + p.writing_absence_pct, + p.maths_absence_pct, + p.gps_absence_pct, + p.science_absence_pct, + p.rwm_expected_boys_pct, + p.rwm_high_boys_pct, + p.rwm_expected_girls_pct, + p.rwm_high_girls_pct, + p.rwm_expected_disadvantaged_pct, + p.rwm_expected_non_disadvantaged_pct, + p.disadvantaged_gap, + p.disadvantaged_pct, + p.eal_pct, + p.stability_pct, + -- KS4 columns (NULL for pure primary schools) + p.attainment_8_score, + p.progress_8_score, + p.progress_8_lower_ci, + p.progress_8_upper_ci, + p.progress_8_english, + p.progress_8_maths, + p.progress_8_ebacc, + p.progress_8_open, + p.english_maths_strong_pass_pct, + p.english_maths_standard_pass_pct, + p.ebacc_entry_pct, + p.ebacc_strong_pass_pct, + p.ebacc_standard_pass_pct, + p.ebacc_avg_score, + p.gcse_grade_91_pct, + p.prior_attainment_avg, + -- SEN (coalesced KS2+KS4 in fact_performance) + p.sen_support_pct, + p.sen_ehcp_pct FROM marts.dim_school s JOIN marts.dim_location l ON s.urn = l.urn - JOIN marts.fact_ks2_performance k ON s.urn = k.urn - - UNION ALL - - -- Branch 2: Secondary schools (KS4 data; KS2 columns NULL) - SELECT - s.urn, - s.school_name, - s.phase, - s.school_type, - s.academy_trust_name AS trust_name, - s.academy_trust_uid AS trust_uid, - s.religious_character AS religious_denomination, - s.gender, - s.age_range, - s.admissions_policy, - s.capacity, - s.headteacher_name, - s.website, - s.ofsted_grade, - s.ofsted_date, - s.ofsted_framework, - l.local_authority_name AS local_authority, - l.local_authority_code, - l.address_line1 AS address1, - l.address_line2 AS address2, - l.town, - l.postcode, - l.latitude, - l.longitude, - k4.year, - 
k4.source_urn, - k4.total_pupils, - k4.eligible_pupils, - -- KS2 columns (NULL for secondary) - NULL::numeric AS rwm_expected_pct, - NULL::numeric AS rwm_high_pct, - NULL::numeric AS reading_expected_pct, - NULL::numeric AS reading_high_pct, - NULL::numeric AS reading_avg_score, - NULL::numeric AS reading_progress, - NULL::numeric AS writing_expected_pct, - NULL::numeric AS writing_high_pct, - NULL::numeric AS writing_progress, - NULL::numeric AS maths_expected_pct, - NULL::numeric AS maths_high_pct, - NULL::numeric AS maths_avg_score, - NULL::numeric AS maths_progress, - NULL::numeric AS gps_expected_pct, - NULL::numeric AS gps_high_pct, - NULL::numeric AS gps_avg_score, - NULL::numeric AS science_expected_pct, - NULL::numeric AS reading_absence_pct, - NULL::numeric AS writing_absence_pct, - NULL::numeric AS maths_absence_pct, - NULL::numeric AS gps_absence_pct, - NULL::numeric AS science_absence_pct, - NULL::numeric AS rwm_expected_boys_pct, - NULL::numeric AS rwm_high_boys_pct, - NULL::numeric AS rwm_expected_girls_pct, - NULL::numeric AS rwm_high_girls_pct, - NULL::numeric AS rwm_expected_disadvantaged_pct, - NULL::numeric AS rwm_expected_non_disadvantaged_pct, - NULL::numeric AS disadvantaged_gap, - NULL::numeric AS disadvantaged_pct, - NULL::numeric AS eal_pct, - k4.sen_support_pct, - k4.sen_ehcp_pct, - NULL::numeric AS stability_pct, - -- KS4 columns - k4.attainment_8_score, - k4.progress_8_score, - k4.progress_8_lower_ci, - k4.progress_8_upper_ci, - k4.progress_8_english, - k4.progress_8_maths, - k4.progress_8_ebacc, - k4.progress_8_open, - k4.english_maths_strong_pass_pct, - k4.english_maths_standard_pass_pct, - k4.ebacc_entry_pct, - k4.ebacc_strong_pass_pct, - k4.ebacc_standard_pass_pct, - k4.ebacc_avg_score, - k4.gcse_grade_91_pct, - k4.prior_attainment_avg - FROM marts.dim_school s - JOIN marts.dim_location l ON s.urn = l.urn - JOIN marts.fact_ks4_performance k4 ON s.urn = k4.urn - - ORDER BY school_name, year + JOIN marts.fact_performance p ON s.urn = 
p.urn + ORDER BY s.school_name, p.year """) diff --git a/nextjs-app/app/school/[slug]/page.tsx b/nextjs-app/app/school/[slug]/page.tsx index cefe026..671afcd 100644 --- a/nextjs-app/app/school/[slug]/page.tsx +++ b/nextjs-app/app/school/[slug]/page.tsx @@ -30,19 +30,28 @@ export async function generateMetadata({ params }: SchoolPageProps): Promise d.attainment_8_score != null); + const phaseStr = (school_info.phase ?? '').toLowerCase(); + const isAllThrough = phaseStr === 'all-through'; + const isSecondary = !isAllThrough && ( + phaseStr.includes('secondary') + || (data.yearly_data ?? []).some((d: any) => d.attainment_8_score != null) + ); + const la = school_info.local_authority ? ` in ${school_info.local_authority}` : ''; const title = `${school_info.school_name} | ${school_info.local_authority || 'England'}`; - const description = isSecondary - ? `View GCSE results, Attainment 8, Progress 8 and school statistics for ${school_info.school_name}${school_info.local_authority ? ` in ${school_info.local_authority}` : ''}.` - : `View KS2 performance data, results, and statistics for ${school_info.school_name}${school_info.local_authority ? ` in ${school_info.local_authority}` : ''}. Compare reading, writing, and maths results.`; + const description = isAllThrough + ? `View KS2 SATs and GCSE results for ${school_info.school_name}${la}. All-through school covering primary and secondary education.` + : isSecondary + ? `View GCSE results, Attainment 8, Progress 8 and school statistics for ${school_info.school_name}${la}.` + : `View KS2 performance data, results, and statistics for ${school_info.school_name}${la}. Compare reading, writing, and maths results.`; return { title, description, - keywords: isSecondary - ? 
`${school_info.school_name}, GCSE results, secondary school, ${school_info.local_authority}, Attainment 8, Progress 8` - : `${school_info.school_name}, KS2 results, primary school, ${school_info.local_authority}, school performance, SATs results`, + keywords: isAllThrough + ? `${school_info.school_name}, KS2 results, GCSE results, all-through school, ${school_info.local_authority}, SATs, Attainment 8` + : isSecondary + ? `${school_info.school_name}, GCSE results, secondary school, ${school_info.local_authority}, Attainment 8, Progress 8` + : `${school_info.school_name}, KS2 results, primary school, ${school_info.local_authority}, school performance, SATs results`, openGraph: { title, description, @@ -95,8 +104,14 @@ export default async function SchoolPage({ params }: SchoolPageProps) { redirect(`/school/${canonicalSlug}`); } - const isSecondary = (school_info.phase ?? '').toLowerCase().includes('secondary') - || yearly_data.some((d: any) => d.attainment_8_score != null); + const phaseStr = (school_info.phase ?? '').toLowerCase(); + const isAllThrough = phaseStr === 'all-through'; + // All-through schools go to SchoolDetailView (renders both KS2 + KS4 sections). + // SecondarySchoolDetailView is KS4-only, so all-through schools would lose SATs data. 
+ const isSecondary = !isAllThrough && ( + phaseStr.includes('secondary') + || yearly_data.some((d: any) => d.attainment_8_score != null) + ); // Generate JSON-LD structured data for SEO const structuredData = { diff --git a/nextjs-app/components/HomeView.tsx b/nextjs-app/components/HomeView.tsx index a8fe55d..4a21cea 100644 --- a/nextjs-app/components/HomeView.tsx +++ b/nextjs-app/components/HomeView.tsx @@ -45,8 +45,11 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp const isLocationSearch = !!searchParams.get('postcode'); const isSearchActive = !!(hasSearch || searchParams.get('local_authority') || searchParams.get('school_type')); const currentPhase = searchParams.get('phase') || ''; - const hasSecondaryResults = allSchools.some(s => s.attainment_8_score != null); - const isSecondaryView = currentPhase.toLowerCase().includes('secondary') || hasSecondaryResults; + const secondaryCount = allSchools.filter(s => s.attainment_8_score != null).length; + const primaryCount = allSchools.filter(s => s.rwm_expected_pct != null).length; + const isSecondaryView = currentPhase.toLowerCase().includes('secondary') + || (!currentPhase && secondaryCount > primaryCount); + const isMixedView = primaryCount > 0 && secondaryCount > 0 && !currentPhase; // Reset pagination state when search params change useEffect(() => { @@ -79,13 +82,13 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp .finally(() => setIsLoadingMap(false)); }, [resultsView, searchParams]); - // Fetch LA averages when secondary schools are visible + // Fetch LA averages when secondary or mixed schools are visible useEffect(() => { - if (!isSecondaryView) return; + if (!isSecondaryView && !isMixedView) return; fetchLAaverages({ cache: 'force-cache' }) .then(data => setLaAverages(data.secondary.attainment_8_by_la)) .catch(() => {}); - }, [isSecondaryView]); + }, [isSecondaryView, isMixedView]); const handleLoadMore = async () => { if (isLoadingMore 
|| !hasMore) return; @@ -209,10 +212,10 @@ export function HomeView({ initialSchools, filters, totalSchools }: HomeViewProp className={styles.sortSelect} > - {!isSecondaryView && } - {!isSecondaryView && } - {isSecondaryView && } - {isSecondaryView && } + {(!isSecondaryView || isMixedView) && } + {(!isSecondaryView || isMixedView) && } + {(isSecondaryView || isMixedView) && } + {(isSecondaryView || isMixedView) && } {isLocationSearch && } diff --git a/pipeline/scripts/sync_typesense.py b/pipeline/scripts/sync_typesense.py index e843ecb..3d5e6e6 100644 --- a/pipeline/scripts/sync_typesense.py +++ b/pipeline/scripts/sync_typesense.py @@ -58,24 +58,14 @@ QUERY_BASE = """ LEFT JOIN marts.dim_location l ON s.urn = l.urn """ -QUERY_KS2_JOIN = """ +QUERY_PERFORMANCE_JOIN = """ LEFT JOIN LATERAL ( - SELECT rwm_expected_pct - FROM marts.fact_ks2_performance + SELECT rwm_expected_pct, progress_8_score + FROM marts.fact_performance WHERE urn = s.urn ORDER BY year DESC LIMIT 1 - ) ks2 ON true -""" - -QUERY_KS4_JOIN = """ - LEFT JOIN LATERAL ( - SELECT progress_8_score - FROM marts.fact_ks4_performance - WHERE urn = s.urn - ORDER BY year DESC - LIMIT 1 - ) ks4 ON true + ) p ON true """ @@ -136,30 +126,23 @@ def sync(typesense_url: str, api_key: str): schema = {**COLLECTION_SCHEMA, "name": collection_name} client.collections.create(schema) - # Fetch data from marts — dynamically include KS2/KS4 joins if tables exist + # Fetch data from marts — join fact_performance if it exists conn = get_db_connection() with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: - # Check which fact tables exist + # Check whether the merged fact table exists cur.execute(""" SELECT table_name FROM information_schema.tables - WHERE table_schema = 'marts' AND table_name IN ('fact_ks2_performance', 'fact_ks4_performance') + WHERE table_schema = 'marts' AND table_name = 'fact_performance' """) - existing_tables = {r["table_name"] for r in cur.fetchall()} - - select_extra = [] - joins = 
"" - if "fact_ks2_performance" in existing_tables: - select_extra.append("ks2.rwm_expected_pct") - joins += QUERY_KS2_JOIN - if "fact_ks4_performance" in existing_tables: - select_extra.append("ks4.progress_8_score") - joins += QUERY_KS4_JOIN + has_fact_performance = cur.fetchone() is not None query = QUERY_BASE - if select_extra: - # Insert extra select columns before FROM - query = query.replace("l.longitude as lng", "l.longitude as lng,\n " + ",\n ".join(select_extra)) - query += joins + if has_fact_performance: + query = query.replace( + "l.longitude as lng", + "l.longitude as lng,\n p.rwm_expected_pct,\n p.progress_8_score", + ) + query += QUERY_PERFORMANCE_JOIN cur.execute(query) rows = cur.fetchall() diff --git a/pipeline/transform/models/marts/_marts_schema.yml b/pipeline/transform/models/marts/_marts_schema.yml index e12fdc2..0569afc 100644 --- a/pipeline/transform/models/marts/_marts_schema.yml +++ b/pipeline/transform/models/marts/_marts_schema.yml @@ -34,6 +34,17 @@ models: - name: urn tests: [not_null] + - name: fact_performance + description: Merged KS2 + KS4 performance — one row per URN per year via FULL OUTER JOIN. All-through schools have both KS2 and KS4 columns populated; pure primary schools have NULL KS4 columns; pure secondary schools have NULL KS2 columns. + columns: + - name: urn + tests: [not_null] + - name: year + tests: [not_null] + tests: + - unique: + column_name: "urn || '-' || year" + - name: fact_ks2_performance description: KS2 attainment — one row per URN per year columns: diff --git a/pipeline/transform/models/marts/fact_performance.sql b/pipeline/transform/models/marts/fact_performance.sql new file mode 100644 index 0000000..97a92ec --- /dev/null +++ b/pipeline/transform/models/marts/fact_performance.sql @@ -0,0 +1,81 @@ +-- Mart: Merged KS2 + KS4 performance — one row per URN per year +-- FULL OUTER JOIN so all-through schools have both KS2 and KS4 columns populated. 
+-- Pure primary schools have NULL KS4 columns; pure secondary schools have NULL KS2 columns. + +with ks2 as ( + select * from {{ ref('fact_ks2_performance') }} +), + +ks4 as ( + select * from {{ ref('fact_ks4_performance') }} +) + +select + -- Join keys (coalesced so neither side is dropped) + coalesce(ks2.urn, ks4.urn) as urn, + coalesce(ks2.source_urn, ks4.source_urn) as source_urn, + coalesce(ks2.year, ks4.year) as year, + coalesce(ks2.total_pupils, ks4.total_pupils) as total_pupils, + coalesce(ks2.eligible_pupils, ks4.eligible_pupils) as eligible_pupils, + + -- KS2 columns (NULL for pure secondary schools) + ks2.rwm_expected_pct, + ks2.rwm_high_pct, + ks2.reading_expected_pct, + ks2.reading_high_pct, + ks2.reading_avg_score, + ks2.reading_progress, + ks2.writing_expected_pct, + ks2.writing_high_pct, + ks2.writing_progress, + ks2.maths_expected_pct, + ks2.maths_high_pct, + ks2.maths_avg_score, + ks2.maths_progress, + ks2.gps_expected_pct, + ks2.gps_high_pct, + ks2.gps_avg_score, + ks2.science_expected_pct, + ks2.reading_absence_pct, + ks2.writing_absence_pct, + ks2.maths_absence_pct, + ks2.gps_absence_pct, + ks2.science_absence_pct, + ks2.rwm_expected_boys_pct, + ks2.rwm_high_boys_pct, + ks2.rwm_expected_girls_pct, + ks2.rwm_high_girls_pct, + ks2.rwm_expected_disadvantaged_pct, + ks2.rwm_expected_non_disadvantaged_pct, + ks2.disadvantaged_gap, + ks2.disadvantaged_pct, + ks2.eal_pct, + ks2.stability_pct, + + -- KS4 columns (NULL for pure primary schools) + ks4.attainment_8_score, + ks4.progress_8_score, + ks4.progress_8_lower_ci, + ks4.progress_8_upper_ci, + ks4.progress_8_english, + ks4.progress_8_maths, + ks4.progress_8_ebacc, + ks4.progress_8_open, + ks4.english_maths_strong_pass_pct, + ks4.english_maths_standard_pass_pct, + ks4.ebacc_entry_pct, + ks4.ebacc_strong_pass_pct, + ks4.ebacc_standard_pass_pct, + ks4.ebacc_avg_score, + ks4.gcse_grade_91_pct, + ks4.prior_attainment_avg, + ks4.sen_pct, + + -- Shared SEN columns — KS2 preferred, fall back to KS4 + 
coalesce(ks2.sen_support_pct, ks4.sen_support_pct) as sen_support_pct, + coalesce(ks2.sen_ehcp_pct, ks4.sen_ehcp_pct) as sen_ehcp_pct + +from ks2 +full outer join ks4 + on ks2.urn = ks4.urn + and ks2.year = ks4.year