refactor(phase): merge KS2+KS4 into fact_performance, fix all phase inconsistencies
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 50s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m24s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 50s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m12s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m24s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 0s
Root cause: the UNION ALL query in data_loader.py produced two rows per all-through school per year (one KS2, one KS4), and drop_duplicates() silently discarded the KS4 row.

Fixes:
- New dbt mart `fact_performance`: FULL OUTER JOIN of fact_ks2_performance and fact_ks4_performance on (urn, year). One row per school per year. All-through schools have both KS2 and KS4 columns populated.
- data_loader.py: replace the 175-line UNION ALL with a simple JOIN to fact_performance. No more duplicate rows, so drop_duplicates is no longer needed.
- sync_typesense.py: single LATERAL JOIN to fact_performance instead of two separate KS2/KS4 joins.
- app.py: remove drop_duplicates (no longer needed); add PHASE_GROUPS constant so all-through/middle schools appear in primary and secondary filter results (they were previously invisible to both); scope result_filters gender/admissions_policies to secondary schools only.
- HomeView.tsx: isSecondaryView is now majority-based (not "any secondary"), and isMixedView shows both sort option sets for mixed result sets.
- school/[slug]/page.tsx: all-through schools route to SchoolDetailView (renders both SATs + GCSE sections) instead of SecondarySchoolDetailView (KS4-only). Dedicated SEO metadata for all-through schools.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -58,24 +58,14 @@ QUERY_BASE = """
|
||||
LEFT JOIN marts.dim_location l ON s.urn = l.urn
|
||||
"""
|
||||
|
||||
QUERY_KS2_JOIN = """
|
||||
QUERY_PERFORMANCE_JOIN = """
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT rwm_expected_pct
|
||||
FROM marts.fact_ks2_performance
|
||||
SELECT rwm_expected_pct, progress_8_score
|
||||
FROM marts.fact_performance
|
||||
WHERE urn = s.urn
|
||||
ORDER BY year DESC
|
||||
LIMIT 1
|
||||
) ks2 ON true
|
||||
"""
|
||||
|
||||
QUERY_KS4_JOIN = """
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT progress_8_score
|
||||
FROM marts.fact_ks4_performance
|
||||
WHERE urn = s.urn
|
||||
ORDER BY year DESC
|
||||
LIMIT 1
|
||||
) ks4 ON true
|
||||
) p ON true
|
||||
"""
|
||||
|
||||
|
||||
@@ -136,30 +126,23 @@ def sync(typesense_url: str, api_key: str):
|
||||
schema = {**COLLECTION_SCHEMA, "name": collection_name}
|
||||
client.collections.create(schema)
|
||||
|
||||
# Fetch data from marts — dynamically include KS2/KS4 joins if tables exist
|
||||
# Fetch data from marts — join fact_performance if it exists
|
||||
conn = get_db_connection()
|
||||
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
|
||||
# Check which fact tables exist
|
||||
# Check whether the merged fact table exists
|
||||
cur.execute("""
|
||||
SELECT table_name FROM information_schema.tables
|
||||
WHERE table_schema = 'marts' AND table_name IN ('fact_ks2_performance', 'fact_ks4_performance')
|
||||
WHERE table_schema = 'marts' AND table_name = 'fact_performance'
|
||||
""")
|
||||
existing_tables = {r["table_name"] for r in cur.fetchall()}
|
||||
|
||||
select_extra = []
|
||||
joins = ""
|
||||
if "fact_ks2_performance" in existing_tables:
|
||||
select_extra.append("ks2.rwm_expected_pct")
|
||||
joins += QUERY_KS2_JOIN
|
||||
if "fact_ks4_performance" in existing_tables:
|
||||
select_extra.append("ks4.progress_8_score")
|
||||
joins += QUERY_KS4_JOIN
|
||||
has_fact_performance = cur.fetchone() is not None
|
||||
|
||||
query = QUERY_BASE
|
||||
if select_extra:
|
||||
# Insert extra select columns before FROM
|
||||
query = query.replace("l.longitude as lng", "l.longitude as lng,\n " + ",\n ".join(select_extra))
|
||||
query += joins
|
||||
if has_fact_performance:
|
||||
query = query.replace(
|
||||
"l.longitude as lng",
|
||||
"l.longitude as lng,\n p.rwm_expected_pct,\n p.progress_8_score",
|
||||
)
|
||||
query += QUERY_PERFORMANCE_JOIN
|
||||
|
||||
cur.execute(query)
|
||||
rows = cur.fetchall()
|
||||
|
||||
@@ -34,6 +34,17 @@ models:
|
||||
- name: urn
|
||||
tests: [not_null]
|
||||
|
||||
- name: fact_performance
|
||||
description: Merged KS2 + KS4 performance — one row per URN per year via FULL OUTER JOIN. All-through schools have both KS2 and KS4 columns populated; pure primary schools have NULL KS4 columns; pure secondary schools have NULL KS2 columns.
|
||||
columns:
|
||||
- name: urn
|
||||
tests: [not_null]
|
||||
- name: year
|
||||
tests: [not_null]
|
||||
tests:
|
||||
- unique:
|
||||
column_name: "urn || '-' || year"
|
||||
|
||||
- name: fact_ks2_performance
|
||||
description: KS2 attainment — one row per URN per year
|
||||
columns:
|
||||
|
||||
81
pipeline/transform/models/marts/fact_performance.sql
Normal file
81
pipeline/transform/models/marts/fact_performance.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
-- Mart: Merged KS2 + KS4 performance — one row per URN per year
|
||||
-- FULL OUTER JOIN so all-through schools have both KS2 and KS4 columns populated.
|
||||
-- Pure primary schools have NULL KS4 columns; pure secondary schools have NULL KS2 columns.
|
||||
|
||||
with ks2 as (
|
||||
select * from {{ ref('fact_ks2_performance') }}
|
||||
),
|
||||
|
||||
ks4 as (
|
||||
select * from {{ ref('fact_ks4_performance') }}
|
||||
)
|
||||
|
||||
select
|
||||
-- Join keys (urn, year) and shared columns, coalesced so rows present on only one side keep their values; the KS2 value wins when both sides match
|
||||
coalesce(ks2.urn, ks4.urn) as urn,
|
||||
coalesce(ks2.source_urn, ks4.source_urn) as source_urn,
|
||||
coalesce(ks2.year, ks4.year) as year,
|
||||
coalesce(ks2.total_pupils, ks4.total_pupils) as total_pupils,
|
||||
coalesce(ks2.eligible_pupils, ks4.eligible_pupils) as eligible_pupils,
|
||||
|
||||
-- KS2 columns (NULL for pure secondary schools)
|
||||
ks2.rwm_expected_pct,
|
||||
ks2.rwm_high_pct,
|
||||
ks2.reading_expected_pct,
|
||||
ks2.reading_high_pct,
|
||||
ks2.reading_avg_score,
|
||||
ks2.reading_progress,
|
||||
ks2.writing_expected_pct,
|
||||
ks2.writing_high_pct,
|
||||
ks2.writing_progress,
|
||||
ks2.maths_expected_pct,
|
||||
ks2.maths_high_pct,
|
||||
ks2.maths_avg_score,
|
||||
ks2.maths_progress,
|
||||
ks2.gps_expected_pct,
|
||||
ks2.gps_high_pct,
|
||||
ks2.gps_avg_score,
|
||||
ks2.science_expected_pct,
|
||||
ks2.reading_absence_pct,
|
||||
ks2.writing_absence_pct,
|
||||
ks2.maths_absence_pct,
|
||||
ks2.gps_absence_pct,
|
||||
ks2.science_absence_pct,
|
||||
ks2.rwm_expected_boys_pct,
|
||||
ks2.rwm_high_boys_pct,
|
||||
ks2.rwm_expected_girls_pct,
|
||||
ks2.rwm_high_girls_pct,
|
||||
ks2.rwm_expected_disadvantaged_pct,
|
||||
ks2.rwm_expected_non_disadvantaged_pct,
|
||||
ks2.disadvantaged_gap,
|
||||
ks2.disadvantaged_pct,
|
||||
ks2.eal_pct,
|
||||
ks2.stability_pct,
|
||||
|
||||
-- KS4 columns (NULL for pure primary schools)
|
||||
ks4.attainment_8_score,
|
||||
ks4.progress_8_score,
|
||||
ks4.progress_8_lower_ci,
|
||||
ks4.progress_8_upper_ci,
|
||||
ks4.progress_8_english,
|
||||
ks4.progress_8_maths,
|
||||
ks4.progress_8_ebacc,
|
||||
ks4.progress_8_open,
|
||||
ks4.english_maths_strong_pass_pct,
|
||||
ks4.english_maths_standard_pass_pct,
|
||||
ks4.ebacc_entry_pct,
|
||||
ks4.ebacc_strong_pass_pct,
|
||||
ks4.ebacc_standard_pass_pct,
|
||||
ks4.ebacc_avg_score,
|
||||
ks4.gcse_grade_91_pct,
|
||||
ks4.prior_attainment_avg,
|
||||
ks4.sen_pct,
|
||||
|
||||
-- Shared SEN columns — KS2 preferred, fall back to KS4
|
||||
coalesce(ks2.sen_support_pct, ks4.sen_support_pct) as sen_support_pct,
|
||||
coalesce(ks2.sen_ehcp_pct, ks4.sen_ehcp_pct) as sen_ehcp_pct
|
||||
|
||||
from ks2
|
||||
full outer join ks4
|
||||
on ks2.urn = ks4.urn
|
||||
and ks2.year = ks4.year
|
||||
Reference in New Issue
Block a user