feat: ingest official DfE KS2 national averages from EES data catalogue
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 19s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 53s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m24s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 19s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 53s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m24s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
Replaces computed means from our school dataset with the published DfE national headline figures for the KS2 chart reference line.

- tap-uk-ees: new EESKs2NationalStream fetches the stable EES data-catalogue CSV (one row per year, England national total, AllSchools filter)
- dbt staging: stg_ees_ks2_national normalises columns, casts to float, filters to years >= 201617
- dbt mart: fact_ks2_national_averages — one row per year, official figures
- backend/models: Ks2NationalAverage SQLAlchemy model
- backend/app: /api/national-averages queries the mart for KS2 by_year; secondary by_year stays computed (no DfE KS4 national dataset yet)
- DAG: extract_ks2_national task added to school_data_annual_ees, runs in parallel with the main EES extract

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+46
-11
@@ -708,21 +708,56 @@ async def get_national_averages(request: Request):
|
||||
# Secondary: schools where KS4 data is non-null
|
||||
secondary_df = df_latest[df_latest["attainment_8_score"].notna()]
|
||||
|
||||
# Per-year averages for every year in the dataset (used by chart reference lines)
|
||||
latest_primary = _means(primary_df, ks2_metrics)
|
||||
latest_secondary = _means(secondary_df, ks4_metrics)
|
||||
|
||||
# Per-year KS2 primary averages: use official DfE figures from the mart table.
|
||||
# Per-year KS4 secondary averages: computed from our dataset (no DfE dataset yet).
|
||||
from .database import SessionLocal
|
||||
from .models import Ks2NationalAverage
|
||||
|
||||
by_year = []
|
||||
for yr in sorted(df["year"].dropna().unique()):
|
||||
yr = int(yr)
|
||||
df_yr = df[df["year"] == yr]
|
||||
by_year.append({
|
||||
"year": yr,
|
||||
"primary": _means(df_yr[df_yr["rwm_expected_pct"].notna()], ks2_metrics),
|
||||
"secondary": _means(df_yr[df_yr["attainment_8_score"].notna()], ks4_metrics),
|
||||
})
|
||||
try:
|
||||
db = SessionLocal()
|
||||
nat_rows = db.query(Ks2NationalAverage).order_by(Ks2NationalAverage.year).all()
|
||||
# Build a lookup of computed secondary averages per year as fallback
|
||||
secondary_by_year = {}
|
||||
for yr in sorted(df["year"].dropna().unique()):
|
||||
yr = int(yr)
|
||||
df_yr = df[df["year"] == yr]
|
||||
secondary_by_year[yr] = _means(
|
||||
df_yr[df_yr["attainment_8_score"].notna()], ks4_metrics
|
||||
)
|
||||
# Merge: official KS2 figures + computed KS4 figures per year
|
||||
ks2_years = {r.year for r in nat_rows}
|
||||
all_years = sorted(ks2_years | set(secondary_by_year.keys()))
|
||||
nat_lookup = {r.year: r for r in nat_rows}
|
||||
for yr in all_years:
|
||||
primary_yr: dict = {}
|
||||
if yr in nat_lookup:
|
||||
r = nat_lookup[yr]
|
||||
for col in ks2_metrics:
|
||||
val = getattr(r, col, None)
|
||||
if val is not None:
|
||||
primary_yr[col] = val
|
||||
by_year.append({
|
||||
"year": yr,
|
||||
"primary": primary_yr,
|
||||
"secondary": secondary_by_year.get(yr, {}),
|
||||
})
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Update latest_primary with official DfE figure for the latest year if available
|
||||
if by_year:
|
||||
latest_official = next((e["primary"] for e in reversed(by_year) if e["primary"]), None)
|
||||
if latest_official:
|
||||
latest_primary = latest_official
|
||||
|
||||
return {
|
||||
"year": latest_year,
|
||||
"primary": _means(primary_df, ks2_metrics),
|
||||
"secondary": _means(secondary_df, ks4_metrics),
|
||||
"primary": latest_primary,
|
||||
"secondary": latest_secondary,
|
||||
"by_year": by_year,
|
||||
}
|
||||
|
||||
|
||||
@@ -215,3 +215,25 @@ class FactFinance(Base):
|
||||
teacher_cost_pct = Column(Float)
|
||||
support_staff_cost_pct = Column(Float)
|
||||
premises_cost_pct = Column(Float)
|
||||
|
||||
|
||||
class Ks2NationalAverage(Base):
    """ORM mapping for the ``fact_ks2_national_averages`` mart table.

    Holds the official DfE KS2 national headline averages, one row per
    academic year. Every measure column is a plain ``Float`` with no
    constraints, so readers should expect individual measures to be
    ``None`` for some years.
    """

    __tablename__ = "fact_ks2_national_averages"
    __table_args__ = MARTS

    # Primary key: one row per academic year.
    # NOTE(review): the staging filter quoted in the commit message
    # ("years >= 201617") suggests a YYYYYY integer encoding — confirm.
    year = Column(Integer, primary_key=True)

    # Combined "rwm" measures (presumably reading/writing/maths — confirm).
    rwm_expected_pct = Column(Float)
    rwm_high_pct = Column(Float)

    # Reading.
    reading_expected_pct = Column(Float)
    reading_high_pct = Column(Float)
    reading_avg_score = Column(Float)

    # Writing: the upper band is named ``gd`` here rather than ``high``,
    # and there is no average-score column.
    writing_expected_pct = Column(Float)
    writing_gd_pct = Column(Float)

    # Maths.
    maths_expected_pct = Column(Float)
    maths_high_pct = Column(Float)
    maths_avg_score = Column(Float)

    # GPS.
    gps_expected_pct = Column(Float)
    gps_high_pct = Column(Float)
    gps_avg_score = Column(Float)

    # Science: only the expected-standard percentage is stored.
    science_expected_pct = Column(Float)
|
||||
|
||||
Reference in New Issue
Block a user