feat(data): integrate 9 UK government data sources via Kestra
Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.
Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
ofsted_parent_view, school_census, school_admissions, sen_detail, phonics,
school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date
Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and retries (up to 3 attempts; 2 for the long-running finance and idaci flows)
Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,7 @@ from .data_loader import (
|
|||||||
clear_cache,
|
clear_cache,
|
||||||
load_school_data,
|
load_school_data,
|
||||||
geocode_single_postcode,
|
geocode_single_postcode,
|
||||||
|
get_supplementary_data,
|
||||||
)
|
)
|
||||||
from .data_loader import get_data_info as get_db_info
|
from .data_loader import get_data_info as get_db_info
|
||||||
from .database import check_and_migrate_if_needed
|
from .database import check_and_migrate_if_needed
|
||||||
@@ -384,6 +385,16 @@ async def get_school_details(request: Request, urn: int):
|
|||||||
# Get latest info for the school
|
# Get latest info for the school
|
||||||
latest = school_data.iloc[-1]
|
latest = school_data.iloc[-1]
|
||||||
|
|
||||||
|
# Fetch supplementary data (Ofsted, Parent View, admissions, etc.)
|
||||||
|
from .database import SessionLocal
|
||||||
|
supplementary = {}
|
||||||
|
try:
|
||||||
|
db = SessionLocal()
|
||||||
|
supplementary = get_supplementary_data(db, urn)
|
||||||
|
db.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"school_info": {
|
"school_info": {
|
||||||
"urn": urn,
|
"urn": urn,
|
||||||
@@ -396,8 +407,23 @@ async def get_school_details(request: Request, urn: int):
|
|||||||
"latitude": latest.get("latitude"),
|
"latitude": latest.get("latitude"),
|
||||||
"longitude": latest.get("longitude"),
|
"longitude": latest.get("longitude"),
|
||||||
"phase": "Primary",
|
"phase": "Primary",
|
||||||
|
# GIAS fields
|
||||||
|
"website": latest.get("website"),
|
||||||
|
"headteacher_name": latest.get("headteacher_name"),
|
||||||
|
"capacity": latest.get("capacity"),
|
||||||
|
"trust_name": latest.get("trust_name"),
|
||||||
|
"gender": latest.get("gender"),
|
||||||
},
|
},
|
||||||
"yearly_data": clean_for_json(school_data),
|
"yearly_data": clean_for_json(school_data),
|
||||||
|
# Supplementary data (null if not yet populated by Kestra)
|
||||||
|
"ofsted": supplementary.get("ofsted"),
|
||||||
|
"parent_view": supplementary.get("parent_view"),
|
||||||
|
"census": supplementary.get("census"),
|
||||||
|
"admissions": supplementary.get("admissions"),
|
||||||
|
"sen_detail": supplementary.get("sen_detail"),
|
||||||
|
"phonics": supplementary.get("phonics"),
|
||||||
|
"deprivation": supplementary.get("deprivation"),
|
||||||
|
"finance": supplementary.get("finance"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,11 @@ from sqlalchemy.orm import joinedload, Session
|
|||||||
|
|
||||||
from .config import settings
|
from .config import settings
|
||||||
from .database import SessionLocal, get_db_session
|
from .database import SessionLocal, get_db_session
|
||||||
from .models import School, SchoolResult
|
from .models import (
|
||||||
|
School, SchoolResult,
|
||||||
|
OfstedInspection, OfstedParentView, SchoolCensus,
|
||||||
|
SchoolAdmissions, SenDetail, Phonics, SchoolDeprivation, SchoolFinance,
|
||||||
|
)
|
||||||
from .schemas import SCHOOL_TYPE_MAP
|
from .schemas import SCHOOL_TYPE_MAP
|
||||||
|
|
||||||
# Cache for user search postcode geocoding (not for school data)
|
# Cache for user search postcode geocoding (not for school data)
|
||||||
@@ -381,6 +385,12 @@ def school_to_dict(school: School, include_results: bool = False) -> dict:
|
|||||||
"postcode": school.postcode,
|
"postcode": school.postcode,
|
||||||
"latitude": school.latitude,
|
"latitude": school.latitude,
|
||||||
"longitude": school.longitude,
|
"longitude": school.longitude,
|
||||||
|
# GIAS fields
|
||||||
|
"website": school.website,
|
||||||
|
"headteacher_name": school.headteacher_name,
|
||||||
|
"capacity": school.capacity,
|
||||||
|
"trust_name": school.trust_name,
|
||||||
|
"gender": school.gender,
|
||||||
}
|
}
|
||||||
|
|
||||||
if include_results and school.results:
|
if include_results and school.results:
|
||||||
@@ -455,8 +465,25 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
|||||||
# Query all schools with their results
|
# Query all schools with their results
|
||||||
schools = db.query(School).options(joinedload(School.results)).all()
|
schools = db.query(School).options(joinedload(School.results)).all()
|
||||||
|
|
||||||
|
# Load Ofsted data into a lookup dict (urn → grade, date)
|
||||||
|
ofsted_lookup: Dict[int, dict] = {}
|
||||||
|
try:
|
||||||
|
ofsted_rows = db.query(
|
||||||
|
OfstedInspection.urn,
|
||||||
|
OfstedInspection.overall_effectiveness,
|
||||||
|
OfstedInspection.inspection_date,
|
||||||
|
).all()
|
||||||
|
for o in ofsted_rows:
|
||||||
|
ofsted_lookup[o.urn] = {
|
||||||
|
"ofsted_grade": o.overall_effectiveness,
|
||||||
|
"ofsted_date": o.inspection_date.isoformat() if o.inspection_date else None,
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
pass # Table may not exist yet on first run
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
for school in schools:
|
for school in schools:
|
||||||
|
ofsted = ofsted_lookup.get(school.urn, {})
|
||||||
for result in school.results:
|
for result in school.results:
|
||||||
row = {
|
row = {
|
||||||
"urn": school.urn,
|
"urn": school.urn,
|
||||||
@@ -468,6 +495,15 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
|
|||||||
"postcode": school.postcode,
|
"postcode": school.postcode,
|
||||||
"latitude": school.latitude,
|
"latitude": school.latitude,
|
||||||
"longitude": school.longitude,
|
"longitude": school.longitude,
|
||||||
|
# GIAS fields
|
||||||
|
"website": school.website,
|
||||||
|
"headteacher_name": school.headteacher_name,
|
||||||
|
"capacity": school.capacity,
|
||||||
|
"trust_name": school.trust_name,
|
||||||
|
"gender": school.gender,
|
||||||
|
# Ofsted (for list view)
|
||||||
|
"ofsted_grade": ofsted.get("ofsted_grade"),
|
||||||
|
"ofsted_date": ofsted.get("ofsted_date"),
|
||||||
**result_to_dict(result)
|
**result_to_dict(result)
|
||||||
}
|
}
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
@@ -511,3 +547,126 @@ def clear_cache():
|
|||||||
"""Clear all caches."""
|
"""Clear all caches."""
|
||||||
global _df_cache
|
global _df_cache
|
||||||
_df_cache = None
|
_df_cache = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_supplementary_data(db: Session, urn: int) -> dict:
    """
    Fetch all supplementary data for a single school URN.

    Every section is read independently on a best-effort basis. If a query
    fails (for example because a table has not been created yet), the error
    is logged, the session is rolled back so the remaining sections can still
    be queried, and that section is reported as None.

    Args:
        db: Open SQLAlchemy session. It is not closed here. Note that a failed
            query triggers ``db.rollback()``, which discards any uncommitted
            work pending on this session.
        urn: School URN to look up.

    Returns:
        A dict with keys: ofsted, parent_view, census, admissions, sen_detail,
        phonics, deprivation, finance. Each value is a dict of column values
        (dates as ISO strings), or None when no row exists or the query failed.
    """
    import logging  # local import so this block does not rely on file-level imports

    log = logging.getLogger(__name__)

    def safe_query(model, pk_field, latest_year_field=None):
        """Return the row of ``model`` for this URN (latest year first if asked), or None."""
        try:
            query = db.query(model).filter(getattr(model, pk_field) == urn)
            if latest_year_field:
                query = query.order_by(getattr(model, latest_year_field).desc())
            return query.first()
        except Exception:
            log.warning(
                "Supplementary query on %s failed for URN %s",
                getattr(model, "__tablename__", model),
                urn,
                exc_info=True,
            )
            # A failed statement can leave the surrounding transaction aborted;
            # without a rollback every later query on this session fails too.
            try:
                db.rollback()
            except Exception:
                log.warning(
                    "Rollback after failed supplementary query also failed",
                    exc_info=True,
                )
            return None

    def to_dict(row, columns, date_columns=()):
        """Copy ``columns`` from ``row`` into a dict; ISO-format the date columns."""
        if row is None:
            return None
        out = {}
        for name in columns:
            value = getattr(row, name)
            if name in date_columns:
                value = value.isoformat() if value else None
            out[name] = value
        return out

    # (result key, model, "latest year" ordering column or None, columns, date columns)
    # Column order is kept as-is because it determines the key order of the output.
    sections = (
        (
            "ofsted", OfstedInspection, None,
            (
                "overall_effectiveness", "quality_of_education",
                "behaviour_attitudes", "personal_development",
                "leadership_management", "early_years_provision",
                "previous_overall", "inspection_date", "inspection_type",
            ),
            ("inspection_date",),
        ),
        (
            "parent_view", OfstedParentView, None,
            (
                "survey_date", "total_responses", "q_happy_pct", "q_safe_pct",
                "q_behaviour_pct", "q_bullying_pct", "q_communication_pct",
                "q_progress_pct", "q_teaching_pct", "q_information_pct",
                "q_curriculum_pct", "q_future_pct", "q_leadership_pct",
                "q_wellbeing_pct", "q_recommend_pct", "q_sen_pct",
            ),
            ("survey_date",),
        ),
        (
            "census", SchoolCensus, "year",
            (
                "year", "class_size_avg", "ethnicity_white_pct",
                "ethnicity_asian_pct", "ethnicity_black_pct",
                "ethnicity_mixed_pct", "ethnicity_other_pct",
            ),
            (),
        ),
        (
            "admissions", SchoolAdmissions, "year",
            (
                "year", "published_admission_number", "total_applications",
                "first_preference_offers_pct", "oversubscribed",
            ),
            (),
        ),
        (
            "sen_detail", SenDetail, "year",
            (
                "year", "primary_need_speech_pct", "primary_need_autism_pct",
                "primary_need_mld_pct", "primary_need_spld_pct",
                "primary_need_semh_pct", "primary_need_physical_pct",
                "primary_need_other_pct",
            ),
            (),
        ),
        (
            "phonics", Phonics, "year",
            ("year", "year1_phonics_pct", "year2_phonics_pct"),
            (),
        ),
        (
            "deprivation", SchoolDeprivation, None,
            ("lsoa_code", "idaci_score", "idaci_decile"),
            (),
        ),
        (
            "finance", SchoolFinance, "year",
            (
                "year", "per_pupil_spend", "staff_cost_pct", "teacher_cost_pct",
                "support_staff_cost_pct", "premises_cost_pct",
            ),
            (),
        ),
    )

    result = {}
    for key, model, year_field, columns, date_columns in sections:
        row = safe_query(model, "urn", year_field)
        result[key] = to_dict(row, columns, date_columns)
    return result
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
from sqlalchemy import (
|
from sqlalchemy import (
|
||||||
Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
|
Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
|
||||||
Text, Boolean, DateTime
|
Text, Boolean, DateTime, Date
|
||||||
)
|
)
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
from .database import Base
|
from .database import Base
|
||||||
@@ -39,6 +39,15 @@ class School(Base):
|
|||||||
latitude = Column(Float)
|
latitude = Column(Float)
|
||||||
longitude = Column(Float)
|
longitude = Column(Float)
|
||||||
|
|
||||||
|
# GIAS enrichment fields
|
||||||
|
website = Column(String(255))
|
||||||
|
headteacher_name = Column(String(200))
|
||||||
|
capacity = Column(Integer)
|
||||||
|
trust_name = Column(String(255))
|
||||||
|
trust_uid = Column(String(20))
|
||||||
|
gender = Column(String(20)) # Mixed / Girls / Boys
|
||||||
|
nursery_provision = Column(Boolean)
|
||||||
|
|
||||||
# Relationships
|
# Relationships
|
||||||
results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
|
results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
|
||||||
|
|
||||||
@@ -150,6 +159,169 @@ class SchemaVersion(Base):
|
|||||||
return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"
|
return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Supplementary data tables (populated by the Kestra data integrator)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class OfstedInspection(Base):
    """Most recent Ofsted inspection judgement for a school (keyed by URN)."""

    __tablename__ = "ofsted_inspections"

    urn = Column(Integer, primary_key=True)

    # Inspection metadata
    inspection_date = Column(Date)
    publication_date = Column(Date)
    inspection_type = Column(String(100))  # e.g. Section 5 / Section 8

    # Grade scale: 1=Outstanding 2=Good 3=Requires improvement 4=Inadequate
    overall_effectiveness = Column(Integer)
    quality_of_education = Column(Integer)
    behaviour_attitudes = Column(Integer)
    personal_development = Column(Integer)
    leadership_management = Column(Integer)
    early_years_provision = Column(Integer)  # NULL where not applicable to the school
    previous_overall = Column(Integer)  # kept so a trend can be displayed

    def __repr__(self):
        return f"<OfstedInspection(urn={self.urn}, overall={self.overall_effectiveness})>"
|
||||||
|
|
||||||
|
|
||||||
|
class OfstedParentView(Base):
    """Latest Ofsted Parent View survey per school.

    Holds one percentage column per survey question (14 in total), each being
    the share of respondents answering Yes.
    """

    __tablename__ = "ofsted_parent_view"

    urn = Column(Integer, primary_key=True)
    survey_date = Column(Date)
    total_responses = Column(Integer)

    # Per-question results (% saying Yes)
    q_happy_pct = Column(Float)          # child is happy at the school
    q_safe_pct = Column(Float)           # child feels safe at the school
    q_bullying_pct = Column(Float)       # bullying is dealt with well
    q_communication_pct = Column(Float)  # school keeps parents informed
    q_progress_pct = Column(Float)       # child does well / makes good progress
    q_teaching_pct = Column(Float)       # teaching is good
    q_information_pct = Column(Float)    # valuable information about progress is received
    q_curriculum_pct = Column(Float)     # broad range of subjects is taught
    q_future_pct = Column(Float)         # child is prepared well for the future
    q_leadership_pct = Column(Float)     # school is led and managed effectively
    q_wellbeing_pct = Column(Float)      # wider personal development is supported
    q_behaviour_pct = Column(Float)      # pupils are well behaved
    q_recommend_pct = Column(Float)      # parent would recommend the school
    q_sen_pct = Column(Float)            # good information about child's SEN (where applicable)

    def __repr__(self):
        return f"<OfstedParentView(urn={self.urn}, responses={self.total_responses})>"
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolCensus(Base):
    """Annual school census snapshot — class sizes and ethnicity breakdown.

    One row per (urn, year). The composite primary key already provides an
    index on (urn, year), so no separate secondary index is declared.
    """

    __tablename__ = "school_census"

    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    class_size_avg = Column(Float)
    ethnicity_white_pct = Column(Float)
    ethnicity_asian_pct = Column(Float)
    ethnicity_black_pct = Column(Float)
    ethnicity_mixed_pct = Column(Float)
    ethnicity_other_pct = Column(Float)

    def __repr__(self):
        return f"<SchoolCensus(urn={self.urn}, year={self.year})>"
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolAdmissions(Base):
    """Annual admissions statistics per school.

    One row per (urn, year). The composite primary key already provides an
    index on (urn, year), so no separate secondary index is declared.
    """

    __tablename__ = "school_admissions"

    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    published_admission_number = Column(Integer)  # PAN
    total_applications = Column(Integer)
    first_preference_offers_pct = Column(Float)  # % receiving 1st choice
    oversubscribed = Column(Boolean)

    def __repr__(self):
        return f"<SchoolAdmissions(urn={self.urn}, year={self.year})>"
|
||||||
|
|
||||||
|
|
||||||
|
class SenDetail(Base):
    """SEN primary need type breakdown — more granular than school_results context fields.

    One row per (urn, year). The composite primary key already provides an
    index on (urn, year), so no separate secondary index is declared.
    """

    __tablename__ = "sen_detail"

    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    primary_need_speech_pct = Column(Float)    # SLCN
    primary_need_autism_pct = Column(Float)    # ASD
    primary_need_mld_pct = Column(Float)       # Moderate learning difficulty
    primary_need_spld_pct = Column(Float)      # Specific learning difficulty (dyslexia etc.)
    primary_need_semh_pct = Column(Float)      # Social, emotional, mental health
    primary_need_physical_pct = Column(Float)  # Physical/sensory
    primary_need_other_pct = Column(Float)

    def __repr__(self):
        return f"<SenDetail(urn={self.urn}, year={self.year})>"
|
||||||
|
|
||||||
|
|
||||||
|
class Phonics(Base):
    """Phonics Screening Check pass rates.

    One row per (urn, year). The composite primary key already provides an
    index on (urn, year), so no separate secondary index is declared.
    """

    __tablename__ = "phonics"

    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    year1_phonics_pct = Column(Float)  # % reaching expected standard in Year 1
    year2_phonics_pct = Column(Float)  # % reaching standard in Year 2 (re-takers)

    def __repr__(self):
        return f"<Phonics(urn={self.urn}, year={self.year})>"
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolDeprivation(Base):
    """IDACI deprivation figures for a school, derived via a postcode → LSOA lookup."""

    __tablename__ = "school_deprivation"

    urn = Column(Integer, primary_key=True)
    lsoa_code = Column(String(20))

    # IDACI score runs 0–1 (higher = more deprived);
    # decile runs 1–10 (1 = most deprived, 10 = least deprived).
    idaci_score = Column(Float)
    idaci_decile = Column(Integer)

    def __repr__(self):
        return f"<SchoolDeprivation(urn={self.urn}, decile={self.idaci_decile})>"
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolFinance(Base):
    """FBIT financial benchmarking data.

    One row per (urn, year). The composite primary key already provides an
    index on (urn, year), so no separate secondary index is declared.
    """

    __tablename__ = "school_finance"

    urn = Column(Integer, primary_key=True)
    year = Column(Integer, primary_key=True)
    per_pupil_spend = Column(Float)   # £ total expenditure per pupil
    staff_cost_pct = Column(Float)    # % of budget on all staff
    teacher_cost_pct = Column(Float)  # % on teachers specifically
    support_staff_cost_pct = Column(Float)
    premises_cost_pct = Column(Float)

    def __repr__(self):
        return f"<SchoolFinance(urn={self.urn}, year={self.year})>"
|
||||||
|
|
||||||
|
|
||||||
# Mapping from CSV columns to model fields
|
# Mapping from CSV columns to model fields
|
||||||
SCHOOL_FIELD_MAPPING = {
|
SCHOOL_FIELD_MAPPING = {
|
||||||
'urn': 'urn',
|
'urn': 'urn',
|
||||||
|
|||||||
@@ -13,10 +13,11 @@ WHEN TO BUMP:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Current schema version - increment when models change
|
# Current schema version - increment when models change
|
||||||
SCHEMA_VERSION = 2
|
SCHEMA_VERSION = 3
|
||||||
|
|
||||||
# Changelog for documentation
|
# Changelog for documentation
|
||||||
SCHEMA_CHANGELOG = {
|
SCHEMA_CHANGELOG = {
|
||||||
1: "Initial schema with School and SchoolResult tables",
|
1: "Initial schema with School and SchoolResult tables",
|
||||||
2: "Added pupil absence fields (reading, maths, gps, writing, science)",
|
2: "Added pupil absence fields (reading, maths, gps, writing, science)",
|
||||||
|
3: "Added supplementary data tables: ofsted, parent_view, census, admissions, sen_detail, phonics, deprivation, finance; GIAS columns on schools",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,9 +77,70 @@ services:
|
|||||||
retries: 3
|
retries: 3
|
||||||
start_period: 40s
|
start_period: 40s
|
||||||
|
|
||||||
|
# Kestra — workflow orchestrator (UI at http://localhost:8080)
|
||||||
|
kestra:
  # NOTE(review): ":latest" is unpinned, so behaviour can change on every pull;
  # consider pinning a specific Kestra version.
  image: kestra/kestra:latest
  container_name: schoolcompare_kestra
  # Start the server explicitly: without a command the image does not launch
  # the Kestra server (webserver, scheduler, executor, worker).
  command: server standalone
  ports:
    - "8080:8080"
  volumes:
    - kestra_storage:/app/storage
    - ./integrator/flows:/flows
  environment:
    # Repository and queue are kept in the shared Postgres instance;
    # internal file storage lives on the kestra_storage volume.
    KESTRA_CONFIGURATION: |
      datasources:
        postgres:
          url: jdbc:postgresql://db:5432/kestra
          driverClassName: org.postgresql.Driver
          username: schoolcompare
          password: schoolcompare
      kestra:
        repository:
          type: postgres
        queue:
          type: postgres
        storage:
          type: local
          local:
            base-path: /app/storage
  depends_on:
    db:
      condition: service_healthy
  networks:
    - schoolcompare-network
  restart: unless-stopped
|
||||||
|
|
||||||
|
# Data integrator — Python microservice called by Kestra
|
||||||
|
integrator:
  build:
    context: ./integrator
    dockerfile: Dockerfile
  container_name: schoolcompare_integrator
  ports:
    - "8001:8001"
  environment:
    DATABASE_URL: postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare
    DATA_DIR: /data
    PYTHONUNBUFFERED: 1
  volumes:
    - ./data:/data
  depends_on:
    db:
      condition: service_healthy
  networks:
    - schoolcompare-network
  restart: unless-stopped
  healthcheck:
    # The image is built from python:3.12-slim and installs no curl, so probe
    # the endpoint with the interpreter that is guaranteed to be present.
    # urlopen raises (non-zero exit) on connection errors and HTTP >= 400.
    test:
      - "CMD"
      - "python"
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://localhost:8001/health', timeout=5)"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 15s
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
schoolcompare-network:
|
schoolcompare-network:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
|
kestra_storage:
|
||||||
|
|||||||
13
integrator/Dockerfile
Normal file
13
integrator/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Image for the data integrator service: a uvicorn-served ASGI app (server:app).
FROM python:3.12-slim

WORKDIR /app

# Install dependencies first so this layer is reused when only source files change
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY scripts/ ./scripts/
COPY server.py .

# Listen on all interfaces on port 8001
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8001"]
|
||||||
25
integrator/flows/admissions.yml
Normal file
25
integrator/flows/admissions.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
id: admissions-annual-update
namespace: schoolcompare.data
description: Download and load school admissions data via EES API

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 4 1 7 *" # 1 July annually at 04:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/admissions?action=download
    method: POST
    timeout: PT20M

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/admissions?action=load
    method: POST
    timeout: PT30M

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 3
  interval: PT15M
|
||||||
25
integrator/flows/census.yml
Normal file
25
integrator/flows/census.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
id: census-annual-update
namespace: schoolcompare.data
description: Download and load School Census (SPC) data via EES API

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 4 1 9 *" # 1 September annually at 04:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/census?action=download
    method: POST
    timeout: PT20M

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/census?action=load
    method: POST
    timeout: PT30M

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 3
  interval: PT15M
|
||||||
25
integrator/flows/finance.yml
Normal file
25
integrator/flows/finance.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
id: finance-annual-update
namespace: schoolcompare.data
description: Fetch FBIT financial benchmarking data from DfE API for all schools

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 4 1 12 *" # 1 December annually at 04:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/finance?action=download
    method: POST
    # NOTE(review): this only bounds the task; the HTTP client's own read/idle
    # timeouts may cut a request this long short — confirm they are raised.
    timeout: PT120M # Fetches per-school from API — ~20k schools

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/finance?action=load
    method: POST
    timeout: PT30M

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 2
  interval: PT30M
|
||||||
30
integrator/flows/gias.yml
Normal file
30
integrator/flows/gias.yml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
id: gias-weekly-update
namespace: schoolcompare.data
description: Download and load GIAS (Get Information About Schools) bulk CSV

triggers:
  - id: weekly-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 3 * * 0" # Every Sunday at 03:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/gias?action=download
    method: POST
    timeout: PT30M

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/gias?action=load
    method: POST
    timeout: PT30M

# Runs when any task above fails.
# NOTE(review): confirm `error.message` is available in this expression context.
errors:
  - id: notify-failure
    type: io.kestra.plugin.core.log.Log
    message: "GIAS update FAILED: {{ error.message }}"

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 3
  interval: PT10M
|
||||||
25
integrator/flows/idaci.yml
Normal file
25
integrator/flows/idaci.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
id: idaci-annual-check
namespace: schoolcompare.data
description: Download IoD2019 IDACI file and compute deprivation scores for all schools

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 5 1 1 *" # 1 January annually at 05:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/idaci?action=download
    method: POST
    timeout: PT10M

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/idaci?action=load
    method: POST
    timeout: PT60M

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 2
  interval: PT30M
|
||||||
32
integrator/flows/ofsted.yml
Normal file
32
integrator/flows/ofsted.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
id: ofsted-monthly-update
namespace: schoolcompare.data
description: Download and load Ofsted Monthly Management Information CSV

triggers:
  - id: monthly-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 2 1 * *" # 1st of each month at 02:00

# Two sequential calls to the integrator service: fetch the data, then load it.
tasks:
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/ofsted?action=download
    method: POST
    allowFailed: false
    timeout: PT10M

  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/ofsted?action=load
    method: POST
    allowFailed: false
    timeout: PT30M

# Runs when any task above fails.
# NOTE(review): confirm `error.message` is available in this expression context.
errors:
  - id: notify-failure
    type: io.kestra.plugin.core.log.Log
    message: "Ofsted update FAILED: {{ error.message }}"

# Flow-level retry. A retry needs a `type`, and a constant retry waits `interval`
# between attempts.
# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
# against the deployed version.
retry:
  type: constant
  maxAttempts: 3
  interval: PT10M
|
||||||
30
integrator/flows/parent_view.yml
Normal file
30
integrator/flows/parent_view.yml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Kestra flow: monthly check for new Ofsted Parent View data via the integrator service.
id: parent-view-monthly-check
namespace: schoolcompare.data
description: Download and load Ofsted Parent View open data (released ~3x/year)

triggers:
  - id: monthly-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 3 1 * *" # 1st of each month at 03:00

tasks:
  # Step 1: ask the integrator to fetch the source file.
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/parent_view?action=download
    method: POST
    timeout: PT10M

  # Step 2: ask the integrator to load the downloaded file into the database.
  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/parent_view?action=load
    method: POST
    timeout: PT20M

errors:
  - id: notify-failure
    type: io.kestra.plugin.core.log.Log
    message: "Parent View update FAILED: {{ error.message }}"

# Kestra retry policies need an explicit `type`, and the wait between attempts
# is called `interval` (there is no `delay` property).
retry:
  type: constant
  maxAttempts: 3
  interval: PT10M
|
||||||
25
integrator/flows/phonics.yml
Normal file
25
integrator/flows/phonics.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Kestra flow: annual refresh of Phonics Screening Check data via the integrator service.
id: phonics-annual-update
namespace: schoolcompare.data
description: Download and load Phonics Screening Check data via EES API

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 5 1 9 *" # 1 September annually at 05:00

tasks:
  # Step 1: ask the integrator to fetch the source file.
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/phonics?action=download
    method: POST
    timeout: PT20M

  # Step 2: ask the integrator to load the downloaded file into the database.
  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/phonics?action=load
    method: POST
    timeout: PT30M

# Kestra retry policies need an explicit `type`, and the wait between attempts
# is called `interval` (there is no `delay` property).
retry:
  type: constant
  maxAttempts: 3
  interval: PT15M
|
||||||
25
integrator/flows/sen_detail.yml
Normal file
25
integrator/flows/sen_detail.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Kestra flow: annual refresh of the SEN primary-need breakdown via the integrator service.
id: sen-detail-annual-update
namespace: schoolcompare.data
description: Download and load SEN primary need breakdown via EES API

triggers:
  - id: annual-schedule
    type: io.kestra.plugin.core.trigger.Schedule
    cron: "0 4 15 9 *" # 15 September annually at 04:00

tasks:
  # Step 1: ask the integrator to fetch the source file.
  - id: download
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/sen_detail?action=download
    method: POST
    timeout: PT20M

  # Step 2: ask the integrator to load the downloaded file into the database.
  - id: load
    type: io.kestra.plugin.core.http.Request
    uri: http://integrator:8001/run/sen_detail?action=load
    method: POST
    timeout: PT30M

# Kestra retry policies need an explicit `type`, and the wait between attempts
# is called `interval` (there is no `delay` property).
retry:
  type: constant
  maxAttempts: 3
  interval: PT15M
|
||||||
7
integrator/requirements.txt
Normal file
7
integrator/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
fastapi==0.115.0
|
||||||
|
uvicorn[standard]==0.30.6
|
||||||
|
requests==2.32.3
|
||||||
|
pandas==2.2.3
|
||||||
|
openpyxl==3.1.5
|
||||||
|
psycopg2-binary==2.9.9
|
||||||
|
sqlalchemy==2.0.35
|
||||||
11
integrator/scripts/config.py
Normal file
11
integrator/scripts/config.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""Configuration for the data integrator."""
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Database connection string; the DATABASE_URL environment variable overrides the default.
DATABASE_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare",
)

# Root folder for downloaded files; the DATA_DIR environment variable overrides "/data".
DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))

# Every source module keeps its downloads in a sub-folder of this directory.
SUPPLEMENTARY_DIR = DATA_DIR.joinpath("supplementary")
|
||||||
23
integrator/scripts/db.py
Normal file
23
integrator/scripts/db.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
"""Database connection for the integrator."""
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
|
||||||
|
from config import DATABASE_URL
|
||||||
|
|
||||||
|
engine = create_engine(DATABASE_URL, pool_pre_ping=True)
|
||||||
|
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def get_session():
    """Provide a transactional database session as a context manager.

    The session is committed when the ``with`` body finishes normally, rolled
    back (and the exception re-raised) when the body or the commit raises, and
    closed in every case.

    Yields:
        A new session created from ``SessionLocal``.
    """
    db_session = SessionLocal()
    try:
        yield db_session
        # Commit stays inside the try so a failing commit is rolled back as well.
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
|
||||||
0
integrator/scripts/sources/__init__.py
Normal file
0
integrator/scripts/sources/__init__.py
Normal file
158
integrator/scripts/sources/admissions.py
Normal file
158
integrator/scripts/sources/admissions.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
"""
|
||||||
|
School Admissions data downloader and loader.
|
||||||
|
|
||||||
|
Source: EES publication "secondary-and-primary-school-applications-and-offers"
|
||||||
|
Update: Annual (June/July post-offer round)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
from sources.ees import get_latest_csv_url, download_csv
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "admissions"
|
||||||
|
PUBLICATION_SLUG = "secondary-and-primary-school-applications-and-offers"
|
||||||
|
|
||||||
|
NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}
|
||||||
|
|
||||||
|
COLUMN_MAP = {
|
||||||
|
"URN": "urn",
|
||||||
|
"urn": "urn",
|
||||||
|
"YEAR": "year",
|
||||||
|
"Year": "year",
|
||||||
|
# PAN
|
||||||
|
"PAN": "pan",
|
||||||
|
"published_admission_number": "pan",
|
||||||
|
"admissions_number": "pan",
|
||||||
|
# Applications
|
||||||
|
"total_applications": "total_applications",
|
||||||
|
"TAPP": "total_applications",
|
||||||
|
"applications_received": "total_applications",
|
||||||
|
# 1st preference offers
|
||||||
|
"first_preference_offers_pct": "first_preference_offers_pct",
|
||||||
|
"pct_1st_preference": "first_preference_offers_pct",
|
||||||
|
"PT1PREF": "first_preference_offers_pct",
|
||||||
|
# Oversubscription
|
||||||
|
"oversubscribed": "oversubscribed",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Fetch the admissions CSV for the EES publication into the admissions folder.

    Args:
        data_dir: Optional data root. When given, the file is stored under
            ``<data_dir>/supplementary/admissions``; otherwise under ``DEST_DIR``.

    Returns:
        The path returned by ``download_csv`` for the chosen file.

    Raises:
        RuntimeError: If no CSV URL can be found for the publication.
    """
    target_dir = DEST_DIR if not data_dir else data_dir / "supplementary" / "admissions"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Prefer a data set whose name mentions "primary"; otherwise accept any data set.
    csv_url = (
        get_latest_csv_url(PUBLICATION_SLUG, keyword="primary")
        or get_latest_csv_url(PUBLICATION_SLUG)
    )
    if not csv_url:
        raise RuntimeError("Could not find CSV URL for admissions publication")

    # Local file name = last URL path segment without its query string.
    basename = csv_url.rsplit("/", 1)[-1].partition("?")[0]
    return download_csv(csv_url, target_dir / (basename or "admissions_latest.csv"))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_int(val) -> int | None:
    """Convert a raw CSV cell to an int, or None when it holds no usable number.

    The value is stringified, stripped, upper-cased and has thousands separators
    removed. Markers listed in ``NULL_VALUES`` and anything that is not a finite
    number yield None. Decimal values are truncated towards zero.

    Args:
        val: Cell value as read by pandas (string, number or NaN).

    Returns:
        The integer value, or None.
    """
    if pd.isna(val):
        return None
    s = str(val).strip().upper().replace(",", "")
    if s in NULL_VALUES:
        return None
    try:
        return int(float(s))
    except (ValueError, OverflowError):
        # ValueError: not numeric (or NaN). OverflowError: "INF"/"INFINITY" parse
        # as float infinity, which int() cannot represent.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pct(val) -> float | None:
    """Convert a raw percentage cell to a float, or None when it is not numeric.

    The value is stringified, stripped, upper-cased and has any ``%`` sign
    removed before conversion. Markers listed in ``NULL_VALUES`` yield None.

    Args:
        val: Cell value as read by pandas (string, number or NaN).

    Returns:
        The numeric value, or None.
    """
    if pd.isna(val):
        return None

    token = str(val).strip().upper().replace("%", "")
    if token in NULL_VALUES:
        return None

    try:
        number = float(token)
    except ValueError:
        number = None
    return number
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_flag(val) -> bool | None:
    """Interpret a yes/no style cell as a bool; None when blank or unrecognised."""
    if pd.isna(val):
        return None
    token = str(val).strip().upper()
    if token in ("1", "Y", "YES", "TRUE", "T"):
        return True
    if token in ("0", "N", "NO", "FALSE", "F"):
        return False
    try:
        # Numeric encodings such as 1.0 / 0.0.
        return float(token) != 0
    except ValueError:
        return None


def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Upsert admissions figures from a CSV into the ``school_admissions`` table.

    Args:
        path: CSV file to load. When omitted, the last file (sorted by name) in
            the admissions folder is used.
        data_dir: Optional data root used to locate the admissions folder when
            ``path`` is omitted; defaults to ``DEST_DIR``.

    Returns:
        A dict with ``inserted`` (rows upserted), ``updated`` (always 0) and
        ``skipped`` (rows dropped because no year could be determined).

    Raises:
        FileNotFoundError: If no CSV is available to load.
        ValueError: If the file has no URN column.
    """
    if path is None:
        dest = (data_dir / "supplementary" / "admissions") if data_dir else DEST_DIR
        files = sorted(dest.glob("*.csv"))
        if not files:
            raise FileNotFoundError(f"No admissions CSV found in {dest}")
        path = files[-1]

    print(f" Admissions: loading {path} ...")
    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
    df.rename(columns=COLUMN_MAP, inplace=True)

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Fallback year taken from the file name, for rows without a usable year value.
    m = re.search(r"20(\d{2})", path.stem)
    file_year = int("20" + m.group(1)) if m else None

    inserted = 0
    skipped = 0
    with get_session() as session:
        from sqlalchemy import text

        # Built once; the statement is identical for every row.
        upsert = text("""
            INSERT INTO school_admissions
            (urn, year, published_admission_number, total_applications,
            first_preference_offers_pct, oversubscribed)
            VALUES (:urn, :year, :pan, :total_apps, :pct_1st, :oversubscribed)
            ON CONFLICT (urn, year) DO UPDATE SET
            published_admission_number = EXCLUDED.published_admission_number,
            total_applications = EXCLUDED.total_applications,
            first_preference_offers_pct = EXCLUDED.first_preference_offers_pct,
            oversubscribed = EXCLUDED.oversubscribed
        """)

        for _, row in df.iterrows():
            urn = int(row["urn"])
            # A non-numeric year no longer aborts the whole load; it falls back
            # to the year found in the file name.
            row_year = _parse_int(row.get("year")) or file_year
            if not row_year:
                skipped += 1
                continue

            pan = _parse_int(row.get("pan"))
            total_apps = _parse_int(row.get("total_applications"))
            pct_1st = _parse_pct(row.get("first_preference_offers_pct"))

            # bool("No") is True, so the source flag is parsed explicitly.
            oversubscribed = _parse_flag(row.get("oversubscribed"))
            if oversubscribed is None and pan and total_apps:
                # Derive the flag when both figures are known; this can be False.
                oversubscribed = total_apps > pan

            session.execute(
                upsert,
                {
                    "urn": urn, "year": row_year, "pan": pan,
                    "total_apps": total_apps, "pct_1st": pct_1st,
                    "oversubscribed": oversubscribed,
                },
            )
            inserted += 1
            if inserted % 5000 == 0:
                session.flush()

    print(f" Admissions: upserted {inserted} records")
    return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action selects download, load, or both ("all").
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    # With three allowed choices, "not load" means download/all and
    # "not download" means load/all; download always runs first.
    if opts.action != "load":
        download(opts.data_dir)
    if opts.action != "download":
        load(data_dir=opts.data_dir)
|
||||||
148
integrator/scripts/sources/census.py
Normal file
148
integrator/scripts/sources/census.py
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
"""
|
||||||
|
School Census (SPC) downloader and loader.
|
||||||
|
|
||||||
|
Source: EES publication "schools-pupils-and-their-characteristics"
|
||||||
|
Update: Annual (June)
|
||||||
|
Adds: class_size_avg, ethnicity breakdown by school
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
from sources.ees import get_latest_csv_url, download_csv
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "census"
|
||||||
|
PUBLICATION_SLUG = "schools-pupils-and-their-characteristics"
|
||||||
|
|
||||||
|
NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}
|
||||||
|
|
||||||
|
COLUMN_MAP = {
|
||||||
|
"URN": "urn",
|
||||||
|
"urn": "urn",
|
||||||
|
"YEAR": "year",
|
||||||
|
"Year": "year",
|
||||||
|
# Class size
|
||||||
|
"average_class_size": "class_size_avg",
|
||||||
|
"AVCLAS": "class_size_avg",
|
||||||
|
"avg_class_size": "class_size_avg",
|
||||||
|
# Ethnicity — DfE uses ethnicity major group percentages
|
||||||
|
"perc_white": "ethnicity_white_pct",
|
||||||
|
"perc_asian": "ethnicity_asian_pct",
|
||||||
|
"perc_black": "ethnicity_black_pct",
|
||||||
|
"perc_mixed": "ethnicity_mixed_pct",
|
||||||
|
"perc_other_ethnic": "ethnicity_other_pct",
|
||||||
|
"PTWHITE": "ethnicity_white_pct",
|
||||||
|
"PTASIAN": "ethnicity_asian_pct",
|
||||||
|
"PTBLACK": "ethnicity_black_pct",
|
||||||
|
"PTMIXED": "ethnicity_mixed_pct",
|
||||||
|
"PTOTHER": "ethnicity_other_pct",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Fetch the school census CSV for the EES publication into the census folder.

    Args:
        data_dir: Optional data root. When given, the file is stored under
            ``<data_dir>/supplementary/census``; otherwise under ``DEST_DIR``.

    Returns:
        The path returned by ``download_csv`` for the chosen file.

    Raises:
        RuntimeError: If no CSV URL can be found for the publication.
    """
    if data_dir:
        folder = data_dir / "supplementary" / "census"
    else:
        folder = DEST_DIR
    folder.mkdir(parents=True, exist_ok=True)

    source_url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
    if not source_url:
        raise RuntimeError("Could not find CSV URL for census publication")

    # Local file name = last URL path segment without its query string.
    last_segment = source_url.rsplit("/", 1)[-1]
    local_name = last_segment.partition("?")[0] or "census_latest.csv"
    return download_csv(source_url, folder / local_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pct(val) -> float | None:
    """Convert a raw numeric or percentage cell to a float, or None if unusable.

    The value is stringified, stripped, upper-cased and has any ``%`` sign
    removed before conversion. Markers listed in ``NULL_VALUES`` yield None.

    Args:
        val: Cell value as read by pandas (string, number or NaN).

    Returns:
        The numeric value, or None.
    """
    if pd.isna(val):
        return None

    cleaned = str(val).strip().upper().replace("%", "")
    if cleaned in NULL_VALUES:
        return None

    try:
        parsed = float(cleaned)
    except ValueError:
        parsed = None
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Upsert class-size and ethnicity figures from a CSV into ``school_census``.

    Args:
        path: CSV file to load. When omitted, the last file (sorted by name) in
            the census folder is used.
        data_dir: Optional data root used to locate the census folder when
            ``path`` is omitted; defaults to ``DEST_DIR``.

    Returns:
        A dict with ``inserted`` (rows upserted), ``updated`` (always 0) and
        ``skipped`` (rows dropped because no year could be determined).

    Raises:
        FileNotFoundError: If no CSV is available to load.
        ValueError: If the file has no URN column.
    """
    if path is None:
        dest = (data_dir / "supplementary" / "census") if data_dir else DEST_DIR
        files = sorted(dest.glob("*.csv"))
        if not files:
            raise FileNotFoundError(f"No census CSV found in {dest}")
        path = files[-1]

    print(f" Census: loading {path} ...")
    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
    df.rename(columns=COLUMN_MAP, inplace=True)

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Fallback year taken from the file name, for rows without a year value.
    m = re.search(r"20(\d{2})", path.stem)
    year = int("20" + m.group(1)) if m else None
    has_year_column = "year" in df.columns

    inserted = 0
    skipped = 0
    with get_session() as session:
        from sqlalchemy import text

        # Built once; the statement is identical for every row.
        upsert = text("""
            INSERT INTO school_census
            (urn, year, class_size_avg,
            ethnicity_white_pct, ethnicity_asian_pct, ethnicity_black_pct,
            ethnicity_mixed_pct, ethnicity_other_pct)
            VALUES (:urn, :year, :class_size_avg,
            :white, :asian, :black, :mixed, :other)
            ON CONFLICT (urn, year) DO UPDATE SET
            class_size_avg = EXCLUDED.class_size_avg,
            ethnicity_white_pct = EXCLUDED.ethnicity_white_pct,
            ethnicity_asian_pct = EXCLUDED.ethnicity_asian_pct,
            ethnicity_black_pct = EXCLUDED.ethnicity_black_pct,
            ethnicity_mixed_pct = EXCLUDED.ethnicity_mixed_pct,
            ethnicity_other_pct = EXCLUDED.ethnicity_other_pct
        """)

        for _, row in df.iterrows():
            urn = int(row["urn"])
            row_year = int(row["year"]) if has_year_column and pd.notna(row.get("year")) else year
            if not row_year:
                # Report the dropped row instead of always returning skipped=0.
                skipped += 1
                continue

            session.execute(
                upsert,
                {
                    "urn": urn,
                    "year": row_year,
                    # _parse_pct is reused here as a generic "float or None" parser.
                    "class_size_avg": _parse_pct(row.get("class_size_avg")),
                    "white": _parse_pct(row.get("ethnicity_white_pct")),
                    "asian": _parse_pct(row.get("ethnicity_asian_pct")),
                    "black": _parse_pct(row.get("ethnicity_black_pct")),
                    "mixed": _parse_pct(row.get("ethnicity_mixed_pct")),
                    "other": _parse_pct(row.get("ethnicity_other_pct")),
                },
            )
            inserted += 1
            if inserted % 5000 == 0:
                session.flush()

    print(f" Census: upserted {inserted} records")
    return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action selects download, load, or both ("all").
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    # With three allowed choices, "not load" means download/all and
    # "not download" means load/all; download always runs first.
    if opts.action != "load":
        download(opts.data_dir)
    if opts.action != "download":
        load(data_dir=opts.data_dir)
|
||||||
53
integrator/scripts/sources/ees.py
Normal file
53
integrator/scripts/sources/ees.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
"""
|
||||||
|
Shared EES (Explore Education Statistics) API client.
|
||||||
|
|
||||||
|
Base URL: https://api.education.gov.uk/statistics/v1
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
API_BASE = "https://api.education.gov.uk/statistics/v1"
|
||||||
|
TIMEOUT = 60
|
||||||
|
|
||||||
|
|
||||||
|
def get_publication_files(publication_slug: str) -> list[dict]:
    """Fetch the data-set file descriptors the EES API lists for a publication.

    Args:
        publication_slug: Publication identifier used in the API path.

    Returns:
        The ``results`` list of the JSON response, or an empty list when the
        response has no ``results`` key.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    endpoint = f"{API_BASE}/publications/{publication_slug}/data-set-files"
    response = requests.get(endpoint, timeout=TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    return payload.get("results", [])
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_csv_url(publication_slug: str, keyword: str = "") -> Optional[str]:
    """Return the CSV download URL of the first matching data set of a publication.

    Entries are examined in the order the API returns them and the first one
    that matches and carries a URL wins.
    NOTE(review): "latest" therefore relies on the API listing the newest data
    set first; confirm against the EES API.

    Args:
        publication_slug: Publication identifier used in the API path.
        keyword: Optional case-insensitive substring the data-set name must contain.

    Returns:
        The entry's ``csvDownloadUrl`` (or, failing that, its ``file.url``), or
        None when no entry qualifies.
    """
    needle = keyword.lower()
    for entry in get_publication_files(publication_slug):
        # JSON null comes back as None, which .get()'s default does not replace.
        name = (entry.get("name") or "").lower()
        if needle and needle not in name:
            continue
        csv_url = entry.get("csvDownloadUrl") or (entry.get("file") or {}).get("url")
        if csv_url:
            return csv_url
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def download_csv(url: str, dest_path: Path) -> Path:
    """Download a CSV from EES to ``dest_path`` unless that file already exists.

    The body is streamed into a temporary ``<name>.part`` file and renamed into
    place only once the transfer has completed. An interrupted download
    therefore never leaves a truncated file that a later run would take for a
    finished one.

    Args:
        url: Download URL.
        dest_path: Final location of the file.

    Returns:
        ``dest_path``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if dest_path.exists():
        print(f" EES: {dest_path.name} already exists, skipping.")
        return dest_path

    print(f" EES: downloading {url} ...")
    # The context manager releases the connection even if the transfer fails.
    with requests.get(url, timeout=300, stream=True) as resp:
        resp.raise_for_status()
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = dest_path.with_name(dest_path.name + ".part")
        try:
            with open(tmp_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=65536):
                    f.write(chunk)
            tmp_path.replace(dest_path)
        except BaseException:
            # Remove the partial file, then let the original error propagate.
            tmp_path.unlink(missing_ok=True)
            raise

    print(f" EES: saved {dest_path} ({dest_path.stat().st_size // 1024} KB)")
    return dest_path
|
||||||
143
integrator/scripts/sources/finance.py
Normal file
143
integrator/scripts/sources/finance.py
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
"""
|
||||||
|
FBIT (Financial Benchmarking and Insights Tool) financial data loader.
|
||||||
|
|
||||||
|
Source: https://schools-financial-benchmarking.service.gov.uk/api/
|
||||||
|
Update: Annual (December — data for the prior financial year)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "finance"
|
||||||
|
API_BASE = "https://schools-financial-benchmarking.service.gov.uk/api"
|
||||||
|
RATE_LIMIT_DELAY = 0.1 # seconds between requests
|
||||||
|
|
||||||
|
|
||||||
|
def _per_pupil_spend(data: dict) -> float | None:
    """Return total expenditure divided by pupil count (2 dp), or None if not computable."""
    total = data.get("totalExpenditure")
    pupils = data.get("numberOfPupils")
    if total is None or not pupils:
        # A missing or zero pupil count must not be recorded as a spend of 0.
        return None
    return round(total / pupils, 2)


def download(data_dir: Path | None = None) -> Path:
    """Fetch per-school financial data from the FBIT API and save it as a CSV.

    One request is made for every URN in the ``schools`` table, pausing
    ``RATE_LIMIT_DELAY`` seconds between requests. Results are written to
    ``fbit_<year>.csv``, where year is the current calendar year minus one.

    Args:
        data_dir: Optional data root. When given, the file is stored under
            ``<data_dir>/supplementary/finance``; otherwise under ``DEST_DIR``.

    Returns:
        Path of the CSV (either freshly written or already present).

    Raises:
        RuntimeError: If no record could be fetched. Nothing is written in that
            case, so a later run retries instead of skipping the download.
    """
    from datetime import date

    dest = (data_dir / "supplementary" / "finance") if data_dir else DEST_DIR
    dest.mkdir(parents=True, exist_ok=True)

    # Use current year minus 1 for completed financials.
    year = date.today().year - 1
    dest_file = dest / f"fbit_{year}.csv"

    if dest_file.exists():
        print(f" Finance: {dest_file.name} already exists, skipping download.")
        return dest_file

    # Get all URNs from the database.
    with get_session() as session:
        from sqlalchemy import text
        rows = session.execute(text("SELECT urn FROM schools")).fetchall()
    urns = [r[0] for r in rows]
    print(f" Finance: fetching FBIT data for {len(urns)} schools (year {year}) ...")

    records = []
    errors = 0
    for i, urn in enumerate(urns):
        if i % 500 == 0:
            print(f" {i}/{len(urns)} ...")
        try:
            resp = requests.get(
                f"{API_BASE}/schoolFinancialDataObject/{urn}",
                timeout=10,
            )
            if resp.status_code == 200:
                data = resp.json()
                if data:
                    records.append({
                        "urn": urn,
                        "year": year,
                        "per_pupil_spend": _per_pupil_spend(data),
                        "staff_cost_pct": data.get("staffCostPercent"),
                        "teacher_cost_pct": data.get("teachingStaffCostPercent"),
                        "support_staff_cost_pct": data.get("educationSupportStaffCostPercent"),
                        # NOTE(review): "premisesStaffCostPercent" looks like a staff
                        # figure rather than total premises cost; confirm against the API.
                        "premises_cost_pct": data.get("premisesStaffCostPercent"),
                    })
            elif resp.status_code not in (404, 400):
                # 400/404 are treated as "no data for this school", not as errors.
                errors += 1
        except Exception as exc:
            # Best effort per school: keep going, but show the first few failures
            # instead of hiding them completely.
            errors += 1
            if errors <= 5:
                print(f" Finance: request for URN {urn} failed: {exc}")

        time.sleep(RATE_LIMIT_DELAY)

    if not records:
        # An empty CSV would block every later download (the file would exist)
        # and cannot be parsed by load().
        raise RuntimeError(
            f"Finance: no FBIT records fetched for {len(urns)} schools ({errors} errors)"
        )

    df = pd.DataFrame(records)
    df.to_csv(dest_file, index=False)
    print(f" Finance: saved {len(records)} records to {dest_file} ({errors} errors)")
    return dest_file
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Upsert FBIT finance figures from a CSV into the ``school_finance`` table.

    Args:
        path: CSV file to load. When omitted, the last ``fbit_*.csv`` (sorted by
            name) in the finance folder is used.
        data_dir: Optional data root used to locate the finance folder when
            ``path`` is omitted; defaults to ``DEST_DIR``.

    Returns:
        A dict with ``inserted`` (rows upserted) plus ``updated`` and
        ``skipped``, which are always 0.

    Raises:
        FileNotFoundError: If no finance CSV is available to load.
    """
    if path is None:
        folder = DEST_DIR if not data_dir else data_dir / "supplementary" / "finance"
        candidates = sorted(folder.glob("fbit_*.csv"))
        if not candidates:
            raise FileNotFoundError(f"No finance CSV found in {folder}")
        path = candidates[-1]

    print(f" Finance: loading {path} ...")
    frame = pd.read_csv(path)

    # Keep only rows whose URN is numeric.
    frame["urn"] = pd.to_numeric(frame["urn"], errors="coerce")
    frame = frame.dropna(subset=["urn"])
    frame["urn"] = frame["urn"].astype(int)

    def _float_or_none(record, column):
        # Missing column or NaN cell -> None; otherwise the value as float.
        if pd.notna(record.get(column)):
            return float(record[column])
        return None

    count = 0
    with get_session() as session:
        from sqlalchemy import text

        for _, record in frame.iterrows():
            params = {
                "urn": int(record["urn"]),
                "year": int(record["year"]),
                "per_pupil": _float_or_none(record, "per_pupil_spend"),
                "staff": _float_or_none(record, "staff_cost_pct"),
                "teacher": _float_or_none(record, "teacher_cost_pct"),
                "support": _float_or_none(record, "support_staff_cost_pct"),
                "premises": _float_or_none(record, "premises_cost_pct"),
            }
            session.execute(
                text("""
                    INSERT INTO school_finance
                    (urn, year, per_pupil_spend, staff_cost_pct, teacher_cost_pct,
                    support_staff_cost_pct, premises_cost_pct)
                    VALUES (:urn, :year, :per_pupil, :staff, :teacher, :support, :premises)
                    ON CONFLICT (urn, year) DO UPDATE SET
                    per_pupil_spend = EXCLUDED.per_pupil_spend,
                    staff_cost_pct = EXCLUDED.staff_cost_pct,
                    teacher_cost_pct = EXCLUDED.teacher_cost_pct,
                    support_staff_cost_pct = EXCLUDED.support_staff_cost_pct,
                    premises_cost_pct = EXCLUDED.premises_cost_pct
                """),
                params,
            )
            count += 1
            # Flush pending statements every 2000 rows.
            if count % 2000 == 0:
                session.flush()

    print(f" Finance: upserted {count} records")
    return {"inserted": count, "updated": 0, "skipped": 0}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action selects download, load, or both ("all").
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    # With three allowed choices, "not load" means download/all and
    # "not download" means load/all; download always runs first.
    if opts.action != "load":
        download(opts.data_dir)
    if opts.action != "download":
        load(data_dir=opts.data_dir)
|
||||||
159
integrator/scripts/sources/gias.py
Normal file
159
integrator/scripts/sources/gias.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""
|
||||||
|
GIAS (Get Information About Schools) bulk CSV downloader and loader.
|
||||||
|
|
||||||
|
Source: https://get-information-schools.service.gov.uk/Downloads
|
||||||
|
Update: Daily; we refresh weekly.
|
||||||
|
Adds: website, headteacher_name, capacity, trust_name, trust_uid, gender, nursery_provision
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "gias"
|
||||||
|
|
||||||
|
# GIAS bulk download URL — date is injected at runtime
|
||||||
|
GIAS_URL_TEMPLATE = "https://ea-edubase-api-prod.azurewebsites.net/edubase/downloads/public/edubasealldata{date}.csv"
|
||||||
|
|
||||||
|
COLUMN_MAP = {
|
||||||
|
"URN": "urn",
|
||||||
|
"SchoolWebsite": "website",
|
||||||
|
"SchoolCapacity": "capacity",
|
||||||
|
"TrustName": "trust_name",
|
||||||
|
"TrustUID": "trust_uid",
|
||||||
|
"Gender (name)": "gender",
|
||||||
|
"NurseryProvision (name)": "nursery_provision_raw",
|
||||||
|
"HeadTitle": "head_title",
|
||||||
|
"HeadFirstName": "head_first",
|
||||||
|
"HeadLastName": "head_last",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
|
||||||
|
dest = (data_dir / "supplementary" / "gias") if data_dir else DEST_DIR
|
||||||
|
dest.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
today = date.today().strftime("%Y%m%d")
|
||||||
|
url = GIAS_URL_TEMPLATE.format(date=today)
|
||||||
|
filename = f"gias_{today}.csv"
|
||||||
|
dest_file = dest / filename
|
||||||
|
|
||||||
|
if dest_file.exists():
|
||||||
|
print(f" GIAS: {filename} already exists, skipping download.")
|
||||||
|
return dest_file
|
||||||
|
|
||||||
|
print(f" GIAS: downloading {url} ...")
|
||||||
|
resp = requests.get(url, timeout=300, stream=True)
|
||||||
|
|
||||||
|
# GIAS may not have today's file yet — fall back to yesterday
|
||||||
|
if resp.status_code == 404:
|
||||||
|
from datetime import timedelta
|
||||||
|
yesterday = (date.today() - timedelta(days=1)).strftime("%Y%m%d")
|
||||||
|
url = GIAS_URL_TEMPLATE.format(date=yesterday)
|
||||||
|
filename = f"gias_{yesterday}.csv"
|
||||||
|
dest_file = dest / filename
|
||||||
|
if dest_file.exists():
|
||||||
|
print(f" GIAS: {filename} already exists, skipping download.")
|
||||||
|
return dest_file
|
||||||
|
resp = requests.get(url, timeout=300, stream=True)
|
||||||
|
|
||||||
|
resp.raise_for_status()
|
||||||
|
with open(dest_file, "wb") as f:
|
||||||
|
for chunk in resp.iter_content(chunk_size=65536):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
print(f" GIAS: saved {dest_file} ({dest_file.stat().st_size // 1024} KB)")
|
||||||
|
return dest_file
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Update existing ``schools`` rows with attributes from a GIAS bulk CSV.

    Only rows that already exist in ``schools`` are changed (plain UPDATE by
    URN); every listed column is overwritten, with NULL where GIAS has no value.

    Args:
        path: CSV file to load. When omitted, the last ``gias_*.csv`` (sorted by
            name) in the GIAS folder is used.
        data_dir: Optional data root used to locate the GIAS folder when
            ``path`` is omitted; defaults to ``DEST_DIR``.

    Returns:
        A dict with ``inserted`` (always 0), ``updated`` (GIAS rows that matched
        a school) and ``skipped`` (GIAS rows whose URN is not in ``schools``).

    Raises:
        FileNotFoundError: If no GIAS CSV is available to load.
        ValueError: If the file has no URN column.
    """
    if path is None:
        dest = (data_dir / "supplementary" / "gias") if data_dir else DEST_DIR
        files = sorted(dest.glob("gias_*.csv"))
        if not files:
            raise FileNotFoundError(f"No GIAS CSV found in {dest}")
        path = files[-1]

    print(f" GIAS: loading {path} ...")
    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
    df.rename(columns=COLUMN_MAP, inplace=True)

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    def clean_str(val):
        """Return a stripped string, or None for missing or placeholder values."""
        if pd.isna(val):
            return None
        # Columns with gaps are read as float, so an id like 5143 arrives as 5143.0.
        if isinstance(val, float) and val.is_integer():
            val = int(val)
        s = str(val).strip()
        return s if s and s.lower() not in ("nan", "none") else None

    def clean_int(val):
        """Return a non-negative whole number as int, or None."""
        if pd.isna(val):
            return None
        try:
            number = float(str(val).strip())
        except ValueError:
            return None
        # Accepts "210" and 210.0 alike; rejects fractions, negatives, inf and nan.
        return int(number) if number.is_integer() and number >= 0 else None

    def build_name(row):
        """Join title, first name and last name, ignoring missing parts."""
        parts = (clean_str(row.get(col)) for col in ("head_title", "head_first", "head_last"))
        return " ".join(p for p in parts if p) or None

    def nursery_flag(val):
        """True when the provision text starts with "has"; None when missing."""
        if pd.isna(val):
            return None
        return str(val).strip().lower().startswith("has")

    updated = 0
    skipped = 0
    processed = 0
    with get_session() as session:
        from sqlalchemy import text

        # Built once; the statement is identical for every row.
        update_stmt = text("""
            UPDATE schools SET
            website = :website,
            headteacher_name = :headteacher_name,
            capacity = :capacity,
            trust_name = :trust_name,
            trust_uid = :trust_uid,
            gender = :gender,
            nursery_provision = :nursery_provision
            WHERE urn = :urn
        """)

        for _, row in df.iterrows():
            result = session.execute(
                update_stmt,
                {
                    "urn": int(row["urn"]),
                    "website": clean_str(row.get("website")),
                    "headteacher_name": build_name(row),
                    "capacity": clean_int(row.get("capacity")),
                    "trust_name": clean_str(row.get("trust_name")),
                    "trust_uid": clean_str(row.get("trust_uid")),
                    "gender": clean_str(row.get("gender")),
                    # row.get() yields None when the column is absent from the file.
                    "nursery_provision": nursery_flag(row.get("nursery_provision_raw")),
                },
            )
            # rowcount is 0 when the URN is not present in the schools table.
            if result.rowcount:
                updated += 1
            else:
                skipped += 1

            processed += 1
            if processed % 5000 == 0:
                session.flush()
                print(f" Updated {updated} schools...")

    print(f" GIAS: updated {updated} school records")
    return {"inserted": 0, "updated": updated, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action selects download, load, or both ("all").
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    # With three allowed choices, "not load" means download/all and
    # "not download" means load/all; download always runs first.
    if opts.action != "load":
        download(opts.data_dir)
    if opts.action != "download":
        load(data_dir=opts.data_dir)
|
||||||
176
integrator/scripts/sources/idaci.py
Normal file
176
integrator/scripts/sources/idaci.py
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
"""
|
||||||
|
IDACI (Income Deprivation Affecting Children Index) loader.
|
||||||
|
|
||||||
|
Source: English Indices of Deprivation 2019
|
||||||
|
https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019
|
||||||
|
|
||||||
|
This is a one-time download (5-yearly release). We join school postcodes to LSOAs
|
||||||
|
via postcodes.io, then look up IDACI scores from the IoD2019 file.
|
||||||
|
|
||||||
|
Update: ~5-yearly (next release expected 2025/26)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "idaci"
|
||||||
|
|
||||||
|
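# IoD 2019 data — "Income Deprivation Affecting Children Index (IDACI)".
# NOTE(review): the URL below points at "File 1" (Index of Multiple Deprivation);
# IDACI scores are normally published in the supplementary-indices file — confirm
# that load() can actually find an IDACI score column in this workbook.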
|
||||||
|
IOD_2019_URL = (
|
||||||
|
"https://assets.publishing.service.gov.uk/government/uploads/system/uploads/"
|
||||||
|
"attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx"
|
||||||
|
)
|
||||||
|
|
||||||
|
POSTCODES_IO_BATCH = "https://api.postcodes.io/postcodes"
|
||||||
|
BATCH_SIZE = 100
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Download the IoD2019 workbook unless a copy is already on disk.

    Args:
        data_dir: Optional data root; the file is stored under
            ``<data_dir>/supplementary/idaci``. Defaults to ``DEST_DIR``.

    Returns:
        Path of the local ``iod2019_idaci.xlsx`` file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
    dest.mkdir(parents=True, exist_ok=True)

    filename = "iod2019_idaci.xlsx"
    dest_file = dest / filename
    if dest_file.exists():
        print(f" IDACI: {filename} already exists, skipping download.")
        return dest_file

    print(" IDACI: downloading IoD2019 file ...")
    # Stream into a temporary sibling and rename at the end, so an interrupted
    # transfer never leaves a truncated file that later runs would "skip".
    # The ".part" suffix also keeps it out of load()'s "*.xlsx" glob.
    tmp_file = dest_file.with_name(dest_file.name + ".part")
    with requests.get(IOD_2019_URL, timeout=300, stream=True) as resp:
        resp.raise_for_status()
        with open(tmp_file, "wb") as f:
            for chunk in resp.iter_content(chunk_size=65536):
                f.write(chunk)
    tmp_file.replace(dest_file)

    print(f" IDACI: saved {dest_file}")
    return dest_file
|
||||||
|
|
||||||
|
|
||||||
|
def _postcode_to_lsoa(postcodes: list[str]) -> dict[str, str]:
    """Batch-resolve postcodes to LSOA codes via postcodes.io.

    Args:
        postcodes: Raw postcode values; blanks and values shorter than five
            characters (after trimming) are ignored, duplicates are merged.

    Returns:
        Mapping of upper-cased postcode (as submitted) to LSOA code. Postcodes
        that could not be resolved are simply absent.

    A failing batch is reported with a warning and skipped (best effort), so
    one bad request does not abort the whole run.
    """
    # str() guards against non-string values (e.g. numbers) coming from the DB.
    cleaned = {str(p).strip().upper() for p in postcodes if p}
    valid = sorted(p for p in cleaned if len(p) >= 5)

    result: dict[str, str] = {}
    for start in range(0, len(valid), BATCH_SIZE):
        batch = valid[start:start + BATCH_SIZE]
        try:
            resp = requests.post(POSTCODES_IO_BATCH, json={"postcodes": batch}, timeout=30)
            if resp.status_code != 200:
                print(f" Warning: postcodes.io batch returned HTTP {resp.status_code}")
                continue
            for item in resp.json().get("result") or []:
                match = item.get("result") if item else None
                if not match:
                    continue
                # The top-level "lsoa" field is the human-readable LSOA *name*;
                # the GSS code that the IoD workbook is keyed on lives under
                # "codes".
                # NOTE(review): "lsoa11" (2011 boundaries, matching IoD2019) is
                # preferred when the API provides it — confirm against the
                # current postcodes.io response schema.
                codes = match.get("codes") or {}
                lsoa = codes.get("lsoa11") or codes.get("lsoa")
                if lsoa:
                    result[str(item["query"]).upper()] = lsoa
        except Exception as e:
            print(f" Warning: postcodes.io batch failed: {e}")

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Load IDACI scores and upsert one deprivation row per school.

    Steps: read the IoD workbook, build an LSOA -> (score, decile) lookup,
    resolve every school postcode to an LSOA via postcodes.io, then upsert
    into ``school_deprivation``.

    Args:
        path: Workbook to read. When omitted, the last ``*.xlsx`` (by name)
            in the IDACI directory is used.
        data_dir: Optional data root containing ``supplementary/idaci``.

    Returns:
        ``{"inserted": n, "updated": 0, "skipped": m}`` where *skipped* counts
        schools whose postcode or LSOA could not be matched.

    Raises:
        FileNotFoundError: No workbook is available.
        RuntimeError: The workbook could not be read.
        ValueError: The LSOA code or IDACI score column could not be found.
    """
    from sqlalchemy import text

    dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
    if path is None:
        files = sorted(dest.glob("*.xlsx"))
        if not files:
            raise FileNotFoundError(f"No IDACI file found in {dest}")
        path = files[-1]

    print(f" IDACI: loading IoD2019 from {path} ...")

    # Pick the sheet that mentions IDACI in its name or headers; fall back to
    # the first sheet of the workbook.
    try:
        sheets = pd.read_excel(path, sheet_name=None)
        idaci_sheet = next(
            (
                name
                for name, sheet in sheets.items()
                if "IDACI" in name.upper() or "IDACI" in str(sheet.columns.tolist()).upper()
            ),
            None,
        )
        if idaci_sheet is None:
            idaci_sheet = list(sheets.keys())[0]
        df_iod = sheets[idaci_sheet]
    except Exception as e:
        raise RuntimeError(f"Could not read IoD2019 file: {e}") from e

    # Locate the columns by keyword because exact headers vary between files.
    col_lsoa = next((c for c in df_iod.columns if "LSOA" in str(c).upper() and "code" in str(c).lower()), None)
    col_score = next((c for c in df_iod.columns if "IDACI" in str(c).upper() and "score" in str(c).lower()), None)

    if not col_lsoa or not col_score:
        print(f" IDACI columns available: {list(df_iod.columns)[:20]}")
        raise ValueError("Could not find LSOA code or IDACI score columns")

    df_iod = df_iod[[col_lsoa, col_score]].copy()
    df_iod.columns = ["lsoa_code", "idaci_score"]
    # Coerce before dropna so stray text cells (notes, footers) are discarded
    # instead of crashing float() later on.
    df_iod["idaci_score"] = pd.to_numeric(df_iod["idaci_score"], errors="coerce")
    df_iod = df_iod.dropna()
    df_iod["lsoa_code"] = df_iod["lsoa_code"].astype(str).str.strip()
    # to_dict("index") requires a unique index.
    df_iod = df_iod.drop_duplicates(subset="lsoa_code", keep="first")

    # qcut numbers bins from the lowest scores upwards; flip so that
    # decile 1 = most deprived (highest IDACI score).
    ascending_decile = (pd.qcut(df_iod["idaci_score"], 10, labels=False) + 1).astype(int)
    df_iod["idaci_decile"] = 11 - ascending_decile

    lsoa_lookup = df_iod.set_index("lsoa_code")[["idaci_score", "idaci_decile"]].to_dict("index")
    print(f" IDACI: loaded {len(lsoa_lookup)} LSOA records")

    # Fetch all school postcodes from the database
    with get_session() as session:
        rows = session.execute(text("SELECT urn, postcode FROM schools WHERE postcode IS NOT NULL")).fetchall()

    postcodes = [r[1] for r in rows]
    print(f" IDACI: resolving {len(postcodes)} postcodes via postcodes.io ...")
    pc_to_lsoa = _postcode_to_lsoa(postcodes)
    print(f" IDACI: resolved {len(pc_to_lsoa)} postcodes to LSOAs")

    upsert = text("""
        INSERT INTO school_deprivation (urn, lsoa_code, idaci_score, idaci_decile)
        VALUES (:urn, :lsoa, :score, :decile)
        ON CONFLICT (urn) DO UPDATE SET
            lsoa_code = EXCLUDED.lsoa_code,
            idaci_score = EXCLUDED.idaci_score,
            idaci_decile = EXCLUDED.idaci_decile
    """)

    inserted = skipped = 0
    with get_session() as session:
        for urn, postcode in rows:
            lsoa = pc_to_lsoa.get(str(postcode).strip().upper())
            iod = lsoa_lookup.get(lsoa) if lsoa else None
            if not iod:
                skipped += 1
                continue

            session.execute(
                upsert,
                {"urn": urn, "lsoa": lsoa, "score": float(iod["idaci_score"]), "decile": int(iod["idaci_decile"])},
            )
            inserted += 1
            if inserted % 2000 == 0:
                session.flush()

    print(f" IDACI: upserted {inserted}, skipped {skipped}")
    return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action picks download, load, or both (default).
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    args = cli.parse_args()

    # argparse restricts --action to the three choices above.
    if args.action != "load":
        download(args.data_dir)
    if args.action != "download":
        load(data_dir=args.data_dir)
|
||||||
226
integrator/scripts/sources/ofsted.py
Normal file
226
integrator/scripts/sources/ofsted.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
"""
|
||||||
|
Ofsted Monthly Management Information CSV downloader and loader.
|
||||||
|
|
||||||
|
Source: https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes
|
||||||
|
Update: Monthly (released ~2 weeks into each month)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import date, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
|
||||||
|
# GOV.UK landing page for the Ofsted MI release. The CSV/ZIP asset URL changes with
# every release, so it is discovered by scraping this page (see _discover_csv_url).
|
||||||
|
GOV_UK_PAGE = "https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes"
|
||||||
|
|
||||||
|
COLUMN_MAP = {
|
||||||
|
"URN": "urn",
|
||||||
|
"Inspection date": "inspection_date",
|
||||||
|
"Publication date": "publication_date",
|
||||||
|
"Inspection type": "inspection_type",
|
||||||
|
"Overall effectiveness": "overall_effectiveness",
|
||||||
|
"Quality of education": "quality_of_education",
|
||||||
|
"Behaviour and attitudes": "behaviour_attitudes",
|
||||||
|
"Personal development": "personal_development",
|
||||||
|
"Leadership and management": "leadership_management",
|
||||||
|
"Early years provision": "early_years_provision",
|
||||||
|
# Some CSVs use shortened names
|
||||||
|
"Urn": "urn",
|
||||||
|
"InspectionDate": "inspection_date",
|
||||||
|
"PublicationDate": "publication_date",
|
||||||
|
"InspectionType": "inspection_type",
|
||||||
|
"OverallEffectiveness": "overall_effectiveness",
|
||||||
|
"QualityOfEducation": "quality_of_education",
|
||||||
|
"BehaviourAndAttitudes": "behaviour_attitudes",
|
||||||
|
"PersonalDevelopment": "personal_development",
|
||||||
|
"LeadershipAndManagement": "leadership_management",
|
||||||
|
"EarlyYearsProvision": "early_years_provision",
|
||||||
|
}
|
||||||
|
|
||||||
|
GRADE_MAP = {
|
||||||
|
"Outstanding": 1, "1": 1, 1: 1,
|
||||||
|
"Good": 2, "2": 2, 2: 2,
|
||||||
|
"Requires improvement": 3, "3": 3, 3: 3,
|
||||||
|
"Requires Improvement": 3,
|
||||||
|
"Inadequate": 4, "4": 4, 4: 4,
|
||||||
|
}
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "ofsted"
|
||||||
|
|
||||||
|
|
||||||
|
def _discover_csv_url() -> str | None:
    """Return the first CSV/ZIP asset link on the GOV.UK page, or None.

    Any failure while fetching the page is reported as a warning and
    results in None rather than an exception.
    """
    # Links to assets.publishing.service.gov.uk ending in .csv or .zip
    link_re = r'href="(https://assets\.publishing\.service\.gov\.uk[^"]+\.(?:csv|zip))"'
    try:
        page = requests.get(GOV_UK_PAGE, timeout=30)
        page.raise_for_status()
        first_link = re.search(link_re, page.text, re.IGNORECASE)
    except Exception as e:
        print(f" Warning: could not scrape GOV.UK page: {e}")
        return None
    if first_link is None:
        return None
    return first_link.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Download the Ofsted MI file discovered on GOV.UK, unless already present.

    Args:
        data_dir: Optional data root; the file is stored under
            ``<data_dir>/supplementary/ofsted``. Defaults to ``DEST_DIR``.

    Returns:
        Path of the local CSV/ZIP file.

    Raises:
        RuntimeError: No download link could be discovered.
        requests.HTTPError: The download request returned an error status.
    """
    dest = (data_dir / "supplementary" / "ofsted") if data_dir else DEST_DIR
    dest.mkdir(parents=True, exist_ok=True)

    url = _discover_csv_url()
    if not url:
        # The previous message referred to a MANUAL_URL constant that does not
        # exist in this module; point at the manual workflow that does work.
        raise RuntimeError(
            "Could not discover Ofsted MI download URL. "
            f"Visit {GOV_UK_PAGE} "
            f"to download the latest file manually into {dest}, "
            "then re-run with --action load"
        )

    filename = url.split("/")[-1]
    dest_file = dest / filename

    if dest_file.exists():
        print(f" Ofsted: {filename} already exists, skipping download.")
        return dest_file

    print(f" Ofsted: downloading {url} ...")
    # Stream into a temporary sibling and rename at the end, so an interrupted
    # transfer never leaves a truncated file that later runs would "skip".
    # The ".part" suffix also keeps it out of load()'s *.csv / *.zip globs.
    tmp_file = dest_file.with_name(dest_file.name + ".part")
    with requests.get(url, timeout=120, stream=True) as resp:
        resp.raise_for_status()
        with open(tmp_file, "wb") as f:
            for chunk in resp.iter_content(chunk_size=65536):
                f.write(chunk)
    tmp_file.replace(dest_file)

    print(f" Ofsted: saved {dest_file} ({dest_file.stat().st_size // 1024} KB)")
    return dest_file
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_grade(val) -> int | None:
    """Translate an Ofsted judgement into its numeric grade via ``GRADE_MAP``.

    Accepts labels ("Good"), digit strings ("2"), and numbers. Floats such as
    ``2.0`` are handled explicitly because pandas reads a numeric column that
    contains blanks as float64. Labels are matched case-insensitively.

    Returns:
        The mapped grade, or None for missing or unrecognised values.
    """
    if pd.isna(val):
        return None
    key = str(val).strip()
    if key in GRADE_MAP:
        return GRADE_MAP[key]

    try:
        number = float(key)
    except ValueError:
        # Not numeric: retry as a label, ignoring case.
        folded = key.casefold()
        return next(
            (
                grade
                for label, grade in GRADE_MAP.items()
                if isinstance(label, str) and label.casefold() == folded
            ),
            None,
        )
    # "2.0" -> 2; non-integral or non-finite numbers are not grades.
    return GRADE_MAP.get(int(number)) if number.is_integer() else None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_date(val) -> date | None:
|
||||||
|
if pd.isna(val):
|
||||||
|
return None
|
||||||
|
for fmt in ("%d/%m/%Y", "%Y-%m-%d", "%d-%m-%Y", "%d %B %Y"):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(str(val).strip(), fmt).date()
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Load an Ofsted MI CSV (or ZIP containing one) into ``ofsted_inspections``.

    Only the most recent inspection per URN is kept when the file has an
    inspection date column. Rows are upserted on ``urn``.

    Args:
        path: File to load. When omitted, the most recently modified
            ``*.csv``/``*.zip`` in the Ofsted directory is used.
        data_dir: Optional data root containing ``supplementary/ofsted``.

    Returns:
        ``{"inserted": n, "updated": 0, "skipped": 0}`` where *n* is the
        number of upserted rows.

    Raises:
        FileNotFoundError: No file is available.
        ValueError: The ZIP holds no CSV, or the URN column is missing.
    """
    import io
    import zipfile

    from sqlalchemy import text

    if path is None:
        dest = (data_dir / "supplementary" / "ofsted") if data_dir else DEST_DIR
        files = [*dest.glob("*.csv"), *dest.glob("*.zip")]
        if not files:
            raise FileNotFoundError(f"No Ofsted MI file found in {dest}")
        # Concatenating name-sorted lists always preferred any ZIP over every
        # CSV; pick the newest file on disk instead.
        path = max(files, key=lambda p: p.stat().st_mtime)

    print(f" Ofsted: loading {path} ...")

    if str(path).endswith(".zip"):
        with zipfile.ZipFile(path) as z:
            csv_names = [n for n in z.namelist() if n.lower().endswith(".csv")]
            if not csv_names:
                raise ValueError("No CSV found inside Ofsted ZIP")
            with z.open(csv_names[0]) as f:
                df = pd.read_csv(io.TextIOWrapper(f, encoding="latin-1"), low_memory=False)
    else:
        df = pd.read_csv(path, encoding="latin-1", low_memory=False)

    # Normalise column names. COLUMN_MAP maps several spellings to one name,
    # so drop any duplicate columns the rename may have produced.
    df = df.rename(columns=COLUMN_MAP)
    df = df.loc[:, ~df.columns.duplicated()]

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    # Only keep rows with a valid URN
    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Keep only the most recent inspection per URN. drop_duplicates keeps the
    # whole newest row; groupby().first() would take the first NON-NULL value
    # per column and could blend grades from older inspections into it.
    if "inspection_date" in df.columns:
        df["_date_parsed"] = df["inspection_date"].apply(_parse_date)
        df = df.sort_values("_date_parsed", ascending=False).drop_duplicates(subset="urn", keep="first")

    grade_fields = (
        "overall_effectiveness",
        "quality_of_education",
        "behaviour_attitudes",
        "personal_development",
        "leadership_management",
        "early_years_provision",
    )

    # previous_overall only rolls forward when the stored inspection differs
    # from the incoming one; re-importing the same inspection must not replace
    # the real previous grade with the current grade.
    upsert = text("""
        INSERT INTO ofsted_inspections
            (urn, inspection_date, publication_date, inspection_type,
             overall_effectiveness, quality_of_education, behaviour_attitudes,
             personal_development, leadership_management, early_years_provision,
             previous_overall)
        VALUES
            (:urn, :inspection_date, :publication_date, :inspection_type,
             :overall_effectiveness, :quality_of_education, :behaviour_attitudes,
             :personal_development, :leadership_management, :early_years_provision,
             :previous_overall)
        ON CONFLICT (urn) DO UPDATE SET
            previous_overall = CASE
                WHEN ofsted_inspections.inspection_date = EXCLUDED.inspection_date
                    THEN ofsted_inspections.previous_overall
                ELSE ofsted_inspections.overall_effectiveness
            END,
            inspection_date = EXCLUDED.inspection_date,
            publication_date = EXCLUDED.publication_date,
            inspection_type = EXCLUDED.inspection_type,
            overall_effectiveness = EXCLUDED.overall_effectiveness,
            quality_of_education = EXCLUDED.quality_of_education,
            behaviour_attitudes = EXCLUDED.behaviour_attitudes,
            personal_development = EXCLUDED.personal_development,
            leadership_management = EXCLUDED.leadership_management,
            early_years_provision = EXCLUDED.early_years_provision
    """)

    inserted = updated = skipped = 0

    with get_session() as session:
        for _, row in df.iterrows():
            # A missing type is NaN; str(NaN) would store the text "nan".
            raw_type = row.get("inspection_type")
            inspection_type = None if pd.isna(raw_type) else (str(raw_type).strip() or None)

            record = {
                "urn": int(row["urn"]),
                "inspection_date": _parse_date(row.get("inspection_date")),
                "publication_date": _parse_date(row.get("publication_date")),
                "inspection_type": inspection_type,
                **{field: _parse_grade(row.get(field)) for field in grade_fields},
                "previous_overall": None,
            }

            session.execute(upsert, record)
            inserted += 1

            if inserted % 5000 == 0:
                session.flush()
                print(f" Processed {inserted} records...")

    print(f" Ofsted: upserted {inserted} records")
    return {"inserted": inserted, "updated": updated, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action picks download, load, or both (default).
    parser = argparse.ArgumentParser()
    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
    parser.add_argument("--data-dir", type=Path, default=None)
    args = parser.parse_args()

    # Hand the freshly downloaded file straight to load() so it cannot pick a
    # different (older) file from the directory. With --action load, path
    # stays None and load() chooses a file itself.
    path = None
    if args.action in ("download", "all"):
        path = download(args.data_dir)
    if args.action in ("load", "all"):
        load(path=path, data_dir=args.data_dir)
|
||||||
229
integrator/scripts/sources/parent_view.py
Normal file
229
integrator/scripts/sources/parent_view.py
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
"""
|
||||||
|
Ofsted Parent View open data downloader and loader.
|
||||||
|
|
||||||
|
Source: https://parentview.ofsted.gov.uk/open-data
|
||||||
|
Update: ~3 times/year (Spring, Autumn, Summer)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import date, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "parent_view"
|
||||||
|
OPEN_DATA_PAGE = "https://parentview.ofsted.gov.uk/open-data"
|
||||||
|
|
||||||
|
# Question column mapping — Parent View open data uses descriptive column headers
|
||||||
|
# Map any variant to our internal field names
|
||||||
|
QUESTION_MAP = {
|
||||||
|
# Q1 — happiness
|
||||||
|
"My child is happy at this school": "q_happy_pct",
|
||||||
|
"Happy": "q_happy_pct",
|
||||||
|
# Q2 — safety
|
||||||
|
"My child feels safe at this school": "q_safe_pct",
|
||||||
|
"Safe": "q_safe_pct",
|
||||||
|
# Q3 — behaviour
|
||||||
|
"The school makes sure its pupils are well behaved": "q_behaviour_pct",
|
||||||
|
"Well Behaved": "q_behaviour_pct",
|
||||||
|
# Q4 — bullying dealt with (sometimes separate)
|
||||||
|
"My child has been bullied and the school dealt with the bullying quickly and effectively": "q_bullying_pct",
|
||||||
|
"Bullying": "q_bullying_pct",
|
||||||
|
# Q5 — curriculum info
|
||||||
|
"The school makes me aware of what my child will learn during the year": "q_communication_pct",
|
||||||
|
"Aware of learning": "q_communication_pct",
|
||||||
|
# Q6 — concerns dealt with
|
||||||
|
"When I have raised concerns with the school, they have been dealt with properly": "q_communication_pct",
|
||||||
|
# Q7 — child does well
|
||||||
|
"My child does well at this school": "q_progress_pct",
|
||||||
|
"Does well": "q_progress_pct",
|
||||||
|
# Q8 — teaching
|
||||||
|
"The teaching is good at this school": "q_teaching_pct",
|
||||||
|
"Good teaching": "q_teaching_pct",
|
||||||
|
# Q9 — progress info
|
||||||
|
"I receive valuable information from the school about my child's progress": "q_information_pct",
|
||||||
|
"Progress information": "q_information_pct",
|
||||||
|
# Q10 — curriculum breadth
|
||||||
|
"My child is taught a broad range of subjects": "q_curriculum_pct",
|
||||||
|
"Broad subjects": "q_curriculum_pct",
|
||||||
|
# Q11 — prepares for future
|
||||||
|
"The school prepares my child well for the future": "q_future_pct",
|
||||||
|
"Prepared for future": "q_future_pct",
|
||||||
|
# Q12 — leadership
|
||||||
|
"The school is led and managed effectively": "q_leadership_pct",
|
||||||
|
"Led well": "q_leadership_pct",
|
||||||
|
# Q13 — wellbeing
|
||||||
|
"The school supports my child's wider personal development": "q_wellbeing_pct",
|
||||||
|
"Personal development": "q_wellbeing_pct",
|
||||||
|
# Q14 — recommendation
|
||||||
|
"I would recommend this school to another parent": "q_recommend_pct",
|
||||||
|
"Recommend": "q_recommend_pct",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Download the first data file linked from the Parent View open-data page.

    Args:
        data_dir: Optional data root; the file is stored under
            ``<data_dir>/supplementary/parent_view``. Defaults to ``DEST_DIR``.

    Returns:
        Path of the local file (existing or newly downloaded).

    Raises:
        RuntimeError: The page could not be fetched or has no download link.
        requests.HTTPError: The file download returned an error status.
    """
    from urllib.parse import urljoin

    dest = (data_dir / "supplementary" / "parent_view") if data_dir else DEST_DIR
    dest.mkdir(parents=True, exist_ok=True)

    # Scrape the open data page for the download link
    try:
        page = requests.get(OPEN_DATA_PAGE, timeout=30)
        page.raise_for_status()
        urls = re.findall(r'href="([^"]+\.(?:xlsx|csv|zip))"', page.text, re.IGNORECASE)
    except Exception as e:
        raise RuntimeError(f"Could not discover Parent View download URL: {e}") from e
    if not urls:
        raise RuntimeError(
            "Could not discover Parent View download URL: "
            "No download link found on Parent View open data page"
        )

    # urljoin leaves absolute links alone and resolves root-relative,
    # page-relative and protocol-relative hrefs correctly, which plain
    # string concatenation with the host does not.
    url = urljoin(OPEN_DATA_PAGE, urls[0])

    filename = url.split("/")[-1].split("?")[0]
    dest_file = dest / filename

    if dest_file.exists():
        print(f" ParentView: {filename} already exists, skipping download.")
        return dest_file

    print(f" ParentView: downloading {url} ...")
    # Stream into a temporary sibling and rename at the end, so an interrupted
    # transfer never leaves a truncated file that later runs would "skip".
    tmp_file = dest_file.with_name(dest_file.name + ".part")
    with requests.get(url, timeout=120, stream=True) as resp:
        resp.raise_for_status()
        with open(tmp_file, "wb") as f:
            for chunk in resp.iter_content(chunk_size=65536):
                f.write(chunk)
    tmp_file.replace(dest_file)

    print(f" ParentView: saved {dest_file}")
    return dest_file
|
||||||
|
|
||||||
|
|
||||||
|
def _positive_pct(row: pd.Series, q_col_base: str) -> float | None:
|
||||||
|
"""Sum 'Strongly agree' + 'Agree' percentages for a question."""
|
||||||
|
# Parent View open data has columns like "Q1 - Strongly agree %", "Q1 - Agree %"
|
||||||
|
strongly = row.get(f"{q_col_base} - Strongly agree %") or row.get(f"{q_col_base} - Strongly Agree %")
|
||||||
|
agree = row.get(f"{q_col_base} - Agree %")
|
||||||
|
try:
|
||||||
|
total = 0.0
|
||||||
|
if pd.notna(strongly):
|
||||||
|
total += float(strongly)
|
||||||
|
if pd.notna(agree):
|
||||||
|
total += float(agree)
|
||||||
|
return round(total, 1) if total > 0 else None
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Load a Parent View export into ``ofsted_parent_view`` (upsert on urn).

    Args:
        path: File to load (``.xlsx`` is read as Excel, anything else as CSV).
            When omitted, the most recently modified ``*.xlsx``/``*.csv`` in
            the Parent View directory is used.
        data_dir: Optional data root containing ``supplementary/parent_view``.

    Returns:
        ``{"inserted": n, "updated": 0, "skipped": 0}`` where *n* is the
        number of upserted rows.

    Raises:
        FileNotFoundError: No file is available.
        ValueError: The URN column is missing.
    """
    from sqlalchemy import text

    if path is None:
        dest = (data_dir / "supplementary" / "parent_view") if data_dir else DEST_DIR
        files = [*dest.glob("*.xlsx"), *dest.glob("*.csv")]
        if not files:
            raise FileNotFoundError(f"No Parent View file found in {dest}")
        # Concatenating name-sorted lists always preferred any CSV over every
        # XLSX; pick the newest file on disk instead.
        path = max(files, key=lambda p: p.stat().st_mtime)

    print(f" ParentView: loading {path} ...")

    if str(path).endswith(".xlsx"):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path, encoding="latin-1", low_memory=False)

    # Normalise URN column. str() because Excel headers are not always strings.
    urn_col = next((c for c in df.columns if str(c).strip().upper() == "URN"), None)
    if urn_col is None:
        raise ValueError(f"URN column not found. Columns: {list(df.columns)[:20]}")
    df = df.rename(columns={urn_col: "urn"})
    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Try to find total responses column
    resp_col = next(
        (c for c in df.columns if "total" in str(c).lower() and "respon" in str(c).lower()),
        None,
    )

    # Internal field -> question prefix used in the wide-format column headers.
    # Q6 has no field of its own; q_sen_pct is not populated from this file.
    question_fields = {
        "q_happy_pct": "Q1",
        "q_safe_pct": "Q2",
        "q_behaviour_pct": "Q3",
        "q_bullying_pct": "Q4",
        "q_communication_pct": "Q5",
        "q_progress_pct": "Q7",
        "q_teaching_pct": "Q8",
        "q_information_pct": "Q9",
        "q_curriculum_pct": "Q10",
        "q_future_pct": "Q11",
        "q_leadership_pct": "Q12",
        "q_wellbeing_pct": "Q13",
        "q_recommend_pct": "Q14",
    }

    upsert = text("""
        INSERT INTO ofsted_parent_view
            (urn, survey_date, total_responses,
             q_happy_pct, q_safe_pct, q_behaviour_pct, q_bullying_pct,
             q_communication_pct, q_progress_pct, q_teaching_pct,
             q_information_pct, q_curriculum_pct, q_future_pct,
             q_leadership_pct, q_wellbeing_pct, q_recommend_pct, q_sen_pct)
        VALUES
            (:urn, :survey_date, :total_responses,
             :q_happy_pct, :q_safe_pct, :q_behaviour_pct, :q_bullying_pct,
             :q_communication_pct, :q_progress_pct, :q_teaching_pct,
             :q_information_pct, :q_curriculum_pct, :q_future_pct,
             :q_leadership_pct, :q_wellbeing_pct, :q_recommend_pct, :q_sen_pct)
        ON CONFLICT (urn) DO UPDATE SET
            survey_date = EXCLUDED.survey_date,
            total_responses = EXCLUDED.total_responses,
            q_happy_pct = EXCLUDED.q_happy_pct,
            q_safe_pct = EXCLUDED.q_safe_pct,
            q_behaviour_pct = EXCLUDED.q_behaviour_pct,
            q_bullying_pct = EXCLUDED.q_bullying_pct,
            q_communication_pct = EXCLUDED.q_communication_pct,
            q_progress_pct = EXCLUDED.q_progress_pct,
            q_teaching_pct = EXCLUDED.q_teaching_pct,
            q_information_pct = EXCLUDED.q_information_pct,
            q_curriculum_pct = EXCLUDED.q_curriculum_pct,
            q_future_pct = EXCLUDED.q_future_pct,
            q_leadership_pct = EXCLUDED.q_leadership_pct,
            q_wellbeing_pct = EXCLUDED.q_wellbeing_pct,
            q_recommend_pct = EXCLUDED.q_recommend_pct,
            q_sen_pct = EXCLUDED.q_sen_pct
    """)

    inserted = 0
    # The load date is stored as survey_date.
    today = date.today()

    with get_session() as session:
        for _, row in df.iterrows():
            # Non-numeric response counts become NULL instead of aborting
            # the whole load.
            raw_total = row.get(resp_col) if resp_col is not None else None
            try:
                total = None if pd.isna(raw_total) else int(float(raw_total))
            except (TypeError, ValueError):
                total = None

            record = {
                "urn": int(row["urn"]),
                "survey_date": today,
                "total_responses": total,
                **{field: _positive_pct(row, prefix) for field, prefix in question_fields.items()},
                "q_sen_pct": None,
            }

            session.execute(upsert, record)
            inserted += 1
            if inserted % 2000 == 0:
                session.flush()

    print(f" ParentView: upserted {inserted} records")
    return {"inserted": inserted, "updated": 0, "skipped": 0}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: --action picks download, load, or both (default).
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    args = cli.parse_args()

    # argparse restricts --action to the three choices above.
    if args.action != "load":
        download(args.data_dir)
    if args.action != "download":
        load(data_dir=args.data_dir)
|
||||||
132
integrator/scripts/sources/phonics.py
Normal file
132
integrator/scripts/sources/phonics.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
"""
|
||||||
|
Phonics Screening Check downloader and loader.
|
||||||
|
|
||||||
|
Source: EES publication "phonics-screening-check-and-key-stage-1-assessments-england"
|
||||||
|
Update: Annual (September/October)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
from sources.ees import get_latest_csv_url, download_csv
|
||||||
|
|
||||||
|
DEST_DIR = SUPPLEMENTARY_DIR / "phonics"
|
||||||
|
PUBLICATION_SLUG = "phonics-screening-check-and-key-stage-1-assessments-england"
|
||||||
|
|
||||||
|
# Known column names in the phonics CSV (vary by year)
|
||||||
|
COLUMN_MAP = {
|
||||||
|
"URN": "urn",
|
||||||
|
"urn": "urn",
|
||||||
|
# Year 1 pass rate
|
||||||
|
"PPTA1": "year1_phonics_pct", # % meeting expected standard Y1
|
||||||
|
"PPTA1B": "year1_phonics_pct",
|
||||||
|
"PT_MET_PHON_Y1": "year1_phonics_pct",
|
||||||
|
"Y1_MET_EXPECTED_PCT": "year1_phonics_pct",
|
||||||
|
# Year 2 (re-takers)
|
||||||
|
"PPTA2": "year2_phonics_pct",
|
||||||
|
"PT_MET_PHON_Y2": "year2_phonics_pct",
|
||||||
|
"Y2_MET_EXPECTED_PCT": "year2_phonics_pct",
|
||||||
|
# Year label
|
||||||
|
"YEAR": "year",
|
||||||
|
"Year": "year",
|
||||||
|
}
|
||||||
|
|
||||||
|
NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", ""}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Fetch the latest school-level phonics CSV from the EES publication.

    The target directory is ``<data_dir>/supplementary/phonics`` when
    *data_dir* is given, otherwise ``DEST_DIR``. The file name is taken from
    the last URL segment (query string removed), falling back to
    ``phonics_latest.csv`` when that segment is empty.

    Raises:
        RuntimeError: No CSV URL could be found for the publication.
    """
    if data_dir:
        target_dir = data_dir / "supplementary" / "phonics"
    else:
        target_dir = DEST_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    csv_url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
    if not csv_url:
        raise RuntimeError("Could not find CSV URL for phonics publication")

    last_segment = csv_url.split("/")[-1]
    basename = last_segment.split("?")[0]
    if not basename:
        basename = "phonics_latest.csv"
    return download_csv(csv_url, target_dir / basename)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pct(val) -> float | None:
    """Parse a percentage cell into a float.

    Missing values, suppression markers listed in ``NULL_VALUES`` and
    anything that is not a number yield None. A ``%`` sign and surrounding
    whitespace are ignored.
    """
    if pd.isna(val):
        return None
    cleaned = str(val).strip().upper().replace("%", "")
    if cleaned not in NULL_VALUES:
        try:
            return float(cleaned)
        except ValueError:
            pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Load a phonics screening CSV into ``phonics`` (upsert on urn + year).

    The year comes from the row's ``year`` column when present; otherwise
    from the first ``20xx`` found in the file name. Rows for which no year
    can be determined are skipped.

    Args:
        path: CSV to load. When omitted, the last ``*.csv`` (by name) in the
            phonics directory is used.
        data_dir: Optional data root containing ``supplementary/phonics``.

    Returns:
        ``{"inserted": n, "updated": 0, "skipped": m}`` where *m* counts rows
        skipped for lack of a year.

    Raises:
        FileNotFoundError: No CSV is available.
        ValueError: The URN column is missing.
    """
    import re

    from sqlalchemy import text

    if path is None:
        dest = (data_dir / "supplementary" / "phonics") if data_dir else DEST_DIR
        files = sorted(dest.glob("*.csv"))
        if not files:
            raise FileNotFoundError(f"No phonics CSV found in {dest}")
        path = files[-1]

    print(f" Phonics: loading {path} ...")
    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
    # COLUMN_MAP maps several source headers to the same target name. If a
    # file carries more than one of them, the rename yields duplicate columns
    # and row.get() would return a Series; keep the first of each name.
    df = df.rename(columns=COLUMN_MAP)
    df = df.loc[:, ~df.columns.duplicated()]

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    df = df.dropna(subset=["urn"])
    df["urn"] = df["urn"].astype(int)

    # Infer year from filename if not in data
    file_year = None
    m = re.search(r"20(\d{2})", path.stem)
    if m:
        file_year = int("20" + m.group(1))

    has_year_col = "year" in df.columns
    upsert = text("""
        INSERT INTO phonics (urn, year, year1_phonics_pct, year2_phonics_pct)
        VALUES (:urn, :year, :y1, :y2)
        ON CONFLICT (urn, year) DO UPDATE SET
            year1_phonics_pct = EXCLUDED.year1_phonics_pct,
            year2_phonics_pct = EXCLUDED.year2_phonics_pct
    """)

    inserted = skipped = 0
    with get_session() as session:
        for _, row in df.iterrows():
            row_year = int(row["year"]) if has_year_col and pd.notna(row.get("year")) else file_year
            if not row_year:
                # Previously these rows vanished silently; report them.
                skipped += 1
                continue

            session.execute(
                upsert,
                {
                    "urn": int(row["urn"]),
                    "year": row_year,
                    "y1": _parse_pct(row.get("year1_phonics_pct")),
                    "y2": _parse_pct(row.get("year2_phonics_pct")),
                },
            )
            inserted += 1
            if inserted % 5000 == 0:
                session.flush()

    print(f" Phonics: upserted {inserted} records")
    return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: download and/or load the phonics data.
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    run_everything = opts.action == "all"
    if run_everything or opts.action == "download":
        download(opts.data_dir)
    if run_everything or opts.action == "load":
        load(data_dir=opts.data_dir)
|
||||||
150
integrator/scripts/sources/sen_detail.py
Normal file
150
integrator/scripts/sources/sen_detail.py
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
"""
|
||||||
|
SEN (Special Educational Needs) primary need type breakdown.
|
||||||
|
|
||||||
|
Source: EES publication "special-educational-needs-in-england"
|
||||||
|
Update: Annual (September)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
from config import SUPPLEMENTARY_DIR
|
||||||
|
from db import get_session
|
||||||
|
from sources.ees import get_latest_csv_url, download_csv
|
||||||
|
|
||||||
|
# Default directory for downloaded SEN CSVs (used when no data_dir is passed).
DEST_DIR = SUPPLEMENTARY_DIR / "sen_detail"
# Publication slug handed to get_latest_csv_url() in download().
PUBLICATION_SLUG = "special-educational-needs-in-england"

# Cell values that _parse_pct() treats as "no data" and maps to None.
# Compared after the cell has been stripped, upper-cased and had "%" removed.
NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}

# Source CSV header -> column name used by load(); applied via DataFrame.rename().
# Two header spellings are listed for each need type, both mapping to the same target.
COLUMN_MAP = {
    "URN": "urn",
    "urn": "urn",
    "YEAR": "year",
    "Year": "year",
    # Primary need types — DfE abbreviated codes
    "PT_SPEECH": "primary_need_speech_pct",  # SLCN
    "PT_ASD": "primary_need_autism_pct",  # ASD
    "PT_MLD": "primary_need_mld_pct",  # Moderate learning difficulty
    "PT_SPLD": "primary_need_spld_pct",  # Specific learning difficulty
    "PT_SEMH": "primary_need_semh_pct",  # Social, emotional, mental health
    "PT_PHYSICAL": "primary_need_physical_pct",  # Physical/sensory
    "PT_OTHER": "primary_need_other_pct",
    # Alternative naming
    "SLCN_PCT": "primary_need_speech_pct",
    "ASD_PCT": "primary_need_autism_pct",
    "MLD_PCT": "primary_need_mld_pct",
    "SPLD_PCT": "primary_need_spld_pct",
    "SEMH_PCT": "primary_need_semh_pct",
    "PHYSICAL_PCT": "primary_need_physical_pct",
    "OTHER_PCT": "primary_need_other_pct",
}
|
||||||
|
|
||||||
|
|
||||||
|
def download(data_dir: Path | None = None) -> Path:
    """Download the latest SEN CSV, preferring a school-level file.

    Args:
        data_dir: Optional base directory. When given, the file is stored in
            ``<data_dir>/supplementary/sen_detail``; otherwise ``DEST_DIR`` is used.

    Returns:
        The value returned by ``download_csv`` for the destination path.

    Raises:
        RuntimeError: If no CSV URL is found for the SEN publication.
    """
    dest = (data_dir / "supplementary" / "sen_detail") if data_dir else DEST_DIR
    dest.mkdir(parents=True, exist_ok=True)

    # Try the "school" keyword first, then fall back to an unfiltered lookup.
    url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
    if not url:
        url = get_latest_csv_url(PUBLICATION_SLUG)
    if not url:
        raise RuntimeError("Could not find CSV URL for SEN publication")

    filename = url.split("/")[-1].split("?")[0] or "sen_latest.csv"
    # load() discovers files with glob("*.csv"). The last URL segment is not
    # guaranteed to carry an extension (e.g. an opaque file id), so add one
    # when it is missing; names that already have a suffix are left untouched.
    if not Path(filename).suffix:
        filename += ".csv"
    return download_csv(url, dest / filename)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pct(val) -> float | None:
|
||||||
|
if pd.isna(val):
|
||||||
|
return None
|
||||||
|
s = str(val).strip().upper().replace("%", "")
|
||||||
|
if s in NULL_VALUES:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(s)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
    """Upsert SEN primary-need percentages from a CSV into ``sen_detail``.

    Args:
        path: CSV file to load. When omitted, the last ``*.csv`` (sorted by
            name) in the SEN directory is used.
        data_dir: Optional base directory; the SEN directory is then
            ``<data_dir>/supplementary/sen_detail`` instead of ``DEST_DIR``.

    Returns:
        ``{"inserted": n, "updated": 0, "skipped": m}`` where ``n`` is the
        number of rows upserted and ``m`` the number of rows dropped because
        neither the row nor the filename supplied a year. Rows without a
        numeric URN are removed before counting.

    Raises:
        FileNotFoundError: If no path is given and no CSV exists on disk.
        ValueError: If the CSV has no URN column after renaming.
    """
    from sqlalchemy import text

    if path is None:
        dest = (data_dir / "supplementary" / "sen_detail") if data_dir else DEST_DIR
        files = sorted(dest.glob("*.csv"))
        if not files:
            raise FileNotFoundError(f"No SEN CSV found in {dest}")
        path = files[-1]

    print(f" SEN Detail: loading {path} ...")
    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
    df.rename(columns=COLUMN_MAP, inplace=True)

    if "urn" not in df.columns:
        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")

    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
    # .copy() gives an independent frame, so the assignment below does not
    # write into a slice of the original (avoids SettingWithCopyWarning).
    df = df.dropna(subset=["urn"]).copy()
    df["urn"] = df["urn"].astype(int)

    # Fallback year taken from the filename, used when a row carries no year.
    year_match = re.search(r"20(\d{2})", path.stem)
    fallback_year = int("20" + year_match.group(1)) if year_match else None

    # Loop invariants: build the statement and check the column only once.
    has_year_column = "year" in df.columns
    upsert = text("""
        INSERT INTO sen_detail
            (urn, year, primary_need_speech_pct, primary_need_autism_pct,
             primary_need_mld_pct, primary_need_spld_pct, primary_need_semh_pct,
             primary_need_physical_pct, primary_need_other_pct)
        VALUES (:urn, :year, :speech, :autism, :mld, :spld, :semh, :physical, :other)
        ON CONFLICT (urn, year) DO UPDATE SET
            primary_need_speech_pct = EXCLUDED.primary_need_speech_pct,
            primary_need_autism_pct = EXCLUDED.primary_need_autism_pct,
            primary_need_mld_pct = EXCLUDED.primary_need_mld_pct,
            primary_need_spld_pct = EXCLUDED.primary_need_spld_pct,
            primary_need_semh_pct = EXCLUDED.primary_need_semh_pct,
            primary_need_physical_pct = EXCLUDED.primary_need_physical_pct,
            primary_need_other_pct = EXCLUDED.primary_need_other_pct
    """)

    inserted = 0
    skipped = 0
    with get_session() as session:
        for _, row in df.iterrows():
            if has_year_column and pd.notna(row["year"]):
                row_year = int(row["year"])
            else:
                row_year = fallback_year
            if not row_year:
                # No year in the row and none in the filename: cannot key the record.
                skipped += 1
                continue

            session.execute(
                upsert,
                {
                    "urn": int(row["urn"]),
                    "year": row_year,
                    "speech": _parse_pct(row.get("primary_need_speech_pct")),
                    "autism": _parse_pct(row.get("primary_need_autism_pct")),
                    "mld": _parse_pct(row.get("primary_need_mld_pct")),
                    "spld": _parse_pct(row.get("primary_need_spld_pct")),
                    "semh": _parse_pct(row.get("primary_need_semh_pct")),
                    "physical": _parse_pct(row.get("primary_need_physical_pct")),
                    "other": _parse_pct(row.get("primary_need_other_pct")),
                },
            )
            inserted += 1
            if inserted % 5000 == 0:
                session.flush()

    print(f" SEN Detail: upserted {inserted} records")
    return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: download and/or load the SEN detail data.
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    run_everything = opts.action == "all"
    if run_everything or opts.action == "download":
        download(opts.data_dir)
    if run_everything or opts.action == "load":
        load(data_dir=opts.data_dir)
|
||||||
70
integrator/server.py
Normal file
70
integrator/server.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Data integrator HTTP server.
|
||||||
|
Kestra calls this server via HTTP tasks to trigger download/load operations.
|
||||||
|
"""
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
# Put the scripts directory first on the import path so the `sources.<name>`
# modules can be imported by name at request time.
sys.path.insert(0, "/app/scripts")

app = FastAPI(title="SchoolCompare Data Integrator", version="1.0.0")

# Allow-list of source names: /run/{source} rejects anything not listed here,
# and each name is imported as `sources.<name>`.
SOURCES = {
    "ofsted", "gias", "parent_view",
    "census", "admissions", "sen_detail",
    "phonics", "idaci", "finance",
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
def health():
    # Liveness probe: always answers with a fixed payload and touches no data source.
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/run/{source}")
def run_source(source: str, action: str = "all"):
    """
    Trigger a data source download and/or load.
    action: "download" | "load" | "all"
    """
    # Reject unknown sources and actions before importing anything.
    if source not in SOURCES:
        raise HTTPException(status_code=404, detail=f"Unknown source '{source}'. Available: {sorted(SOURCES)}")
    if action not in ("download", "load", "all"):
        raise HTTPException(status_code=400, detail="action must be 'download', 'load', or 'all'")

    # With the action validated, "all" is simply "neither step excluded".
    wants_download = action != "load"
    wants_load = action != "download"

    try:
        module = importlib.import_module(f"sources.{source}")
        if wants_download:
            module.download()
        # A download-only run reports an empty result.
        outcome = module.load() if wants_load else {}
    except Exception as exc:
        # Any failure is returned as a 500 carrying the message and traceback.
        trace = traceback.format_exc()
        raise HTTPException(status_code=500, detail={"error": str(exc), "traceback": trace})

    return {"source": source, "action": action, "result": outcome}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/run-all")
def run_all(action: str = "all"):
    """Trigger all sources in sequence.

    Sources run in alphabetical order. A failure in one source is recorded
    and does not stop the remaining sources.

    action: "download" | "load" | "all"

    Returns a mapping of source name to its ``load()`` result, ``{}`` for a
    successful download-only run, or ``{"error": "<message>"}`` on failure.
    Responds with HTTP 400 when ``action`` is not one of the allowed values.
    """
    # Same validation as /run/{source}; without it a mistyped action would
    # silently do nothing and return an empty mapping.
    if action not in ("download", "load", "all"):
        raise HTTPException(status_code=400, detail="action must be 'download', 'load', or 'all'")

    results = {}
    for source in sorted(SOURCES):
        try:
            mod = importlib.import_module(f"sources.{source}")
            result = {}
            if action in ("download", "all"):
                mod.download()
            if action in ("load", "all"):
                result = mod.load()
            # Record every successful source, including download-only runs.
            results[source] = result
        except Exception as e:
            # Best-effort batch: keep going, but print the traceback to stderr
            # because the response only carries the short message.
            traceback.print_exc()
            results[source] = {"error": str(e)}
    return results
|
||||||
@@ -77,7 +77,7 @@ export default async function SchoolPage({ params }: SchoolPageProps) {
|
|||||||
notFound();
|
notFound();
|
||||||
}
|
}
|
||||||
|
|
||||||
const { school_info, yearly_data, absence_data } = data;
|
const { school_info, yearly_data, absence_data, ofsted, parent_view, census, admissions, sen_detail, phonics, deprivation, finance } = data;
|
||||||
|
|
||||||
// Generate JSON-LD structured data for SEO
|
// Generate JSON-LD structured data for SEO
|
||||||
const structuredData = {
|
const structuredData = {
|
||||||
@@ -116,6 +116,14 @@ export default async function SchoolPage({ params }: SchoolPageProps) {
|
|||||||
schoolInfo={school_info}
|
schoolInfo={school_info}
|
||||||
yearlyData={yearly_data}
|
yearlyData={yearly_data}
|
||||||
absenceData={absence_data}
|
absenceData={absence_data}
|
||||||
|
ofsted={ofsted ?? null}
|
||||||
|
parentView={parent_view ?? null}
|
||||||
|
census={census ?? null}
|
||||||
|
admissions={admissions ?? null}
|
||||||
|
senDetail={sen_detail ?? null}
|
||||||
|
phonics={phonics ?? null}
|
||||||
|
deprivation={deprivation ?? null}
|
||||||
|
finance={finance ?? null}
|
||||||
/>
|
/>
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -424,3 +424,120 @@
|
|||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Supplementary Data Sections ──────────────────────── */
|
||||||
|
.supplementarySection {
|
||||||
|
background: var(--bg-card, white);
|
||||||
|
border: 1px solid var(--border-color, #e5dfd5);
|
||||||
|
border-radius: 10px;
|
||||||
|
padding: 1.25rem 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.supplementarySubtitle {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-muted, #8a847a);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.subSectionTitle {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-secondary, #5c564d);
|
||||||
|
margin: 1.25rem 0 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ofsted */
|
||||||
|
.ofstedHeader {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ofstedGrade {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.3rem 0.75rem;
|
||||||
|
font-size: 1rem;
|
||||||
|
font-weight: 700;
|
||||||
|
border-radius: 6px;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ofstedGrade1 { background: rgba(45, 125, 125, 0.12); color: var(--accent-teal, #2d7d7d); }
|
||||||
|
.ofstedGrade2 { background: rgba(60, 140, 60, 0.12); color: #3c8c3c; }
|
||||||
|
.ofstedGrade3 { background: rgba(201, 162, 39, 0.15); color: #b8920e; }
|
||||||
|
.ofstedGrade4 { background: rgba(224, 114, 86, 0.15); color: var(--accent-coral, #e07256); }
|
||||||
|
|
||||||
|
.ofstedDate {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-muted, #8a847a);
|
||||||
|
}
|
||||||
|
|
||||||
|
.ofstedType {
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: var(--text-muted, #8a847a);
|
||||||
|
margin-top: 0.5rem;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent View */
|
||||||
|
.parentViewGrid {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewRow {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.75rem;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewLabel {
|
||||||
|
flex: 0 0 18rem;
|
||||||
|
color: var(--text-secondary, #5c564d);
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewBar {
|
||||||
|
flex: 1;
|
||||||
|
height: 0.5rem;
|
||||||
|
background: var(--bg-secondary, #f3ede4);
|
||||||
|
border-radius: 4px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewFill {
|
||||||
|
height: 100%;
|
||||||
|
background: var(--accent-teal, #2d7d7d);
|
||||||
|
border-radius: 4px;
|
||||||
|
transition: width 0.4s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewPct {
|
||||||
|
flex: 0 0 2.75rem;
|
||||||
|
text-align: right;
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-primary, #1a1612);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Metric hint (small label below metricValue) */
|
||||||
|
.metricHint {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-muted, #8a847a);
|
||||||
|
margin-top: 0.25rem;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ── Mobile ──────────────────────────────────────────── */
|
||||||
|
@media (max-width: 640px) {
|
||||||
|
.supplementarySection {
|
||||||
|
padding: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.parentViewLabel {
|
||||||
|
flex: 0 0 10rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,17 +9,37 @@ import { useRouter } from 'next/navigation';
|
|||||||
import { useComparison } from '@/hooks/useComparison';
|
import { useComparison } from '@/hooks/useComparison';
|
||||||
import { PerformanceChart } from './PerformanceChart';
|
import { PerformanceChart } from './PerformanceChart';
|
||||||
import { SchoolMap } from './SchoolMap';
|
import { SchoolMap } from './SchoolMap';
|
||||||
import type { School, SchoolResult, AbsenceData } from '@/lib/types';
|
import type {
|
||||||
|
School, SchoolResult, AbsenceData,
|
||||||
|
OfstedInspection, OfstedParentView, SchoolCensus,
|
||||||
|
SchoolAdmissions, SenDetail, Phonics,
|
||||||
|
SchoolDeprivation, SchoolFinance,
|
||||||
|
} from '@/lib/types';
|
||||||
import { formatPercentage, formatProgress, calculateTrend } from '@/lib/utils';
|
import { formatPercentage, formatProgress, calculateTrend } from '@/lib/utils';
|
||||||
import styles from './SchoolDetailView.module.css';
|
import styles from './SchoolDetailView.module.css';
|
||||||
|
|
||||||
|
const OFSTED_LABELS: Record<number, string> = {
|
||||||
|
1: 'Outstanding', 2: 'Good', 3: 'Requires Improvement', 4: 'Inadequate',
|
||||||
|
};
|
||||||
|
|
||||||
interface SchoolDetailViewProps {
|
interface SchoolDetailViewProps {
|
||||||
schoolInfo: School;
|
schoolInfo: School;
|
||||||
yearlyData: SchoolResult[];
|
yearlyData: SchoolResult[];
|
||||||
absenceData: AbsenceData | null;
|
absenceData: AbsenceData | null;
|
||||||
|
ofsted: OfstedInspection | null;
|
||||||
|
parentView: OfstedParentView | null;
|
||||||
|
census: SchoolCensus | null;
|
||||||
|
admissions: SchoolAdmissions | null;
|
||||||
|
senDetail: SenDetail | null;
|
||||||
|
phonics: Phonics | null;
|
||||||
|
deprivation: SchoolDeprivation | null;
|
||||||
|
finance: SchoolFinance | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function SchoolDetailView({ schoolInfo, yearlyData, absenceData }: SchoolDetailViewProps) {
|
export function SchoolDetailView({
|
||||||
|
schoolInfo, yearlyData, absenceData,
|
||||||
|
ofsted, parentView, census, admissions, senDetail, phonics, deprivation, finance,
|
||||||
|
}: SchoolDetailViewProps) {
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const { addSchool, removeSchool, isSelected } = useComparison();
|
const { addSchool, removeSchool, isSelected } = useComparison();
|
||||||
const isInComparison = isSelected(schoolInfo.urn);
|
const isInComparison = isSelected(schoolInfo.urn);
|
||||||
@@ -322,6 +342,209 @@ export function SchoolDetailView({ schoolInfo, yearlyData, absenceData }: School
|
|||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Ofsted Section */}
|
||||||
|
{ofsted && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Ofsted Inspection</h2>
|
||||||
|
<div className={styles.ofstedHeader}>
|
||||||
|
<span className={`${styles.ofstedGrade} ${styles[`ofstedGrade${ofsted.overall_effectiveness}`]}`}>
|
||||||
|
{ofsted.overall_effectiveness ? OFSTED_LABELS[ofsted.overall_effectiveness] : 'Not rated'}
|
||||||
|
</span>
|
||||||
|
{ofsted.inspection_date && (
|
||||||
|
<span className={styles.ofstedDate}>
|
||||||
|
Inspected: {new Date(ofsted.inspection_date).toLocaleDateString('en-GB', { day: 'numeric', month: 'long', year: 'numeric' })}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
{[
|
||||||
|
{ label: 'Quality of Education', value: ofsted.quality_of_education },
|
||||||
|
{ label: 'Behaviour & Attitudes', value: ofsted.behaviour_attitudes },
|
||||||
|
{ label: 'Personal Development', value: ofsted.personal_development },
|
||||||
|
{ label: 'Leadership & Management', value: ofsted.leadership_management },
|
||||||
|
...(ofsted.early_years_provision != null ? [{ label: 'Early Years', value: ofsted.early_years_provision }] : []),
|
||||||
|
].map(({ label, value }) => value != null && (
|
||||||
|
<div key={label} className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>{label}</div>
|
||||||
|
<div className={`${styles.metricValue} ${styles[`ofstedGrade${value}`]}`}>
|
||||||
|
{OFSTED_LABELS[value]}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
{ofsted.inspection_type && (
|
||||||
|
<p className={styles.ofstedType}>{ofsted.inspection_type}</p>
|
||||||
|
)}
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* What Parents Think */}
|
||||||
|
{parentView && parentView.total_responses != null && parentView.total_responses > 0 && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>What Parents Think</h2>
|
||||||
|
<p className={styles.supplementarySubtitle}>
|
||||||
|
Based on {parentView.total_responses.toLocaleString()} parent responses to the Ofsted Parent View survey.
|
||||||
|
</p>
|
||||||
|
<div className={styles.parentViewGrid}>
|
||||||
|
{[
|
||||||
|
{ label: 'My child is happy here', pct: parentView.q_happy_pct },
|
||||||
|
{ label: 'My child feels safe here', pct: parentView.q_safe_pct },
|
||||||
|
{ label: 'Would recommend this school', pct: parentView.q_recommend_pct },
|
||||||
|
{ label: 'Teaching is good', pct: parentView.q_teaching_pct },
|
||||||
|
{ label: 'My child makes good progress', pct: parentView.q_progress_pct },
|
||||||
|
{ label: 'School looks after wellbeing', pct: parentView.q_wellbeing_pct },
|
||||||
|
{ label: 'Led and managed effectively', pct: parentView.q_leadership_pct },
|
||||||
|
{ label: 'Behaviour is well managed', pct: parentView.q_behaviour_pct },
|
||||||
|
{ label: 'Communicates well with parents', pct: parentView.q_communication_pct },
|
||||||
|
].filter(q => q.pct != null).map(({ label, pct }) => (
|
||||||
|
<div key={label} className={styles.parentViewRow}>
|
||||||
|
<span className={styles.parentViewLabel}>{label}</span>
|
||||||
|
<div className={styles.parentViewBar}>
|
||||||
|
<div className={styles.parentViewFill} style={{ width: `${pct}%` }} />
|
||||||
|
</div>
|
||||||
|
<span className={styles.parentViewPct}>{pct}%</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Admissions */}
|
||||||
|
{admissions && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Admissions ({admissions.year})</h2>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
{admissions.published_admission_number != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Places available</div>
|
||||||
|
<div className={styles.metricValue}>{admissions.published_admission_number}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{admissions.total_applications != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Applications received</div>
|
||||||
|
<div className={styles.metricValue}>{admissions.total_applications.toLocaleString()}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{admissions.first_preference_offers_pct != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Got first choice</div>
|
||||||
|
<div className={styles.metricValue}>{admissions.first_preference_offers_pct}%</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{admissions.oversubscribed != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Oversubscribed</div>
|
||||||
|
<div className={styles.metricValue}>{admissions.oversubscribed ? 'Yes' : 'No'}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Pupils & Inclusion (Census + SEN) */}
|
||||||
|
{(census || senDetail) && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Pupils & Inclusion</h2>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
{census?.class_size_avg != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Average class size</div>
|
||||||
|
<div className={styles.metricValue}>{census.class_size_avg.toFixed(1)}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{senDetail && (
|
||||||
|
<>
|
||||||
|
<h3 className={styles.subSectionTitle}>Primary SEN Needs (latest year)</h3>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
{[
|
||||||
|
{ label: 'Speech & Language', pct: senDetail.primary_need_speech_pct },
|
||||||
|
{ label: 'Autism (ASD)', pct: senDetail.primary_need_autism_pct },
|
||||||
|
{ label: 'Learning Difficulties', pct: senDetail.primary_need_mld_pct },
|
||||||
|
{ label: 'Specific Learning (Dyslexia etc.)', pct: senDetail.primary_need_spld_pct },
|
||||||
|
{ label: 'Social, Emotional & Mental Health', pct: senDetail.primary_need_semh_pct },
|
||||||
|
{ label: 'Physical / Sensory', pct: senDetail.primary_need_physical_pct },
|
||||||
|
].filter(n => n.pct != null).map(({ label, pct }) => (
|
||||||
|
<div key={label} className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>{label}</div>
|
||||||
|
<div className={styles.metricValue}>{pct}%</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Year 1 Phonics */}
|
||||||
|
{phonics && phonics.year1_phonics_pct != null && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Year 1 Phonics ({phonics.year})</h2>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Reached expected standard</div>
|
||||||
|
<div className={styles.metricValue}>{formatPercentage(phonics.year1_phonics_pct)}</div>
|
||||||
|
</div>
|
||||||
|
{phonics.year2_phonics_pct != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Year 2 (re-takers) standard</div>
|
||||||
|
<div className={styles.metricValue}>{formatPercentage(phonics.year2_phonics_pct)}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Deprivation Context */}
|
||||||
|
{deprivation && deprivation.idaci_decile != null && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Deprivation Context</h2>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Area deprivation decile</div>
|
||||||
|
<div className={styles.metricValue}>{deprivation.idaci_decile} / 10</div>
|
||||||
|
<div className={styles.metricHint}>
|
||||||
|
1 = most deprived, 10 = least deprived
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{deprivation.idaci_score != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>IDACI score</div>
|
||||||
|
<div className={styles.metricValue}>{deprivation.idaci_score.toFixed(3)}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Finances */}
|
||||||
|
{finance && finance.per_pupil_spend != null && (
|
||||||
|
<section className={styles.supplementarySection}>
|
||||||
|
<h2 className={styles.sectionTitle}>Finances ({finance.year})</h2>
|
||||||
|
<div className={styles.metricsGrid}>
|
||||||
|
{finance.per_pupil_spend != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Spend per pupil</div>
|
||||||
|
<div className={styles.metricValue}>£{Math.round(finance.per_pupil_spend).toLocaleString()}</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{finance.teacher_cost_pct != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>Teacher costs</div>
|
||||||
|
<div className={styles.metricValue}>{finance.teacher_cost_pct.toFixed(1)}% of budget</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{finance.staff_cost_pct != null && (
|
||||||
|
<div className={styles.metricCard}>
|
||||||
|
<div className={styles.metricLabel}>All staff costs</div>
|
||||||
|
<div className={styles.metricValue}>{finance.staff_cost_pct.toFixed(1)}% of budget</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -211,6 +211,23 @@
|
|||||||
color: var(--text-primary, #1a1612);
|
color: var(--text-primary, #1a1612);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Ofsted badge ────────────────────────────────────── */
|
||||||
|
.ofstedBadge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.0625rem 0.375rem;
|
||||||
|
font-size: 0.6875rem;
|
||||||
|
font-weight: 600;
|
||||||
|
border-radius: 3px;
|
||||||
|
white-space: nowrap;
|
||||||
|
flex-shrink: 0;
|
||||||
|
line-height: 1.4;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ofsted1 { background: rgba(45, 125, 125, 0.12); color: var(--accent-teal, #2d7d7d); }
|
||||||
|
.ofsted2 { background: rgba(60, 140, 60, 0.12); color: #3c8c3c; }
|
||||||
|
.ofsted3 { background: rgba(201, 162, 39, 0.15); color: #b8920e; }
|
||||||
|
.ofsted4 { background: rgba(224, 114, 86, 0.15); color: var(--accent-coral, #e07256); }
|
||||||
|
|
||||||
/* ── Mobile ──────────────────────────────────────────── */
|
/* ── Mobile ──────────────────────────────────────────── */
|
||||||
@media (max-width: 640px) {
|
@media (max-width: 640px) {
|
||||||
.row {
|
.row {
|
||||||
|
|||||||
@@ -12,6 +12,13 @@ import { formatPercentage, formatProgress, calculateTrend } from '@/lib/utils';
|
|||||||
import { progressBand } from '@/lib/metrics';
|
import { progressBand } from '@/lib/metrics';
|
||||||
import styles from './SchoolRow.module.css';
|
import styles from './SchoolRow.module.css';
|
||||||
|
|
||||||
|
const OFSTED_LABELS: Record<number, string> = {
|
||||||
|
1: 'Outstanding',
|
||||||
|
2: 'Good',
|
||||||
|
3: 'Req. Improvement',
|
||||||
|
4: 'Inadequate',
|
||||||
|
};
|
||||||
|
|
||||||
interface SchoolRowProps {
|
interface SchoolRowProps {
|
||||||
school: School;
|
school: School;
|
||||||
isLocationSearch?: boolean;
|
isLocationSearch?: boolean;
|
||||||
@@ -46,7 +53,7 @@ export function SchoolRow({
|
|||||||
{/* Left: three content lines */}
|
{/* Left: three content lines */}
|
||||||
<div className={styles.rowContent}>
|
<div className={styles.rowContent}>
|
||||||
|
|
||||||
{/* Line 1: School name + type */}
|
{/* Line 1: School name + type + Ofsted badge */}
|
||||||
<div className={styles.line1}>
|
<div className={styles.line1}>
|
||||||
<a href={`/school/${school.urn}`} className={styles.schoolName}>
|
<a href={`/school/${school.urn}`} className={styles.schoolName}>
|
||||||
{school.school_name}
|
{school.school_name}
|
||||||
@@ -54,6 +61,11 @@ export function SchoolRow({
|
|||||||
{school.school_type && (
|
{school.school_type && (
|
||||||
<span className={styles.schoolType}>{school.school_type}</span>
|
<span className={styles.schoolType}>{school.school_type}</span>
|
||||||
)}
|
)}
|
||||||
|
{school.ofsted_grade && (
|
||||||
|
<span className={`${styles.ofstedBadge} ${styles[`ofsted${school.ofsted_grade}`]}`}>
|
||||||
|
{OFSTED_LABELS[school.ofsted_grade]}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Line 2: Key stats */}
|
{/* Line 2: Key stats */}
|
||||||
|
|||||||
@@ -47,6 +47,102 @@ export interface School {
|
|||||||
|
|
||||||
// Location search fields
|
// Location search fields
|
||||||
distance?: number | null;
|
distance?: number | null;
|
||||||
|
|
||||||
|
// GIAS enrichment fields
|
||||||
|
website?: string | null;
|
||||||
|
headteacher_name?: string | null;
|
||||||
|
capacity?: number | null;
|
||||||
|
trust_name?: string | null;
|
||||||
|
gender?: string | null;
|
||||||
|
|
||||||
|
// Ofsted (for list view — summary only)
|
||||||
|
ofsted_grade?: 1 | 2 | 3 | 4 | null;
|
||||||
|
ofsted_date?: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Supplementary Data Types (populated by Kestra data integrator)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
export interface OfstedInspection {
|
||||||
|
overall_effectiveness: 1 | 2 | 3 | 4 | null;
|
||||||
|
quality_of_education: number | null;
|
||||||
|
behaviour_attitudes: number | null;
|
||||||
|
personal_development: number | null;
|
||||||
|
leadership_management: number | null;
|
||||||
|
early_years_provision: number | null;
|
||||||
|
previous_overall: number | null;
|
||||||
|
inspection_date: string | null;
|
||||||
|
inspection_type: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface OfstedParentView {
|
||||||
|
survey_date: string | null;
|
||||||
|
total_responses: number | null;
|
||||||
|
q_happy_pct: number | null;
|
||||||
|
q_safe_pct: number | null;
|
||||||
|
q_behaviour_pct: number | null;
|
||||||
|
q_bullying_pct: number | null;
|
||||||
|
q_communication_pct: number | null;
|
||||||
|
q_progress_pct: number | null;
|
||||||
|
q_teaching_pct: number | null;
|
||||||
|
q_information_pct: number | null;
|
||||||
|
q_curriculum_pct: number | null;
|
||||||
|
q_future_pct: number | null;
|
||||||
|
q_leadership_pct: number | null;
|
||||||
|
q_wellbeing_pct: number | null;
|
||||||
|
q_recommend_pct: number | null;
|
||||||
|
q_sen_pct: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SchoolCensus {
|
||||||
|
year: number;
|
||||||
|
class_size_avg: number | null;
|
||||||
|
ethnicity_white_pct: number | null;
|
||||||
|
ethnicity_asian_pct: number | null;
|
||||||
|
ethnicity_black_pct: number | null;
|
||||||
|
ethnicity_mixed_pct: number | null;
|
||||||
|
ethnicity_other_pct: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SchoolAdmissions {
|
||||||
|
year: number;
|
||||||
|
published_admission_number: number | null;
|
||||||
|
total_applications: number | null;
|
||||||
|
first_preference_offers_pct: number | null;
|
||||||
|
oversubscribed: boolean | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SenDetail {
|
||||||
|
year: number;
|
||||||
|
primary_need_speech_pct: number | null;
|
||||||
|
primary_need_autism_pct: number | null;
|
||||||
|
primary_need_mld_pct: number | null;
|
||||||
|
primary_need_spld_pct: number | null;
|
||||||
|
primary_need_semh_pct: number | null;
|
||||||
|
primary_need_physical_pct: number | null;
|
||||||
|
primary_need_other_pct: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Phonics {
|
||||||
|
year: number;
|
||||||
|
year1_phonics_pct: number | null;
|
||||||
|
year2_phonics_pct: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SchoolDeprivation {
|
||||||
|
lsoa_code: string | null;
|
||||||
|
idaci_score: number | null;
|
||||||
|
idaci_decile: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SchoolFinance {
|
||||||
|
year: number;
|
||||||
|
per_pupil_spend: number | null;
|
||||||
|
staff_cost_pct: number | null;
|
||||||
|
teacher_cost_pct: number | null;
|
||||||
|
support_staff_cost_pct: number | null;
|
||||||
|
premises_cost_pct: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -152,6 +248,15 @@ export interface SchoolDetailsResponse {
|
|||||||
school_info: School;
|
school_info: School;
|
||||||
yearly_data: SchoolResult[];
|
yearly_data: SchoolResult[];
|
||||||
absence_data: AbsenceData | null;
|
absence_data: AbsenceData | null;
|
||||||
|
// Supplementary data (null until Kestra populates)
|
||||||
|
ofsted: OfstedInspection | null;
|
||||||
|
parent_view: OfstedParentView | null;
|
||||||
|
census: SchoolCensus | null;
|
||||||
|
admissions: SchoolAdmissions | null;
|
||||||
|
sen_detail: SenDetail | null;
|
||||||
|
phonics: Phonics | null;
|
||||||
|
deprivation: SchoolDeprivation | null;
|
||||||
|
finance: SchoolFinance | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ComparisonData {
|
export interface ComparisonData {
|
||||||
|
|||||||
Reference in New Issue
Block a user