Adds a full data integration pipeline for enriching school profiles with
supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT.
Backend:
- Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections,
ofsted_parent_view, school_census, admissions, sen_detail, phonics,
school_deprivation, school_finance) plus GIAS columns on schools
- Expose all supplementary data via GET /api/schools/{urn}
- Enrich school list responses with ofsted_grade + ofsted_date
Integrator (new service):
- FastAPI HTTP microservice; Kestra calls POST /run/{source}
- 9 source modules: ofsted, gias, parent_view, census, admissions,
sen_detail, phonics, idaci, finance
- 9 Kestra flow YAMLs with scheduled triggers and 3× retry
Frontend:
- SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate)
- SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View
survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation
Context, Finances
- types.ts: 8 new interfaces + extended School/SchoolDetailsResponse
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
177 lines
6.7 KiB
Python
177 lines
6.7 KiB
Python
"""
|
|
IDACI (Income Deprivation Affecting Children Index) loader.

Source: English Indices of Deprivation 2019
https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019

The dataset is published roughly every five years, so the file only needs to be
downloaded once per release. We join school postcodes to LSOAs via postcodes.io,
then look up IDACI scores from the IoD2019 file.

Update: ~5-yearly (next release expected 2025/26)
|
|
"""
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
import requests
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
from config import SUPPLEMENTARY_DIR
|
|
from db import get_session
|
|
|
|
# Directory under SUPPLEMENTARY_DIR where the IoD2019 workbook is stored.
DEST_DIR = SUPPLEMENTARY_DIR / "idaci"

# IoD 2019 supplementary data — "Income Deprivation Affecting Children Index (IDACI)"
# NOTE(review): the filename in this URL is "File_1_-_IMD2019_Index_of_Multiple_Deprivation",
# while load() requires a column whose header contains both "IDACI" and "score".
# Confirm this workbook really carries IDACI score columns; if it does not, the
# IDACI supplementary-indices workbook from the same release is presumably the
# one that should be downloaded instead.
IOD_2019_URL = (
    "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/"
    "attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx"
)

# postcodes.io bulk-lookup endpoint; postcodes are POSTed in slices of BATCH_SIZE.
POSTCODES_IO_BATCH = "https://api.postcodes.io/postcodes"
BATCH_SIZE = 100
|
|
|
|
|
|
def download(data_dir: Path | None = None) -> Path:
|
|
dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
|
|
dest.mkdir(parents=True, exist_ok=True)
|
|
|
|
filename = "iod2019_idaci.xlsx"
|
|
dest_file = dest / filename
|
|
if dest_file.exists():
|
|
print(f" IDACI: {filename} already exists, skipping download.")
|
|
return dest_file
|
|
|
|
print(f" IDACI: downloading IoD2019 file ...")
|
|
resp = requests.get(IOD_2019_URL, timeout=300, stream=True)
|
|
resp.raise_for_status()
|
|
with open(dest_file, "wb") as f:
|
|
for chunk in resp.iter_content(chunk_size=65536):
|
|
f.write(chunk)
|
|
|
|
print(f" IDACI: saved {dest_file}")
|
|
return dest_file
|
|
|
|
|
|
def _postcode_to_lsoa(postcodes: list[str]) -> dict[str, str]:
|
|
"""Batch-resolve postcodes to LSOA codes via postcodes.io."""
|
|
result = {}
|
|
valid = [p.strip().upper() for p in postcodes if p and len(str(p).strip()) >= 5]
|
|
valid = list(set(valid))
|
|
|
|
for i in range(0, len(valid), BATCH_SIZE):
|
|
batch = valid[i:i + BATCH_SIZE]
|
|
try:
|
|
resp = requests.post(POSTCODES_IO_BATCH, json={"postcodes": batch}, timeout=30)
|
|
if resp.status_code == 200:
|
|
for item in resp.json().get("result", []):
|
|
if item and item.get("result"):
|
|
lsoa = item["result"].get("lsoa")
|
|
if lsoa:
|
|
result[item["query"].upper()] = lsoa
|
|
except Exception as e:
|
|
print(f" Warning: postcodes.io batch failed: {e}")
|
|
|
|
return result
|
|
|
|
|
|
def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
|
|
dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
|
|
if path is None:
|
|
files = sorted(dest.glob("*.xlsx"))
|
|
if not files:
|
|
raise FileNotFoundError(f"No IDACI file found in {dest}")
|
|
path = files[-1]
|
|
|
|
print(f" IDACI: loading IoD2019 from {path} ...")
|
|
|
|
# IoD2019 File 1 — sheet "IoD2019 IDACI" or similar
|
|
try:
|
|
iod_df = pd.read_excel(path, sheet_name=None)
|
|
# Find sheet with IDACI data
|
|
idaci_sheet = None
|
|
for name, df in iod_df.items():
|
|
if "IDACI" in name.upper() or "IDACI" in str(df.columns.tolist()).upper():
|
|
idaci_sheet = name
|
|
break
|
|
if idaci_sheet is None:
|
|
idaci_sheet = list(iod_df.keys())[0]
|
|
df_iod = iod_df[idaci_sheet]
|
|
except Exception as e:
|
|
raise RuntimeError(f"Could not read IoD2019 file: {e}")
|
|
|
|
# Normalise column names — IoD2019 uses specific headers
|
|
col_lsoa = next((c for c in df_iod.columns if "LSOA" in str(c).upper() and "code" in str(c).lower()), None)
|
|
col_score = next((c for c in df_iod.columns if "IDACI" in str(c).upper() and "score" in str(c).lower()), None)
|
|
col_rank = next((c for c in df_iod.columns if "IDACI" in str(c).upper() and "rank" in str(c).lower()), None)
|
|
|
|
if not col_lsoa or not col_score:
|
|
print(f" IDACI columns available: {list(df_iod.columns)[:20]}")
|
|
raise ValueError("Could not find LSOA code or IDACI score columns")
|
|
|
|
df_iod = df_iod[[col_lsoa, col_score]].copy()
|
|
df_iod.columns = ["lsoa_code", "idaci_score"]
|
|
df_iod = df_iod.dropna()
|
|
|
|
# Compute decile from rank (or from score distribution)
|
|
total = len(df_iod)
|
|
df_iod = df_iod.sort_values("idaci_score", ascending=False)
|
|
df_iod["idaci_decile"] = (pd.qcut(df_iod["idaci_score"], 10, labels=False) + 1).astype(int)
|
|
# Decile 1 = most deprived (highest IDACI score)
|
|
df_iod["idaci_decile"] = 11 - df_iod["idaci_decile"]
|
|
|
|
lsoa_lookup = df_iod.set_index("lsoa_code")[["idaci_score", "idaci_decile"]].to_dict("index")
|
|
print(f" IDACI: loaded {len(lsoa_lookup)} LSOA records")
|
|
|
|
# Fetch all school postcodes from the database
|
|
with get_session() as session:
|
|
from sqlalchemy import text
|
|
rows = session.execute(text("SELECT urn, postcode FROM schools WHERE postcode IS NOT NULL")).fetchall()
|
|
|
|
postcodes = [r[1] for r in rows]
|
|
print(f" IDACI: resolving {len(postcodes)} postcodes via postcodes.io ...")
|
|
pc_to_lsoa = _postcode_to_lsoa(postcodes)
|
|
print(f" IDACI: resolved {len(pc_to_lsoa)} postcodes to LSOAs")
|
|
|
|
inserted = skipped = 0
|
|
with get_session() as session:
|
|
from sqlalchemy import text
|
|
for urn, postcode in rows:
|
|
lsoa = pc_to_lsoa.get(str(postcode).strip().upper())
|
|
if not lsoa:
|
|
skipped += 1
|
|
continue
|
|
iod = lsoa_lookup.get(lsoa)
|
|
if not iod:
|
|
skipped += 1
|
|
continue
|
|
|
|
session.execute(
|
|
text("""
|
|
INSERT INTO school_deprivation (urn, lsoa_code, idaci_score, idaci_decile)
|
|
VALUES (:urn, :lsoa, :score, :decile)
|
|
ON CONFLICT (urn) DO UPDATE SET
|
|
lsoa_code = EXCLUDED.lsoa_code,
|
|
idaci_score = EXCLUDED.idaci_score,
|
|
idaci_decile = EXCLUDED.idaci_decile
|
|
"""),
|
|
{"urn": urn, "lsoa": lsoa, "score": float(iod["idaci_score"]), "decile": int(iod["idaci_decile"])},
|
|
)
|
|
inserted += 1
|
|
if inserted % 2000 == 0:
|
|
session.flush()
|
|
|
|
print(f" IDACI: upserted {inserted}, skipped {skipped}")
|
|
return {"inserted": inserted, "updated": 0, "skipped": skipped}
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: --action selects download, load, or both
    # (default "all"); --data-dir optionally overrides the base data directory.
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
    cli.add_argument("--data-dir", type=Path, default=None)
    opts = cli.parse_args()

    wants_download = opts.action in {"download", "all"}
    wants_load = opts.action in {"load", "all"}

    if wants_download:
        download(opts.data_dir)
    if wants_load:
        load(data_dir=opts.data_dir)
|