feat(data): integrate 9 UK government data sources via Kestra

Adds a full data integration pipeline for enriching school profiles with supplementary data from Ofsted, GIAS, EES, IDACI, and FBIT. Backend: - Bump SCHEMA_VERSION to 3; add 8 new DB tables (ofsted_inspections, ofsted_parent_view, school_census, school_admissions, sen_detail, phonics, school_deprivation, school_finance) plus GIAS columns on schools - Expose all supplementary data via GET /api/schools/{urn} - Enrich school list responses with ofsted_grade + ofsted_date Integrator (new service): - FastAPI HTTP microservice; Kestra calls POST /run/{source} - 9 source modules: ofsted, gias, parent_view, census, admissions, sen_detail, phonics, idaci, finance - 9 Kestra flow YAMLs with scheduled triggers and 2–3× retry Frontend: - SchoolRow: colour-coded Ofsted badge (Outstanding/Good/RI/Inadequate) - SchoolDetailView: 7 new sections — Ofsted sub-judgements, Parent View survey bars, Admissions, Pupils & Inclusion / SEN, Phonics, Deprivation Context, Finances - types.ts: 8 new interfaces + extended School/SchoolDetailsResponse Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 11:44:04 +00:00
parent c49593d4d6
commit dd49ef28b2
36 changed files with 2849 additions and 8 deletions
@@ -24,6 +24,7 @@ from .data_loader import (
    clear_cache,
    load_school_data,
    geocode_single_postcode,
+    get_supplementary_data,
 )
 from .data_loader import get_data_info as get_db_info
 from .database import check_and_migrate_if_needed
@@ -384,6 +385,16 @@ async def get_school_details(request: Request, urn: int):
    # Get latest info for the school
    latest = school_data.iloc[-1]

+    # Fetch supplementary data (Ofsted, Parent View, admissions, etc.)
+    from .database import SessionLocal
+    supplementary = {}
+    try:
+        db = SessionLocal()
+        supplementary = get_supplementary_data(db, urn)
+        db.close()
+    except Exception:
+        pass
+
    return {
        "school_info": {
            "urn": urn,
@@ -396,8 +407,23 @@ async def get_school_details(request: Request, urn: int):
            "latitude": latest.get("latitude"),
            "longitude": latest.get("longitude"),
            "phase": "Primary",
+            # GIAS fields
+            "website": latest.get("website"),
+            "headteacher_name": latest.get("headteacher_name"),
+            "capacity": latest.get("capacity"),
+            "trust_name": latest.get("trust_name"),
+            "gender": latest.get("gender"),
        },
        "yearly_data": clean_for_json(school_data),
+        # Supplementary data (null if not yet populated by Kestra)
+        "ofsted": supplementary.get("ofsted"),
+        "parent_view": supplementary.get("parent_view"),
+        "census": supplementary.get("census"),
+        "admissions": supplementary.get("admissions"),
+        "sen_detail": supplementary.get("sen_detail"),
+        "phonics": supplementary.get("phonics"),
+        "deprivation": supplementary.get("deprivation"),
+        "finance": supplementary.get("finance"),
    }


@@ -16,7 +16,11 @@ from sqlalchemy.orm import joinedload, Session

 from .config import settings
 from .database import SessionLocal, get_db_session
-from .models import School, SchoolResult
+from .models import (
+    School, SchoolResult,
+    OfstedInspection, OfstedParentView, SchoolCensus,
+    SchoolAdmissions, SenDetail, Phonics, SchoolDeprivation, SchoolFinance,
+)
 from .schemas import SCHOOL_TYPE_MAP

 # Cache for user search postcode geocoding (not for school data)
@@ -381,6 +385,12 @@ def school_to_dict(school: School, include_results: bool = False) -> dict:
        "postcode": school.postcode,
        "latitude": school.latitude,
        "longitude": school.longitude,
+        # GIAS fields
+        "website": school.website,
+        "headteacher_name": school.headteacher_name,
+        "capacity": school.capacity,
+        "trust_name": school.trust_name,
+        "gender": school.gender,
    }

    if include_results and school.results:
@@ -455,8 +465,25 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
        # Query all schools with their results
        schools = db.query(School).options(joinedload(School.results)).all()

+        # Load Ofsted data into a lookup dict (urn → grade, date)
+        ofsted_lookup: Dict[int, dict] = {}
+        try:
+            ofsted_rows = db.query(
+                OfstedInspection.urn,
+                OfstedInspection.overall_effectiveness,
+                OfstedInspection.inspection_date,
+            ).all()
+            for o in ofsted_rows:
+                ofsted_lookup[o.urn] = {
+                    "ofsted_grade": o.overall_effectiveness,
+                    "ofsted_date": o.inspection_date.isoformat() if o.inspection_date else None,
+                }
+        except Exception:
+            pass  # Table may not exist yet on first run
+
        rows = []
        for school in schools:
+            ofsted = ofsted_lookup.get(school.urn, {})
            for result in school.results:
                row = {
                    "urn": school.urn,
@@ -468,6 +495,15 @@ def load_school_data_as_dataframe(db: Session = None) -> pd.DataFrame:
                    "postcode": school.postcode,
                    "latitude": school.latitude,
                    "longitude": school.longitude,
+                    # GIAS fields
+                    "website": school.website,
+                    "headteacher_name": school.headteacher_name,
+                    "capacity": school.capacity,
+                    "trust_name": school.trust_name,
+                    "gender": school.gender,
+                    # Ofsted (for list view)
+                    "ofsted_grade": ofsted.get("ofsted_grade"),
+                    "ofsted_date": ofsted.get("ofsted_date"),
                    **result_to_dict(result)
                }
                rows.append(row)
@@ -511,3 +547,126 @@ def clear_cache():
    """Clear all caches."""
    global _df_cache
    _df_cache = None
+
+
+def get_supplementary_data(db: Session, urn: int) -> dict:
+    """
+    Fetch all supplementary data for a single school URN.
+
+    Each supplementary table is queried independently and best-effort: a
+    failing query (for example because the table has not been created yet)
+    yields ``None`` for that key instead of failing the whole lookup.
+
+    Args:
+        db: Open SQLAlchemy session. It is rolled back after a failed query so
+            that the remaining queries can still run on it.
+        urn: School URN to look up.
+
+    Returns:
+        Dict with keys: ofsted, parent_view, census, admissions, sen_detail,
+        phonics, deprivation, finance. Each value is a dict of column values
+        (dates rendered as ISO strings) or None when no row was found.
+    """
+
+    def safe_query(model, pk_field, latest_year_field=None):
+        """Return the row for ``urn`` (latest year first if requested), or None."""
+        try:
+            query = db.query(model).filter(getattr(model, pk_field) == urn)
+            if latest_year_field:
+                query = query.order_by(getattr(model, latest_year_field).desc())
+            return query.first()
+        except Exception:
+            # A failed statement leaves the PostgreSQL transaction aborted, so
+            # every later query on this session would fail as well. Roll back
+            # to keep the remaining sections fetchable.
+            try:
+                db.rollback()
+            except Exception:
+                pass
+            return None
+
+    def to_dict(row, fields, date_fields=()):
+        """Copy ``fields`` from ``row`` into a dict; None when there is no row."""
+        if row is None:
+            return None
+        out = {}
+        for name in fields:
+            value = getattr(row, name)
+            if name in date_fields:
+                value = value.isoformat() if value else None
+            out[name] = value
+        return out
+
+    # (result key, model, "latest year" column or None, fields in output order,
+    #  fields that hold dates)
+    sections = (
+        ("ofsted", OfstedInspection, None, (
+            "overall_effectiveness", "quality_of_education", "behaviour_attitudes",
+            "personal_development", "leadership_management", "early_years_provision",
+            "previous_overall", "inspection_date", "inspection_type",
+        ), ("inspection_date",)),
+        ("parent_view", OfstedParentView, None, (
+            "survey_date", "total_responses", "q_happy_pct", "q_safe_pct",
+            "q_behaviour_pct", "q_bullying_pct", "q_communication_pct",
+            "q_progress_pct", "q_teaching_pct", "q_information_pct",
+            "q_curriculum_pct", "q_future_pct", "q_leadership_pct",
+            "q_wellbeing_pct", "q_recommend_pct", "q_sen_pct",
+        ), ("survey_date",)),
+        ("census", SchoolCensus, "year", (
+            "year", "class_size_avg", "ethnicity_white_pct", "ethnicity_asian_pct",
+            "ethnicity_black_pct", "ethnicity_mixed_pct", "ethnicity_other_pct",
+        ), ()),
+        ("admissions", SchoolAdmissions, "year", (
+            "year", "published_admission_number", "total_applications",
+            "first_preference_offers_pct", "oversubscribed",
+        ), ()),
+        ("sen_detail", SenDetail, "year", (
+            "year", "primary_need_speech_pct", "primary_need_autism_pct",
+            "primary_need_mld_pct", "primary_need_spld_pct", "primary_need_semh_pct",
+            "primary_need_physical_pct", "primary_need_other_pct",
+        ), ()),
+        ("phonics", Phonics, "year", (
+            "year", "year1_phonics_pct", "year2_phonics_pct",
+        ), ()),
+        ("deprivation", SchoolDeprivation, None, (
+            "lsoa_code", "idaci_score", "idaci_decile",
+        ), ()),
+        ("finance", SchoolFinance, "year", (
+            "year", "per_pupil_spend", "staff_cost_pct", "teacher_cost_pct",
+            "support_staff_cost_pct", "premises_cost_pct",
+        ), ()),
+    )
+
+    result = {}
+    for key, model, year_field, fields, date_fields in sections:
+        # Convert to a plain dict straight away: a later rollback expires any
+        # ORM rows still attached to the session.
+        result[key] = to_dict(safe_query(model, "urn", year_field), fields, date_fields)
+    return result
@@ -7,7 +7,7 @@ from datetime import datetime

 from sqlalchemy import (
    Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
-    Text, Boolean, DateTime
+    Text, Boolean, DateTime, Date
 )
 from sqlalchemy.orm import relationship
 from .database import Base
@@ -39,6 +39,15 @@ class School(Base):
    latitude = Column(Float)
    longitude = Column(Float)

+    # GIAS enrichment fields
+    website = Column(String(255))
+    headteacher_name = Column(String(200))
+    capacity = Column(Integer)
+    trust_name = Column(String(255))
+    trust_uid = Column(String(20))
+    gender = Column(String(20))        # Mixed / Girls / Boys
+    nursery_provision = Column(Boolean)
+
    # Relationships
    results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
    
@@ -150,6 +159,169 @@ class SchemaVersion(Base):
        return f"<SchemaVersion(version={self.version}, migrated_at={self.migrated_at})>"


+# ---------------------------------------------------------------------------
+# Supplementary data tables (populated by the Kestra data integrator)
+# ---------------------------------------------------------------------------
+
+class OfstedInspection(Base):
+    """
+    Latest Ofsted inspection judgement per school.
+
+    ``urn`` is the sole primary key, so the table holds at most one row per
+    school. Judgements are stored as integer grades.
+    """
+    __tablename__ = "ofsted_inspections"
+
+    urn = Column(Integer, primary_key=True)
+    inspection_date = Column(Date)
+    publication_date = Column(Date)
+    inspection_type = Column(String(100))   # Section 5 / Section 8 etc.
+    # 1=Outstanding 2=Good 3=Requires improvement 4=Inadequate
+    overall_effectiveness = Column(Integer)
+    quality_of_education = Column(Integer)
+    behaviour_attitudes = Column(Integer)
+    personal_development = Column(Integer)
+    leadership_management = Column(Integer)
+    early_years_provision = Column(Integer)   # nullable — not all schools
+    previous_overall = Column(Integer)        # for trend display
+
+    def __repr__(self):
+        # Short debug form: URN plus the overall grade.
+        return f"<OfstedInspection(urn={self.urn}, overall={self.overall_effectiveness})>"
+
+
+class OfstedParentView(Base):
+    """
+    Ofsted Parent View survey — latest per school. 14 questions, % saying Yes.
+
+    ``urn`` is the sole primary key, so the table holds at most one row per
+    school. Each ``q_*_pct`` column stores the result for one survey question.
+    """
+    __tablename__ = "ofsted_parent_view"
+
+    urn = Column(Integer, primary_key=True)
+    survey_date = Column(Date)
+    total_responses = Column(Integer)
+    q_happy_pct = Column(Float)          # My child is happy at this school
+    q_safe_pct = Column(Float)           # My child feels safe at this school
+    q_bullying_pct = Column(Float)       # School deals with bullying well
+    q_communication_pct = Column(Float)  # School keeps me informed
+    q_progress_pct = Column(Float)       # My child does well / good progress
+    q_teaching_pct = Column(Float)       # Teaching is good
+    q_information_pct = Column(Float)    # I receive valuable info about progress
+    q_curriculum_pct = Column(Float)     # Broad range of subjects taught
+    q_future_pct = Column(Float)         # Prepares child well for the future
+    q_leadership_pct = Column(Float)     # Led and managed effectively
+    q_wellbeing_pct = Column(Float)      # Supports wider personal development
+    q_behaviour_pct = Column(Float)      # Pupils are well behaved
+    q_recommend_pct = Column(Float)      # I would recommend this school
+    q_sen_pct = Column(Float)            # Good information about child's SEN (where applicable)
+
+    def __repr__(self):
+        # Short debug form: URN plus the number of survey responses.
+        return f"<OfstedParentView(urn={self.urn}, responses={self.total_responses})>"
+
+
+class SchoolCensus(Base):
+    """
+    Annual school census snapshot — class sizes and ethnicity breakdown.
+
+    One row per school per year: (urn, year) is the composite primary key.
+    No separate (urn, year) index is declared because the primary key already
+    provides a unique index on exactly those columns.
+    """
+    __tablename__ = "school_census"
+
+    urn = Column(Integer, primary_key=True)
+    year = Column(Integer, primary_key=True)
+    class_size_avg = Column(Float)
+    ethnicity_white_pct = Column(Float)
+    ethnicity_asian_pct = Column(Float)
+    ethnicity_black_pct = Column(Float)
+    ethnicity_mixed_pct = Column(Float)
+    ethnicity_other_pct = Column(Float)
+
+    def __repr__(self):
+        return f"<SchoolCensus(urn={self.urn}, year={self.year})>"
+
+
+class SchoolAdmissions(Base):
+    """
+    Annual admissions statistics per school.
+
+    One row per school per year: (urn, year) is the composite primary key.
+    No separate (urn, year) index is declared because the primary key already
+    provides a unique index on exactly those columns.
+    """
+    __tablename__ = "school_admissions"
+
+    urn = Column(Integer, primary_key=True)
+    year = Column(Integer, primary_key=True)
+    published_admission_number = Column(Integer)   # PAN
+    total_applications = Column(Integer)
+    first_preference_offers_pct = Column(Float)    # % receiving 1st choice
+    oversubscribed = Column(Boolean)
+
+    def __repr__(self):
+        return f"<SchoolAdmissions(urn={self.urn}, year={self.year})>"
+
+
+class SenDetail(Base):
+    """
+    SEN primary need type breakdown — more granular than school_results context fields.
+
+    One row per school per year: (urn, year) is the composite primary key.
+    No separate (urn, year) index is declared because the primary key already
+    provides a unique index on exactly those columns.
+    """
+    __tablename__ = "sen_detail"
+
+    urn = Column(Integer, primary_key=True)
+    year = Column(Integer, primary_key=True)
+    primary_need_speech_pct = Column(Float)    # SLCN
+    primary_need_autism_pct = Column(Float)    # ASD
+    primary_need_mld_pct = Column(Float)       # Moderate learning difficulty
+    primary_need_spld_pct = Column(Float)      # Specific learning difficulty (dyslexia etc.)
+    primary_need_semh_pct = Column(Float)      # Social, emotional, mental health
+    primary_need_physical_pct = Column(Float)  # Physical/sensory
+    primary_need_other_pct = Column(Float)
+
+    def __repr__(self):
+        return f"<SenDetail(urn={self.urn}, year={self.year})>"
+
+
+class Phonics(Base):
+    """
+    Phonics Screening Check pass rates.
+
+    One row per school per year: (urn, year) is the composite primary key.
+    No separate (urn, year) index is declared because the primary key already
+    provides a unique index on exactly those columns.
+    """
+    __tablename__ = "phonics"
+
+    urn = Column(Integer, primary_key=True)
+    year = Column(Integer, primary_key=True)
+    year1_phonics_pct = Column(Float)   # % reaching expected standard in Year 1
+    year2_phonics_pct = Column(Float)   # % reaching standard in Year 2 (re-takers)
+
+    def __repr__(self):
+        return f"<Phonics(urn={self.urn}, year={self.year})>"
+
+
+class SchoolDeprivation(Base):
+    """
+    IDACI deprivation index — derived via postcode → LSOA lookup.
+
+    ``urn`` is the sole primary key, so the table holds at most one row per
+    school.
+    """
+    __tablename__ = "school_deprivation"
+
+    urn = Column(Integer, primary_key=True)
+    lsoa_code = Column(String(20))
+    idaci_score = Column(Float)    # 0–1, higher = more deprived
+    idaci_decile = Column(Integer) # 1 = most deprived, 10 = least deprived
+
+    def __repr__(self):
+        # Short debug form: URN plus the IDACI decile.
+        return f"<SchoolDeprivation(urn={self.urn}, decile={self.idaci_decile})>"
+
+
+class SchoolFinance(Base):
+    """
+    FBIT financial benchmarking data.
+
+    One row per school per year: (urn, year) is the composite primary key.
+    No separate (urn, year) index is declared because the primary key already
+    provides a unique index on exactly those columns.
+    """
+    __tablename__ = "school_finance"
+
+    urn = Column(Integer, primary_key=True)
+    year = Column(Integer, primary_key=True)
+    per_pupil_spend = Column(Float)          # £ total expenditure per pupil
+    staff_cost_pct = Column(Float)           # % of budget on all staff
+    teacher_cost_pct = Column(Float)         # % on teachers specifically
+    support_staff_cost_pct = Column(Float)
+    premises_cost_pct = Column(Float)
+
+    def __repr__(self):
+        return f"<SchoolFinance(urn={self.urn}, year={self.year})>"
+
+
 # Mapping from CSV columns to model fields
 SCHOOL_FIELD_MAPPING = {
    'urn': 'urn',
@@ -13,10 +13,11 @@ WHEN TO BUMP:
 """

 # Current schema version - increment when models change
-SCHEMA_VERSION = 2
+SCHEMA_VERSION = 3

 # Changelog for documentation
 SCHEMA_CHANGELOG = {
    1: "Initial schema with School and SchoolResult tables",
    2: "Added pupil absence fields (reading, maths, gps, writing, science)",
+    3: "Added supplementary data tables: ofsted, parent_view, census, admissions, sen_detail, phonics, deprivation, finance; GIAS columns on schools",
 }
@@ -77,9 +77,70 @@ services:
      retries: 3
      start_period: 40s

+  # Kestra — workflow orchestrator (UI at http://localhost:8080)
+  # NOTE(review): the datasource below assumes a `kestra` database already
+  # exists on the db service — confirm it is created alongside `schoolcompare`.
+  kestra:
+    image: kestra/kestra:latest
+    container_name: schoolcompare_kestra
+    # The Kestra image does not start a server on its own; it needs an explicit
+    # server command.
+    command: server standalone
+    ports:
+      - "8080:8080"
+    volumes:
+      - kestra_storage:/app/storage
+      # NOTE(review): nothing in this configuration imports flows from /flows —
+      # confirm how the YAMLs in ./integrator/flows get registered.
+      - ./integrator/flows:/flows
+    environment:
+      KESTRA_CONFIGURATION: |
+        datasources:
+          postgres:
+            url: jdbc:postgresql://db:5432/kestra
+            driverClassName: org.postgresql.Driver
+            username: schoolcompare
+            password: schoolcompare
+        kestra:
+          repository:
+            type: postgres
+          queue:
+            type: postgres
+          storage:
+            type: local
+            local:
+              base-path: /app/storage
+    depends_on:
+      db:
+        condition: service_healthy
+    networks:
+      - schoolcompare-network
+    restart: unless-stopped
+
+  # Data integrator — Python microservice called by Kestra
+  integrator:
+    build:
+      context: ./integrator
+      dockerfile: Dockerfile
+    container_name: schoolcompare_integrator
+    ports:
+      - "8001:8001"
+    environment:
+      DATABASE_URL: postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare
+      DATA_DIR: /data
+      PYTHONUNBUFFERED: 1
+    volumes:
+      - ./data:/data
+    depends_on:
+      db:
+        condition: service_healthy
+    networks:
+      - schoolcompare-network
+    restart: unless-stopped
+    healthcheck:
+      # The image is built from python:3.12-slim, which does not ship curl, so
+      # probe /health with the interpreter that is guaranteed to be present.
+      # urlopen raises (non-zero exit) on connection errors and HTTP >= 400.
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8001/health', timeout=5)"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 15s
+
 networks:
  schoolcompare-network:
    driver: bridge

 volumes:
  postgres_data:
+  kestra_storage:
@@ -0,0 +1,13 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install dependencies (copied before the code so this layer is reused when
+# only application files change)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY scripts/ ./scripts/
+COPY server.py .
+
+# Serve the `app` object from server.py on all interfaces, port 8001
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8001"]
@@ -0,0 +1,25 @@
+id: admissions-annual-update
+namespace: schoolcompare.data
+description: Download and load school admissions data via EES API
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 4 1 7 *"   # 1 July annually at 04:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/admissions?action=download
+    method: POST
+    timeout: PT20M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/admissions?action=load
+    method: POST
+    timeout: PT30M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT15M
@@ -0,0 +1,25 @@
+id: census-annual-update
+namespace: schoolcompare.data
+description: Download and load School Census (SPC) data via EES API
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 4 1 9 *"   # 1 September annually at 04:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/census?action=download
+    method: POST
+    timeout: PT20M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/census?action=load
+    method: POST
+    timeout: PT30M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT15M
@@ -0,0 +1,25 @@
+id: finance-annual-update
+namespace: schoolcompare.data
+description: Fetch FBIT financial benchmarking data from DfE API for all schools
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 4 1 12 *"   # 1 December annually at 04:00
+
+# Two sequential calls to the integrator service: download, then load.
+# NOTE(review): the HTTP client's own read timeout is configured separately
+# from the task-level `timeout` — confirm it allows a response this slow.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/finance?action=download
+    method: POST
+    timeout: PT120M   # Fetches per-school from API — ~20k schools
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/finance?action=load
+    method: POST
+    timeout: PT30M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 2
+  interval: PT30M
@@ -0,0 +1,30 @@
+id: gias-weekly-update
+namespace: schoolcompare.data
+description: Download and load GIAS (Get Information About Schools) bulk CSV
+
+triggers:
+  - id: weekly-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 3 * * 0"   # Every Sunday at 03:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/gias?action=download
+    method: POST
+    timeout: PT30M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/gias?action=load
+    method: POST
+    timeout: PT30M
+
+# NOTE(review): confirm `error.message` resolves in this Kestra version's
+# expression context; otherwise the failure handler itself will fail to render.
+errors:
+  - id: notify-failure
+    type: io.kestra.plugin.core.log.Log
+    message: "GIAS update FAILED: {{ error.message }}"
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT10M
@@ -0,0 +1,25 @@
+id: idaci-annual-check
+namespace: schoolcompare.data
+description: Download IoD2019 IDACI file and compute deprivation scores for all schools
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 5 1 1 *"   # 1 January annually at 05:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/idaci?action=download
+    method: POST
+    timeout: PT10M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/idaci?action=load
+    method: POST
+    timeout: PT60M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 2
+  interval: PT30M
@@ -0,0 +1,32 @@
+id: ofsted-monthly-update
+namespace: schoolcompare.data
+description: Download and load Ofsted Monthly Management Information CSV
+
+triggers:
+  - id: monthly-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 2 1 * *"   # 1st of each month at 02:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/ofsted?action=download
+    method: POST
+    allowFailed: false
+    timeout: PT10M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/ofsted?action=load
+    method: POST
+    allowFailed: false
+    timeout: PT30M
+
+# NOTE(review): confirm `error.message` resolves in this Kestra version's
+# expression context; otherwise the failure handler itself will fail to render.
+errors:
+  - id: notify-failure
+    type: io.kestra.plugin.core.log.Log
+    message: "Ofsted update FAILED: {{ error.message }}"
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT10M
@@ -0,0 +1,30 @@
+id: parent-view-monthly-check
+namespace: schoolcompare.data
+description: Download and load Ofsted Parent View open data (released ~3x/year)
+
+triggers:
+  - id: monthly-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 3 1 * *"   # 1st of each month at 03:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/parent_view?action=download
+    method: POST
+    timeout: PT10M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/parent_view?action=load
+    method: POST
+    timeout: PT20M
+
+# NOTE(review): confirm `error.message` resolves in this Kestra version's
+# expression context; otherwise the failure handler itself will fail to render.
+errors:
+  - id: notify-failure
+    type: io.kestra.plugin.core.log.Log
+    message: "Parent View update FAILED: {{ error.message }}"
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT10M
@@ -0,0 +1,25 @@
+id: phonics-annual-update
+namespace: schoolcompare.data
+description: Download and load Phonics Screening Check data via EES API
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 5 1 9 *"   # 1 September annually at 05:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/phonics?action=download
+    method: POST
+    timeout: PT20M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/phonics?action=load
+    method: POST
+    timeout: PT30M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT15M
@@ -0,0 +1,25 @@
+id: sen-detail-annual-update
+namespace: schoolcompare.data
+description: Download and load SEN primary need breakdown via EES API
+
+triggers:
+  - id: annual-schedule
+    type: io.kestra.plugin.core.trigger.Schedule
+    cron: "0 4 15 9 *"   # 15 September annually at 04:00
+
+# Two sequential calls to the integrator service: download, then load.
+tasks:
+  - id: download
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/sen_detail?action=download
+    method: POST
+    timeout: PT20M
+
+  - id: load
+    type: io.kestra.plugin.core.http.Request
+    uri: http://integrator:8001/run/sen_detail?action=load
+    method: POST
+    timeout: PT30M
+
+# A Kestra retry needs an explicit `type`, and a constant retry waits
+# `interval` between attempts (there is no `delay` property).
+# NOTE(review): older Kestra releases spell the limit `maxAttempt` — confirm
+# against the deployed version.
+retry:
+  type: constant
+  maxAttempts: 3
+  interval: PT15M
@@ -0,0 +1,7 @@
+fastapi==0.115.0
+uvicorn[standard]==0.30.6
+requests==2.32.3
+pandas==2.2.3
+openpyxl==3.1.5
+psycopg2-binary==2.9.9
+sqlalchemy==2.0.35
@@ -0,0 +1,11 @@
+"""Configuration for the data integrator."""
+import os
+from pathlib import Path
+
+# Database connection string; the environment variable overrides the default.
+DATABASE_URL = os.getenv(
+    "DATABASE_URL",
+    "postgresql://schoolcompare:schoolcompare@db:5432/schoolcompare",
+)
+
+# Root data directory, and the subtree that holds supplementary source files.
+DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
+SUPPLEMENTARY_DIR = DATA_DIR.joinpath("supplementary")
@@ -0,0 +1,23 @@
+"""Database connection for the integrator."""
+from contextlib import contextmanager
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from config import DATABASE_URL
+
+# Shared engine for the integrator. pool_pre_ping makes the pool test a
+# connection when it is checked out, so stale connections are replaced.
+engine = create_engine(DATABASE_URL, pool_pre_ping=True)
+# Session factory bound to the engine; commits and flushes are explicit.
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+
+@contextmanager
+def get_session():
+    """
+    Yield a database session wrapped in a transaction scope.
+
+    Commits when the ``with`` body finishes normally, rolls back and re-raises
+    on any exception, and always closes the session afterwards.
+    """
+    db_session = SessionLocal()
+    try:
+        yield db_session
+        # Commit stays inside the try so a failing commit is rolled back too.
+        db_session.commit()
+    except Exception:
+        db_session.rollback()
+        raise
+    finally:
+        db_session.close()
@@ -0,0 +1,158 @@
+"""
+School Admissions data downloader and loader.
+
+Source: EES publication "secondary-and-primary-school-applications-and-offers"
+Update: Annual (June/July post-offer round)
+"""
+import argparse
+import re
+import sys
+from pathlib import Path
+
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+from sources.ees import get_latest_csv_url, download_csv
+
+# Default download/load directory for admissions files.
+DEST_DIR = SUPPLEMENTARY_DIR / "admissions"
+# Publication slug passed to the EES helper when locating the CSV.
+PUBLICATION_SLUG = "secondary-and-primary-school-applications-and-offers"
+
+# Placeholder cell values treated as missing. The parse helpers compare
+# against this set after upper-casing, so entries must be upper case.
+NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}
+
+# Source CSV header → internal column name, applied with DataFrame.rename in
+# load(). Several header spellings map to the same internal name.
+COLUMN_MAP = {
+    "URN": "urn",
+    "urn": "urn",
+    "YEAR": "year",
+    "Year": "year",
+    # PAN
+    "PAN": "pan",
+    "published_admission_number": "pan",
+    "admissions_number": "pan",
+    # Applications
+    "total_applications": "total_applications",
+    "TAPP": "total_applications",
+    "applications_received": "total_applications",
+    # 1st preference offers
+    "first_preference_offers_pct": "first_preference_offers_pct",
+    "pct_1st_preference": "first_preference_offers_pct",
+    "PT1PREF": "first_preference_offers_pct",
+    # Oversubscription
+    "oversubscribed": "oversubscribed",
+}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    """
+    Download the latest admissions CSV into the admissions data directory.
+
+    Args:
+        data_dir: Optional data root; the file goes to
+            ``<data_dir>/supplementary/admissions``. Defaults to DEST_DIR.
+
+    Returns:
+        Whatever ``download_csv`` returns for the saved file.
+
+    Raises:
+        RuntimeError: If no CSV URL could be found for the publication.
+    """
+    dest = DEST_DIR if not data_dir else data_dir / "supplementary" / "admissions"
+    dest.mkdir(parents=True, exist_ok=True)
+
+    # First look up with keyword="primary"; fall back to an unfiltered lookup.
+    url = (
+        get_latest_csv_url(PUBLICATION_SLUG, keyword="primary")
+        or get_latest_csv_url(PUBLICATION_SLUG)
+    )
+    if not url:
+        raise RuntimeError("Could not find CSV URL for admissions publication")
+
+    # Last path segment without the query string; fixed name if that is empty.
+    basename = url.split("/")[-1].split("?")[0]
+    target = dest / (basename or "admissions_latest.csv")
+    return download_csv(url, target)
+
+
+def _parse_int(val) -> int | None:
+    """
+    Parse a CSV cell as an integer.
+
+    Thousands separators are removed and decimal values are truncated.
+
+    Returns:
+        The integer value, or None when the cell is missing, is one of the
+        NULL_VALUES placeholders, or is not a finite number.
+    """
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper().replace(",", "")
+    if s in NULL_VALUES:
+        return None
+    try:
+        return int(float(s))
+    except (ValueError, OverflowError):
+        # ValueError: non-numeric text or NaN; OverflowError: "INF"/"INFINITY",
+        # which float() accepts but int() cannot convert.
+        return None
+
+
+def _parse_pct(val) -> float | None:
+    """
+    Parse a CSV cell as a percentage value.
+
+    A trailing or embedded ``%`` sign is removed before conversion.
+
+    Returns:
+        The float value, or None when the cell is missing, is one of the
+        NULL_VALUES placeholders, or is not a finite number.
+    """
+    import math
+
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper().replace("%", "")
+    if s in NULL_VALUES:
+        return None
+    try:
+        number = float(s)
+    except ValueError:
+        return None
+    # float() accepts "NAN"/"INF"; neither is a meaningful percentage.
+    return number if math.isfinite(number) else None
+
+
+def _parse_bool(val) -> bool | None:
+    """Interpret a CSV cell as a boolean; None when missing, suppressed or unrecognised."""
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper()
+    if s in NULL_VALUES:
+        return None
+    if s in {"TRUE", "T", "YES", "Y", "1", "1.0"}:
+        return True
+    if s in {"FALSE", "F", "NO", "N", "0", "0.0"}:
+        return False
+    return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    """
+    Upsert admissions rows from a CSV into the school_admissions table.
+
+    Args:
+        path: CSV to load. When omitted, the last ``*.csv`` (sorted by name)
+            in the admissions directory is used.
+        data_dir: Optional data root used to locate the admissions directory
+            when ``path`` is omitted. Defaults to DEST_DIR.
+
+    Returns:
+        Dict with ``inserted`` (rows upserted), ``updated`` (always 0) and
+        ``skipped`` (rows without a usable year).
+
+    Raises:
+        FileNotFoundError: If no CSV is found in the admissions directory.
+        ValueError: If the CSV has no URN column after header mapping.
+    """
+    from sqlalchemy import text
+
+    if path is None:
+        dest = (data_dir / "supplementary" / "admissions") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No admissions CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  Admissions: loading {path} ...")
+    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    # Drop rows whose URN is not numeric.
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    # Fallback year taken from the file name, used for rows with no year value.
+    year = None
+    m = re.search(r"20(\d{2})", path.stem)
+    if m:
+        year = int("20" + m.group(1))
+
+    upsert = text("""
+                    INSERT INTO school_admissions
+                        (urn, year, published_admission_number, total_applications,
+                         first_preference_offers_pct, oversubscribed)
+                    VALUES (:urn, :year, :pan, :total_apps, :pct_1st, :oversubscribed)
+                    ON CONFLICT (urn, year) DO UPDATE SET
+                        published_admission_number  = EXCLUDED.published_admission_number,
+                        total_applications          = EXCLUDED.total_applications,
+                        first_preference_offers_pct = EXCLUDED.first_preference_offers_pct,
+                        oversubscribed              = EXCLUDED.oversubscribed
+                """)
+    has_year = "year" in df.columns
+
+    inserted = 0
+    skipped = 0
+    with get_session() as session:
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            row_year = int(row["year"]) if has_year and pd.notna(row.get("year")) else year
+            if not row_year:
+                skipped += 1
+                continue
+
+            pan = _parse_int(row.get("pan"))
+            total_apps = _parse_int(row.get("total_applications"))
+            pct_1st = _parse_pct(row.get("first_preference_offers_pct"))
+
+            # bool() on a CSV string is True for "No"/"False"/"0", so parse the
+            # cell explicitly. With no usable flag, mark the school as
+            # oversubscribed only when applications exceed the PAN.
+            oversubscribed = _parse_bool(row.get("oversubscribed"))
+            if oversubscribed is None and pan and total_apps and total_apps > pan:
+                oversubscribed = True
+
+            session.execute(
+                upsert,
+                {
+                    "urn": urn, "year": row_year, "pan": pan,
+                    "total_apps": total_apps, "pct_1st": pct_1st,
+                    "oversubscribed": oversubscribed,
+                },
+            )
+            inserted += 1
+
+    print(f"  Admissions: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": skipped}
+
+
+if __name__ == "__main__":
+    # CLI entry point: --action picks the step(s), --data-dir overrides the data root.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--action", choices=["download", "load", "all"], default="all")
+    cli.add_argument("--data-dir", type=Path, default=None)
+    opts = cli.parse_args()
+
+    wants_download = opts.action in ("download", "all")
+    wants_load = opts.action in ("load", "all")
+    if wants_download:
+        download(opts.data_dir)
+    if wants_load:
+        load(data_dir=opts.data_dir)
@@ -0,0 +1,148 @@
+"""
+School Census (SPC) downloader and loader.
+
+Source: EES publication "schools-pupils-and-their-characteristics"
+Update: Annual (June)
+Adds: class_size_avg, ethnicity breakdown by school
+"""
+import argparse
+import re
+import sys
+from pathlib import Path
+
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+from sources.ees import get_latest_csv_url, download_csv
+
+# Directory that download() writes census CSVs to and load() reads them from
+# (unless a data_dir override is given).
+DEST_DIR = SUPPLEMENTARY_DIR / "census"
+# EES publication slug passed to get_latest_csv_url() in download().
+PUBLICATION_SLUG = "schools-pupils-and-their-characteristics"
+
+# Cell values that _parse_pct treats as "no data". Cells are upper-cased before
+# the membership test, so every entry here is upper case.
+NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}
+
+# Source CSV header -> internal column name, applied with DataFrame.rename in
+# load(). Several header variants map onto the same internal name.
+COLUMN_MAP = {
+    "URN": "urn",
+    "urn": "urn",
+    "YEAR": "year",
+    "Year": "year",
+    # Class size
+    "average_class_size": "class_size_avg",
+    "AVCLAS": "class_size_avg",
+    "avg_class_size": "class_size_avg",
+    # Ethnicity — DfE uses ethnicity major group percentages
+    "perc_white": "ethnicity_white_pct",
+    "perc_asian": "ethnicity_asian_pct",
+    "perc_black": "ethnicity_black_pct",
+    "perc_mixed": "ethnicity_mixed_pct",
+    "perc_other_ethnic": "ethnicity_other_pct",
+    "PTWHITE": "ethnicity_white_pct",
+    "PTASIAN": "ethnicity_asian_pct",
+    "PTBLACK": "ethnicity_black_pct",
+    "PTMIXED": "ethnicity_mixed_pct",
+    "PTOTHER": "ethnicity_other_pct",
+}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "census") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
+    if not url:
+        raise RuntimeError(f"Could not find CSV URL for census publication")
+
+    filename = url.split("/")[-1].split("?")[0] or "census_latest.csv"
+    return download_csv(url, dest / filename)
+
+
+def _parse_pct(val) -> float | None:
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper().replace("%", "")
+    if s in NULL_VALUES:
+        return None
+    try:
+        return float(s)
+    except ValueError:
+        return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "census") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No census CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  Census: loading {path} ...")
+    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    year = None
+    m = re.search(r"20(\d{2})", path.stem)
+    if m:
+        year = int("20" + m.group(1))
+
+    inserted = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            row_year = int(row["year"]) if "year" in df.columns and pd.notna(row.get("year")) else year
+            if not row_year:
+                continue
+
+            session.execute(
+                text("""
+                    INSERT INTO school_census
+                        (urn, year, class_size_avg,
+                         ethnicity_white_pct, ethnicity_asian_pct, ethnicity_black_pct,
+                         ethnicity_mixed_pct, ethnicity_other_pct)
+                    VALUES (:urn, :year, :class_size_avg,
+                            :white, :asian, :black, :mixed, :other)
+                    ON CONFLICT (urn, year) DO UPDATE SET
+                        class_size_avg       = EXCLUDED.class_size_avg,
+                        ethnicity_white_pct  = EXCLUDED.ethnicity_white_pct,
+                        ethnicity_asian_pct  = EXCLUDED.ethnicity_asian_pct,
+                        ethnicity_black_pct  = EXCLUDED.ethnicity_black_pct,
+                        ethnicity_mixed_pct  = EXCLUDED.ethnicity_mixed_pct,
+                        ethnicity_other_pct  = EXCLUDED.ethnicity_other_pct
+                """),
+                {
+                    "urn": urn,
+                    "year": row_year,
+                    "class_size_avg": _parse_pct(row.get("class_size_avg")),
+                    "white": _parse_pct(row.get("ethnicity_white_pct")),
+                    "asian": _parse_pct(row.get("ethnicity_asian_pct")),
+                    "black": _parse_pct(row.get("ethnicity_black_pct")),
+                    "mixed": _parse_pct(row.get("ethnicity_mixed_pct")),
+                    "other": _parse_pct(row.get("ethnicity_other_pct")),
+                },
+            )
+            inserted += 1
+            if inserted % 5000 == 0:
+                session.flush()
+
+    print(f"  Census: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,53 @@
+"""
+Shared EES (Explore Education Statistics) API client.
+
+Base URL: https://api.education.gov.uk/statistics/v1
+"""
+import sys
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+# Root of the EES statistics API; get_publication_files() appends the endpoint path.
+API_BASE = "https://api.education.gov.uk/statistics/v1"
+# Timeout (seconds) for the file-listing request in get_publication_files().
+TIMEOUT = 60
+
+
+def get_publication_files(publication_slug: str) -> list[dict]:
+    """Return list of data-set file descriptors for a publication."""
+    url = f"{API_BASE}/publications/{publication_slug}/data-set-files"
+    resp = requests.get(url, timeout=TIMEOUT)
+    resp.raise_for_status()
+    return resp.json().get("results", [])
+
+
+def get_latest_csv_url(publication_slug: str, keyword: str = "") -> Optional[str]:
+    """
+    Find the most recent CSV download URL for a publication.
+    Optionally filter by a keyword in the file name.
+    """
+    files = get_publication_files(publication_slug)
+    for entry in files:
+        name = entry.get("name", "").lower()
+        if keyword and keyword.lower() not in name:
+            continue
+        csv_url = entry.get("csvDownloadUrl") or entry.get("file", {}).get("url")
+        if csv_url:
+            return csv_url
+    return None
+
+
+def download_csv(url: str, dest_path: Path) -> Path:
+    """Download a CSV from EES to dest_path."""
+    if dest_path.exists():
+        print(f"    EES: {dest_path.name} already exists, skipping.")
+        return dest_path
+    print(f"    EES: downloading {url} ...")
+    resp = requests.get(url, timeout=300, stream=True)
+    resp.raise_for_status()
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(dest_path, "wb") as f:
+        for chunk in resp.iter_content(chunk_size=65536):
+            f.write(chunk)
+    print(f"    EES: saved {dest_path} ({dest_path.stat().st_size // 1024} KB)")
+    return dest_path
@@ -0,0 +1,143 @@
+"""
+FBIT (Financial Benchmarking and Insights Tool) financial data loader.
+
+Source: https://schools-financial-benchmarking.service.gov.uk/api/
+Update: Annual (December — data for the prior financial year)
+"""
+import argparse
+import sys
+import time
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+
+# Directory holding the per-year fbit_<year>.csv snapshots written by download().
+DEST_DIR = SUPPLEMENTARY_DIR / "finance"
+# Base URL of the FBIT API; download() appends /schoolFinancialDataObject/{urn}.
+API_BASE = "https://schools-financial-benchmarking.service.gov.uk/api"
+RATE_LIMIT_DELAY = 0.1   # seconds between requests
+
+
+def download(data_dir: Path | None = None) -> Path:
+    """
+    Fetch per-URN financial data from FBIT API and save as CSV.
+    Batches all school URNs from the database.
+    """
+    dest = (data_dir / "supplementary" / "finance") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    # Determine year from API (use current year minus 1 for completed financials)
+    from datetime import date
+    year = date.today().year - 1
+    dest_file = dest / f"fbit_{year}.csv"
+
+    if dest_file.exists():
+        print(f"  Finance: {dest_file.name} already exists, skipping download.")
+        return dest_file
+
+    # Get all URNs from the database
+    with get_session() as session:
+        from sqlalchemy import text
+        rows = session.execute(text("SELECT urn FROM schools")).fetchall()
+    urns = [r[0] for r in rows]
+    print(f"  Finance: fetching FBIT data for {len(urns)} schools (year {year}) ...")
+
+    records = []
+    errors = 0
+    for i, urn in enumerate(urns):
+        if i % 500 == 0:
+            print(f"    {i}/{len(urns)} ...")
+        try:
+            resp = requests.get(
+                f"{API_BASE}/schoolFinancialDataObject/{urn}",
+                timeout=10,
+            )
+            if resp.status_code == 200:
+                data = resp.json()
+                if data:
+                    records.append({
+                        "urn": urn,
+                        "year": year,
+                        "per_pupil_spend": data.get("totalExpenditure") and
+                                           data.get("numberOfPupils") and
+                                           round(data["totalExpenditure"] / data["numberOfPupils"], 2),
+                        "staff_cost_pct": data.get("staffCostPercent"),
+                        "teacher_cost_pct": data.get("teachingStaffCostPercent"),
+                        "support_staff_cost_pct": data.get("educationSupportStaffCostPercent"),
+                        "premises_cost_pct": data.get("premisesStaffCostPercent"),
+                    })
+            elif resp.status_code not in (404, 400):
+                errors += 1
+        except Exception:
+            errors += 1
+
+        time.sleep(RATE_LIMIT_DELAY)
+
+    df = pd.DataFrame(records)
+    df.to_csv(dest_file, index=False)
+    print(f"  Finance: saved {len(records)} records to {dest_file} ({errors} errors)")
+    return dest_file
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "finance") if data_dir else DEST_DIR
+        files = sorted(dest.glob("fbit_*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No finance CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  Finance: loading {path} ...")
+    df = pd.read_csv(path)
+
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    inserted = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            session.execute(
+                text("""
+                    INSERT INTO school_finance
+                        (urn, year, per_pupil_spend, staff_cost_pct, teacher_cost_pct,
+                         support_staff_cost_pct, premises_cost_pct)
+                    VALUES (:urn, :year, :per_pupil, :staff, :teacher, :support, :premises)
+                    ON CONFLICT (urn, year) DO UPDATE SET
+                        per_pupil_spend        = EXCLUDED.per_pupil_spend,
+                        staff_cost_pct         = EXCLUDED.staff_cost_pct,
+                        teacher_cost_pct       = EXCLUDED.teacher_cost_pct,
+                        support_staff_cost_pct = EXCLUDED.support_staff_cost_pct,
+                        premises_cost_pct      = EXCLUDED.premises_cost_pct
+                """),
+                {
+                    "urn": int(row["urn"]),
+                    "year": int(row["year"]),
+                    "per_pupil": float(row["per_pupil_spend"]) if pd.notna(row.get("per_pupil_spend")) else None,
+                    "staff": float(row["staff_cost_pct"]) if pd.notna(row.get("staff_cost_pct")) else None,
+                    "teacher": float(row["teacher_cost_pct"]) if pd.notna(row.get("teacher_cost_pct")) else None,
+                    "support": float(row["support_staff_cost_pct"]) if pd.notna(row.get("support_staff_cost_pct")) else None,
+                    "premises": float(row["premises_cost_pct"]) if pd.notna(row.get("premises_cost_pct")) else None,
+                },
+            )
+            inserted += 1
+            if inserted % 2000 == 0:
+                session.flush()
+
+    print(f"  Finance: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,159 @@
+"""
+GIAS (Get Information About Schools) bulk CSV downloader and loader.
+
+Source: https://get-information-schools.service.gov.uk/Downloads
+Update: Daily; we refresh weekly.
+Adds: website, headteacher_name, capacity, trust_name, trust_uid, gender, nursery_provision
+"""
+import argparse
+import sys
+from datetime import date
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+
+# Directory that download() saves gias_<YYYYMMDD>.csv files to and load() reads from.
+DEST_DIR = SUPPLEMENTARY_DIR / "gias"
+
+# GIAS bulk download URL — date is injected at runtime
+# (download() formats it as YYYYMMDD).
+GIAS_URL_TEMPLATE = "https://ea-edubase-api-prod.azurewebsites.net/edubase/downloads/public/edubasealldata{date}.csv"
+
+# GIAS CSV header -> internal column name, applied with DataFrame.rename in load().
+# The three Head* parts are joined into headteacher_name, and
+# nursery_provision_raw is reduced to a boolean, inside load().
+COLUMN_MAP = {
+    "URN": "urn",
+    "SchoolWebsite": "website",
+    "SchoolCapacity": "capacity",
+    "TrustName": "trust_name",
+    "TrustUID": "trust_uid",
+    "Gender (name)": "gender",
+    "NurseryProvision (name)": "nursery_provision_raw",
+    "HeadTitle": "head_title",
+    "HeadFirstName": "head_first",
+    "HeadLastName": "head_last",
+}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "gias") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    today = date.today().strftime("%Y%m%d")
+    url = GIAS_URL_TEMPLATE.format(date=today)
+    filename = f"gias_{today}.csv"
+    dest_file = dest / filename
+
+    if dest_file.exists():
+        print(f"  GIAS: {filename} already exists, skipping download.")
+        return dest_file
+
+    print(f"  GIAS: downloading {url} ...")
+    resp = requests.get(url, timeout=300, stream=True)
+
+    # GIAS may not have today's file yet — fall back to yesterday
+    if resp.status_code == 404:
+        from datetime import timedelta
+        yesterday = (date.today() - timedelta(days=1)).strftime("%Y%m%d")
+        url = GIAS_URL_TEMPLATE.format(date=yesterday)
+        filename = f"gias_{yesterday}.csv"
+        dest_file = dest / filename
+        if dest_file.exists():
+            print(f"  GIAS: {filename} already exists, skipping download.")
+            return dest_file
+        resp = requests.get(url, timeout=300, stream=True)
+
+    resp.raise_for_status()
+    with open(dest_file, "wb") as f:
+        for chunk in resp.iter_content(chunk_size=65536):
+            f.write(chunk)
+
+    print(f"  GIAS: saved {dest_file} ({dest_file.stat().st_size // 1024} KB)")
+    return dest_file
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "gias") if data_dir else DEST_DIR
+        files = sorted(dest.glob("gias_*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No GIAS CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  GIAS: loading {path} ...")
+    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    # Build headteacher_name from parts
+    def build_name(row):
+        parts = [
+            str(row.get("head_title", "") or "").strip(),
+            str(row.get("head_first", "") or "").strip(),
+            str(row.get("head_last", "") or "").strip(),
+        ]
+        return " ".join(p for p in parts if p) or None
+
+    df["headteacher_name"] = df.apply(build_name, axis=1)
+    df["nursery_provision"] = df.get("nursery_provision_raw", pd.Series()).apply(
+        lambda v: True if str(v).strip().lower().startswith("has") else False if pd.notna(v) else None
+    )
+
+    def clean_str(val):
+        s = str(val).strip() if pd.notna(val) else None
+        return s if s and s.lower() not in ("nan", "none", "") else None
+
+    updated = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            session.execute(
+                text("""
+                    UPDATE schools SET
+                        website            = :website,
+                        headteacher_name   = :headteacher_name,
+                        capacity           = :capacity,
+                        trust_name         = :trust_name,
+                        trust_uid          = :trust_uid,
+                        gender             = :gender,
+                        nursery_provision  = :nursery_provision
+                    WHERE urn = :urn
+                """),
+                {
+                    "urn": urn,
+                    "website": clean_str(row.get("website")),
+                    "headteacher_name": row.get("headteacher_name"),
+                    "capacity": int(row["capacity"]) if pd.notna(row.get("capacity")) and str(row.get("capacity")).strip().isdigit() else None,
+                    "trust_name": clean_str(row.get("trust_name")),
+                    "trust_uid": clean_str(row.get("trust_uid")),
+                    "gender": clean_str(row.get("gender")),
+                    "nursery_provision": row.get("nursery_provision"),
+                },
+            )
+            updated += 1
+            if updated % 5000 == 0:
+                session.flush()
+                print(f"    Updated {updated} schools...")
+
+    print(f"  GIAS: updated {updated} school records")
+    return {"inserted": 0, "updated": updated, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+
+    if args.action in ("download", "all"):
+        path = download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,176 @@
+"""
+IDACI (Income Deprivation Affecting Children Index) loader.
+
+Source: English Indices of Deprivation 2019
+https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019
+
+This is a one-time download (5-yearly release). We join school postcodes to LSOAs
+via postcodes.io, then look up IDACI scores from the IoD2019 file.
+
+Update: ~5-yearly (next release expected 2025/26)
+"""
+import argparse
+import sys
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+
+# Directory that download() saves the IoD workbook to and load() reads it from.
+DEST_DIR = SUPPLEMENTARY_DIR / "idaci"
+
+# IoD 2019 supplementary data — "Income Deprivation Affecting Children Index (IDACI)"
+# NOTE(review): the file name in this URL is "File_1_-_IMD2019_Index_of_Multiple_
+# Deprivation"; confirm this workbook really contains the IDACI score column
+# that load() searches for.
+IOD_2019_URL = (
+    "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/"
+    "attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx"
+)
+
+# Bulk-lookup endpoint used by _postcode_to_lsoa(), and the number of postcodes
+# sent per request.
+POSTCODES_IO_BATCH = "https://api.postcodes.io/postcodes"
+BATCH_SIZE = 100
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    filename = "iod2019_idaci.xlsx"
+    dest_file = dest / filename
+    if dest_file.exists():
+        print(f"  IDACI: {filename} already exists, skipping download.")
+        return dest_file
+
+    print(f"  IDACI: downloading IoD2019 file ...")
+    resp = requests.get(IOD_2019_URL, timeout=300, stream=True)
+    resp.raise_for_status()
+    with open(dest_file, "wb") as f:
+        for chunk in resp.iter_content(chunk_size=65536):
+            f.write(chunk)
+
+    print(f"  IDACI: saved {dest_file}")
+    return dest_file
+
+
+def _postcode_to_lsoa(postcodes: list[str]) -> dict[str, str]:
+    """Batch-resolve postcodes to LSOA codes via postcodes.io."""
+    result = {}
+    valid = [p.strip().upper() for p in postcodes if p and len(str(p).strip()) >= 5]
+    valid = list(set(valid))
+
+    for i in range(0, len(valid), BATCH_SIZE):
+        batch = valid[i:i + BATCH_SIZE]
+        try:
+            resp = requests.post(POSTCODES_IO_BATCH, json={"postcodes": batch}, timeout=30)
+            if resp.status_code == 200:
+                for item in resp.json().get("result", []):
+                    if item and item.get("result"):
+                        lsoa = item["result"].get("lsoa")
+                        if lsoa:
+                            result[item["query"].upper()] = lsoa
+        except Exception as e:
+            print(f"  Warning: postcodes.io batch failed: {e}")
+
+    return result
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    dest = (data_dir / "supplementary" / "idaci") if data_dir else DEST_DIR
+    if path is None:
+        files = sorted(dest.glob("*.xlsx"))
+        if not files:
+            raise FileNotFoundError(f"No IDACI file found in {dest}")
+        path = files[-1]
+
+    print(f"  IDACI: loading IoD2019 from {path} ...")
+
+    # IoD2019 File 1 — sheet "IoD2019 IDACI" or similar
+    try:
+        iod_df = pd.read_excel(path, sheet_name=None)
+        # Find sheet with IDACI data
+        idaci_sheet = None
+        for name, df in iod_df.items():
+            if "IDACI" in name.upper() or "IDACI" in str(df.columns.tolist()).upper():
+                idaci_sheet = name
+                break
+        if idaci_sheet is None:
+            idaci_sheet = list(iod_df.keys())[0]
+        df_iod = iod_df[idaci_sheet]
+    except Exception as e:
+        raise RuntimeError(f"Could not read IoD2019 file: {e}")
+
+    # Normalise column names — IoD2019 uses specific headers
+    col_lsoa = next((c for c in df_iod.columns if "LSOA" in str(c).upper() and "code" in str(c).lower()), None)
+    col_score = next((c for c in df_iod.columns if "IDACI" in str(c).upper() and "score" in str(c).lower()), None)
+    col_rank = next((c for c in df_iod.columns if "IDACI" in str(c).upper() and "rank" in str(c).lower()), None)
+
+    if not col_lsoa or not col_score:
+        print(f"  IDACI columns available: {list(df_iod.columns)[:20]}")
+        raise ValueError("Could not find LSOA code or IDACI score columns")
+
+    df_iod = df_iod[[col_lsoa, col_score]].copy()
+    df_iod.columns = ["lsoa_code", "idaci_score"]
+    df_iod = df_iod.dropna()
+
+    # Compute decile from rank (or from score distribution)
+    total = len(df_iod)
+    df_iod = df_iod.sort_values("idaci_score", ascending=False)
+    df_iod["idaci_decile"] = (pd.qcut(df_iod["idaci_score"], 10, labels=False) + 1).astype(int)
+    # Decile 1 = most deprived (highest IDACI score)
+    df_iod["idaci_decile"] = 11 - df_iod["idaci_decile"]
+
+    lsoa_lookup = df_iod.set_index("lsoa_code")[["idaci_score", "idaci_decile"]].to_dict("index")
+    print(f"  IDACI: loaded {len(lsoa_lookup)} LSOA records")
+
+    # Fetch all school postcodes from the database
+    with get_session() as session:
+        from sqlalchemy import text
+        rows = session.execute(text("SELECT urn, postcode FROM schools WHERE postcode IS NOT NULL")).fetchall()
+
+    postcodes = [r[1] for r in rows]
+    print(f"  IDACI: resolving {len(postcodes)} postcodes via postcodes.io ...")
+    pc_to_lsoa = _postcode_to_lsoa(postcodes)
+    print(f"  IDACI: resolved {len(pc_to_lsoa)} postcodes to LSOAs")
+
+    inserted = skipped = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for urn, postcode in rows:
+            lsoa = pc_to_lsoa.get(str(postcode).strip().upper())
+            if not lsoa:
+                skipped += 1
+                continue
+            iod = lsoa_lookup.get(lsoa)
+            if not iod:
+                skipped += 1
+                continue
+
+            session.execute(
+                text("""
+                    INSERT INTO school_deprivation (urn, lsoa_code, idaci_score, idaci_decile)
+                    VALUES (:urn, :lsoa, :score, :decile)
+                    ON CONFLICT (urn) DO UPDATE SET
+                        lsoa_code    = EXCLUDED.lsoa_code,
+                        idaci_score  = EXCLUDED.idaci_score,
+                        idaci_decile = EXCLUDED.idaci_decile
+                """),
+                {"urn": urn, "lsoa": lsoa, "score": float(iod["idaci_score"]), "decile": int(iod["idaci_decile"])},
+            )
+            inserted += 1
+            if inserted % 2000 == 0:
+                session.flush()
+
+    print(f"  IDACI: upserted {inserted}, skipped {skipped}")
+    return {"inserted": inserted, "updated": 0, "skipped": skipped}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,226 @@
+"""
+Ofsted Monthly Management Information CSV downloader and loader.
+
+Source: https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes
+Update: Monthly (released ~2 weeks into each month)
+"""
+import argparse
+import re
+import sys
+from datetime import date, datetime
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+
+# GOV.UK landing page for the Ofsted MI data set. The download link itself is
+# not hard-coded: _discover_csv_url() scrapes this page for the first CSV/ZIP
+# asset link.
+GOV_UK_PAGE = "https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes"
+
+# Source CSV header -> internal column name, applied with DataFrame.rename in load().
+COLUMN_MAP = {
+    "URN": "urn",
+    "Inspection date": "inspection_date",
+    "Publication date": "publication_date",
+    "Inspection type": "inspection_type",
+    "Overall effectiveness": "overall_effectiveness",
+    "Quality of education": "quality_of_education",
+    "Behaviour and attitudes": "behaviour_attitudes",
+    "Personal development": "personal_development",
+    "Leadership and management": "leadership_management",
+    "Early years provision": "early_years_provision",
+    # Some CSVs use shortened names
+    "Urn": "urn",
+    "InspectionDate": "inspection_date",
+    "PublicationDate": "publication_date",
+    "InspectionType": "inspection_type",
+    "OverallEffectiveness": "overall_effectiveness",
+    "QualityOfEducation": "quality_of_education",
+    "BehaviourAndAttitudes": "behaviour_attitudes",
+    "PersonalDevelopment": "personal_development",
+    "LeadershipAndManagement": "leadership_management",
+    "EarlyYearsProvision": "early_years_provision",
+}
+
+# Grade label or numeric code -> integer grade (1-4), looked up by _parse_grade().
+# Both spellings of "Requires improvement" are accepted.
+GRADE_MAP = {
+    "Outstanding": 1, "1": 1, 1: 1,
+    "Good": 2, "2": 2, 2: 2,
+    "Requires improvement": 3, "3": 3, 3: 3,
+    "Requires Improvement": 3,
+    "Inadequate": 4, "4": 4, 4: 4,
+}
+
+# Directory that download() saves Ofsted MI files to and load() reads them from.
+DEST_DIR = SUPPLEMENTARY_DIR / "ofsted"
+
+
+def _discover_csv_url() -> str | None:
+    """Scrape the GOV.UK page for the most recent CSV/ZIP link."""
+    try:
+        resp = requests.get(GOV_UK_PAGE, timeout=30)
+        resp.raise_for_status()
+        # Look for links to assets.publishing.service.gov.uk CSV or ZIP files
+        pattern = r'href="(https://assets\.publishing\.service\.gov\.uk[^"]+\.(?:csv|zip))"'
+        urls = re.findall(pattern, resp.text, re.IGNORECASE)
+        if urls:
+            return urls[0]
+    except Exception as e:
+        print(f"  Warning: could not scrape GOV.UK page: {e}")
+    return None
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "ofsted") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    url = _discover_csv_url()
+    if not url:
+        raise RuntimeError(
+            "Could not discover Ofsted MI download URL. "
+            "Visit https://www.gov.uk/government/statistical-data-sets/"
+            "monthly-management-information-ofsteds-school-inspections-outcomes "
+            "to get the latest URL and update MANUAL_URL in ofsted.py"
+        )
+
+    filename = url.split("/")[-1]
+    dest_file = dest / filename
+
+    if dest_file.exists():
+        print(f"  Ofsted: {filename} already exists, skipping download.")
+        return dest_file
+
+    print(f"  Ofsted: downloading {url} ...")
+    resp = requests.get(url, timeout=120, stream=True)
+    resp.raise_for_status()
+    with open(dest_file, "wb") as f:
+        for chunk in resp.iter_content(chunk_size=65536):
+            f.write(chunk)
+
+    print(f"  Ofsted: saved {dest_file} ({dest_file.stat().st_size // 1024} KB)")
+    return dest_file
+
+
+def _parse_grade(val) -> int | None:
+    if pd.isna(val):
+        return None
+    key = str(val).strip()
+    return GRADE_MAP.get(key)
+
+
+def _parse_date(val) -> date | None:
+    if pd.isna(val):
+        return None
+    for fmt in ("%d/%m/%Y", "%Y-%m-%d", "%d-%m-%Y", "%d %B %Y"):
+        try:
+            return datetime.strptime(str(val).strip(), fmt).date()
+        except ValueError:
+            pass
+    return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "ofsted") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.csv")) + sorted(dest.glob("*.zip"))
+        if not files:
+            raise FileNotFoundError(f"No Ofsted MI file found in {dest}")
+        path = files[-1]
+
+    print(f"  Ofsted: loading {path} ...")
+
+    if str(path).endswith(".zip"):
+        import zipfile, io
+        with zipfile.ZipFile(path) as z:
+            csv_names = [n for n in z.namelist() if n.endswith(".csv")]
+            if not csv_names:
+                raise ValueError("No CSV found inside Ofsted ZIP")
+            with z.open(csv_names[0]) as f:
+                df = pd.read_csv(io.TextIOWrapper(f, encoding="latin-1"), low_memory=False)
+    else:
+        df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+
+    # Normalise column names
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    # Only keep rows with a valid URN
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    inserted = updated = skipped = 0
+
+    with get_session() as session:
+        # Keep only the most recent inspection per URN
+        if "inspection_date" in df.columns:
+            df["_date_parsed"] = df["inspection_date"].apply(_parse_date)
+            df = df.sort_values("_date_parsed", ascending=False).groupby("urn").first().reset_index()
+
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+
+            record = {
+                "urn": urn,
+                "inspection_date": _parse_date(row.get("inspection_date")),
+                "publication_date": _parse_date(row.get("publication_date")),
+                "inspection_type": str(row.get("inspection_type", "")).strip() or None,
+                "overall_effectiveness": _parse_grade(row.get("overall_effectiveness")),
+                "quality_of_education": _parse_grade(row.get("quality_of_education")),
+                "behaviour_attitudes": _parse_grade(row.get("behaviour_attitudes")),
+                "personal_development": _parse_grade(row.get("personal_development")),
+                "leadership_management": _parse_grade(row.get("leadership_management")),
+                "early_years_provision": _parse_grade(row.get("early_years_provision")),
+                "previous_overall": None,
+            }
+
+            from sqlalchemy import text
+            session.execute(
+                text("""
+                    INSERT INTO ofsted_inspections
+                        (urn, inspection_date, publication_date, inspection_type,
+                         overall_effectiveness, quality_of_education, behaviour_attitudes,
+                         personal_development, leadership_management, early_years_provision,
+                         previous_overall)
+                    VALUES
+                        (:urn, :inspection_date, :publication_date, :inspection_type,
+                         :overall_effectiveness, :quality_of_education, :behaviour_attitudes,
+                         :personal_development, :leadership_management, :early_years_provision,
+                         :previous_overall)
+                    ON CONFLICT (urn) DO UPDATE SET
+                        previous_overall = ofsted_inspections.overall_effectiveness,
+                        inspection_date = EXCLUDED.inspection_date,
+                        publication_date = EXCLUDED.publication_date,
+                        inspection_type = EXCLUDED.inspection_type,
+                        overall_effectiveness = EXCLUDED.overall_effectiveness,
+                        quality_of_education = EXCLUDED.quality_of_education,
+                        behaviour_attitudes = EXCLUDED.behaviour_attitudes,
+                        personal_development = EXCLUDED.personal_development,
+                        leadership_management = EXCLUDED.leadership_management,
+                        early_years_provision = EXCLUDED.early_years_provision
+                """),
+                record,
+            )
+            inserted += 1
+
+            if inserted % 5000 == 0:
+                session.flush()
+                print(f"    Processed {inserted} records...")
+
+    print(f"  Ofsted: upserted {inserted} records")
+    return {"inserted": inserted, "updated": updated, "skipped": skipped}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+
+    if args.action in ("download", "all"):
+        path = download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,229 @@
+"""
+Ofsted Parent View open data downloader and loader.
+
+Source: https://parentview.ofsted.gov.uk/open-data
+Update: ~3 times/year (Spring, Autumn, Summer)
+"""
+import argparse
+import re
+import sys
+from datetime import date, datetime
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+
+# Directory (under the supplementary data root) where Parent View files are stored.
+DEST_DIR = SUPPLEMENTARY_DIR / "parent_view"
+# Landing page that download() scrapes for the first .xlsx/.csv/.zip link.
+OPEN_DATA_PAGE = "https://parentview.ofsted.gov.uk/open-data"
+
+# Question column mapping — Parent View open data uses descriptive column headers
+# Map any variant to our internal field names
+# NOTE(review): nothing in this module reads QUESTION_MAP; load() derives every
+# field from "Q<n> - ... %" style headers instead. Confirm whether the map is
+# still needed or was meant to be applied to the DataFrame columns.
+QUESTION_MAP = {
+    # Q1 — happiness
+    "My child is happy at this school": "q_happy_pct",
+    "Happy": "q_happy_pct",
+    # Q2 — safety
+    "My child feels safe at this school": "q_safe_pct",
+    "Safe": "q_safe_pct",
+    # Q3 — behaviour
+    "The school makes sure its pupils are well behaved": "q_behaviour_pct",
+    "Well Behaved": "q_behaviour_pct",
+    # Q4 — bullying dealt with (sometimes separate)
+    "My child has been bullied and the school dealt with the bullying quickly and effectively": "q_bullying_pct",
+    "Bullying": "q_bullying_pct",
+    # Q5 — curriculum info
+    "The school makes me aware of what my child will learn during the year": "q_communication_pct",
+    "Aware of learning": "q_communication_pct",
+    # Q6 — concerns dealt with (shares q_communication_pct with Q5)
+    "When I have raised concerns with the school, they have been dealt with properly": "q_communication_pct",
+    # Q7 — child does well
+    "My child does well at this school": "q_progress_pct",
+    "Does well": "q_progress_pct",
+    # Q8 — teaching
+    "The teaching is good at this school": "q_teaching_pct",
+    "Good teaching": "q_teaching_pct",
+    # Q9 — progress info
+    "I receive valuable information from the school about my child's progress": "q_information_pct",
+    "Progress information": "q_information_pct",
+    # Q10 — curriculum breadth
+    "My child is taught a broad range of subjects": "q_curriculum_pct",
+    "Broad subjects": "q_curriculum_pct",
+    # Q11 — prepares for future
+    "The school prepares my child well for the future": "q_future_pct",
+    "Prepared for future": "q_future_pct",
+    # Q12 — leadership
+    "The school is led and managed effectively": "q_leadership_pct",
+    "Led well": "q_leadership_pct",
+    # Q13 — wellbeing
+    "The school supports my child's wider personal development": "q_wellbeing_pct",
+    "Personal development": "q_wellbeing_pct",
+    # Q14 — recommendation
+    "I would recommend this school to another parent": "q_recommend_pct",
+    "Recommend": "q_recommend_pct",
+}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "parent_view") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    # Scrape the open data page for the download link
+    try:
+        resp = requests.get(OPEN_DATA_PAGE, timeout=30)
+        resp.raise_for_status()
+        pattern = r'href="([^"]+\.(?:xlsx|csv|zip))"'
+        urls = re.findall(pattern, resp.text, re.IGNORECASE)
+        if not urls:
+            raise RuntimeError("No download link found on Parent View open data page")
+        url = urls[0] if urls[0].startswith("http") else "https://parentview.ofsted.gov.uk" + urls[0]
+    except Exception as e:
+        raise RuntimeError(f"Could not discover Parent View download URL: {e}")
+
+    filename = url.split("/")[-1].split("?")[0]
+    dest_file = dest / filename
+
+    if dest_file.exists():
+        print(f"  ParentView: {filename} already exists, skipping download.")
+        return dest_file
+
+    print(f"  ParentView: downloading {url} ...")
+    resp = requests.get(url, timeout=120, stream=True)
+    resp.raise_for_status()
+    with open(dest_file, "wb") as f:
+        for chunk in resp.iter_content(chunk_size=65536):
+            f.write(chunk)
+
+    print(f"  ParentView: saved {dest_file}")
+    return dest_file
+
+
+def _positive_pct(row: pd.Series, q_col_base: str) -> float | None:
+    """Sum 'Strongly agree' + 'Agree' percentages for a question."""
+    # Parent View open data has columns like "Q1 - Strongly agree %", "Q1 - Agree %"
+    strongly = row.get(f"{q_col_base} - Strongly agree %") or row.get(f"{q_col_base} - Strongly Agree %")
+    agree = row.get(f"{q_col_base} - Agree %")
+    try:
+        total = 0.0
+        if pd.notna(strongly):
+            total += float(strongly)
+        if pd.notna(agree):
+            total += float(agree)
+        return round(total, 1) if total > 0 else None
+    except (TypeError, ValueError):
+        return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "parent_view") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.xlsx")) + sorted(dest.glob("*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No Parent View file found in {dest}")
+        path = files[-1]
+
+    print(f"  ParentView: loading {path} ...")
+
+    if str(path).endswith(".xlsx"):
+        df = pd.read_excel(path)
+    else:
+        df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+
+    # Normalise URN column
+    urn_col = next((c for c in df.columns if c.strip().upper() == "URN"), None)
+    if not urn_col:
+        raise ValueError(f"URN column not found. Columns: {list(df.columns)[:20]}")
+    df.rename(columns={urn_col: "urn"}, inplace=True)
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    # Try to find total responses column
+    resp_col = next((c for c in df.columns if "total" in c.lower() and "respon" in c.lower()), None)
+
+    inserted = 0
+    today = date.today()
+
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            total = int(row[resp_col]) if resp_col and pd.notna(row.get(resp_col)) else None
+
+            # Try to extract % positive per question from wide-format columns
+            # Parent View has numbered questions Q1–Q12 (or Q1–Q14 depending on year)
+            record = {
+                "urn": urn,
+                "survey_date": today,
+                "total_responses": total,
+                "q_happy_pct": _positive_pct(row, "Q1"),
+                "q_safe_pct": _positive_pct(row, "Q2"),
+                "q_behaviour_pct": _positive_pct(row, "Q3"),
+                "q_bullying_pct": _positive_pct(row, "Q4"),
+                "q_communication_pct": _positive_pct(row, "Q5"),
+                "q_progress_pct": _positive_pct(row, "Q7"),
+                "q_teaching_pct": _positive_pct(row, "Q8"),
+                "q_information_pct": _positive_pct(row, "Q9"),
+                "q_curriculum_pct": _positive_pct(row, "Q10"),
+                "q_future_pct": _positive_pct(row, "Q11"),
+                "q_leadership_pct": _positive_pct(row, "Q12"),
+                "q_wellbeing_pct": _positive_pct(row, "Q13"),
+                "q_recommend_pct": _positive_pct(row, "Q14"),
+                "q_sen_pct": None,
+            }
+
+            session.execute(
+                text("""
+                    INSERT INTO ofsted_parent_view
+                        (urn, survey_date, total_responses,
+                         q_happy_pct, q_safe_pct, q_behaviour_pct, q_bullying_pct,
+                         q_communication_pct, q_progress_pct, q_teaching_pct,
+                         q_information_pct, q_curriculum_pct, q_future_pct,
+                         q_leadership_pct, q_wellbeing_pct, q_recommend_pct, q_sen_pct)
+                    VALUES
+                        (:urn, :survey_date, :total_responses,
+                         :q_happy_pct, :q_safe_pct, :q_behaviour_pct, :q_bullying_pct,
+                         :q_communication_pct, :q_progress_pct, :q_teaching_pct,
+                         :q_information_pct, :q_curriculum_pct, :q_future_pct,
+                         :q_leadership_pct, :q_wellbeing_pct, :q_recommend_pct, :q_sen_pct)
+                    ON CONFLICT (urn) DO UPDATE SET
+                        survey_date = EXCLUDED.survey_date,
+                        total_responses = EXCLUDED.total_responses,
+                        q_happy_pct = EXCLUDED.q_happy_pct,
+                        q_safe_pct = EXCLUDED.q_safe_pct,
+                        q_behaviour_pct = EXCLUDED.q_behaviour_pct,
+                        q_bullying_pct = EXCLUDED.q_bullying_pct,
+                        q_communication_pct = EXCLUDED.q_communication_pct,
+                        q_progress_pct = EXCLUDED.q_progress_pct,
+                        q_teaching_pct = EXCLUDED.q_teaching_pct,
+                        q_information_pct = EXCLUDED.q_information_pct,
+                        q_curriculum_pct = EXCLUDED.q_curriculum_pct,
+                        q_future_pct = EXCLUDED.q_future_pct,
+                        q_leadership_pct = EXCLUDED.q_leadership_pct,
+                        q_wellbeing_pct = EXCLUDED.q_wellbeing_pct,
+                        q_recommend_pct = EXCLUDED.q_recommend_pct,
+                        q_sen_pct = EXCLUDED.q_sen_pct
+                """),
+                record,
+            )
+            inserted += 1
+            if inserted % 2000 == 0:
+                session.flush()
+
+    print(f"  ParentView: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,132 @@
+"""
+Phonics Screening Check downloader and loader.
+
+Source: EES publication "phonics-screening-check-and-key-stage-1-assessments-england"
+Update: Annual (September/October)
+"""
+import argparse
+import sys
+from pathlib import Path
+
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+from sources.ees import get_latest_csv_url, download_csv
+
+# Directory (under the supplementary data root) where phonics CSVs are stored.
+DEST_DIR = SUPPLEMENTARY_DIR / "phonics"
+# Publication identifier passed to get_latest_csv_url() in download().
+PUBLICATION_SLUG = "phonics-screening-check-and-key-stage-1-assessments-england"
+
+# Known column names in the phonics CSV (vary by year)
+# Keys are source headers; values are the internal column names load() reads.
+# Several keys share one target, so a file containing more than one of them
+# ends up with duplicate column names after DataFrame.rename().
+COLUMN_MAP = {
+    "URN": "urn",
+    "urn": "urn",
+    # Year 1 pass rate
+    "PPTA1": "year1_phonics_pct",          # % meeting expected standard Y1
+    "PPTA1B": "year1_phonics_pct",
+    "PT_MET_PHON_Y1": "year1_phonics_pct",
+    "Y1_MET_EXPECTED_PCT": "year1_phonics_pct",
+    # Year 2 (re-takers)
+    "PPTA2": "year2_phonics_pct",
+    "PT_MET_PHON_Y2": "year2_phonics_pct",
+    "Y2_MET_EXPECTED_PCT": "year2_phonics_pct",
+    # Year label
+    "YEAR": "year",
+    "Year": "year",
+}
+
+# Cell values (compared after strip/upper-casing and "%" removal in _parse_pct)
+# that are stored as NULL rather than parsed as numbers.
+NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", ""}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "phonics") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
+    if not url:
+        raise RuntimeError(f"Could not find CSV URL for phonics publication")
+
+    filename = url.split("/")[-1].split("?")[0] or "phonics_latest.csv"
+    return download_csv(url, dest / filename)
+
+
+def _parse_pct(val) -> float | None:
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper().replace("%", "")
+    if s in NULL_VALUES:
+        return None
+    try:
+        return float(s)
+    except ValueError:
+        return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "phonics") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No phonics CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  Phonics: loading {path} ...")
+    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    # Infer year from filename if not in data
+    year = None
+    import re
+    m = re.search(r"20(\d{2})", path.stem)
+    if m:
+        year = int("20" + m.group(1))
+
+    inserted = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            row_year = int(row["year"]) if "year" in df.columns and pd.notna(row.get("year")) else year
+            if not row_year:
+                continue
+
+            session.execute(
+                text("""
+                    INSERT INTO phonics (urn, year, year1_phonics_pct, year2_phonics_pct)
+                    VALUES (:urn, :year, :y1, :y2)
+                    ON CONFLICT (urn, year) DO UPDATE SET
+                        year1_phonics_pct = EXCLUDED.year1_phonics_pct,
+                        year2_phonics_pct = EXCLUDED.year2_phonics_pct
+                """),
+                {
+                    "urn": urn,
+                    "year": row_year,
+                    "y1": _parse_pct(row.get("year1_phonics_pct")),
+                    "y2": _parse_pct(row.get("year2_phonics_pct")),
+                },
+            )
+            inserted += 1
+            if inserted % 5000 == 0:
+                session.flush()
+
+    print(f"  Phonics: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,150 @@
+"""
+SEN (Special Educational Needs) primary need type breakdown.
+
+Source: EES publication "special-educational-needs-in-england"
+Update: Annual (September)
+"""
+import argparse
+import re
+import sys
+from pathlib import Path
+
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from config import SUPPLEMENTARY_DIR
+from db import get_session
+from sources.ees import get_latest_csv_url, download_csv
+
+# Directory (under the supplementary data root) where SEN CSVs are stored.
+DEST_DIR = SUPPLEMENTARY_DIR / "sen_detail"
+# Publication identifier passed to get_latest_csv_url() in download().
+PUBLICATION_SLUG = "special-educational-needs-in-england"
+
+# Cell values (compared after strip/upper-casing and "%" removal in _parse_pct)
+# that are stored as NULL rather than parsed as numbers.
+NULL_VALUES = {"SUPP", "NE", "NA", "NP", "NEW", "LOW", "X", ""}
+
+# Source header -> internal column name read by load(). Two naming schemes map
+# to the same targets, so a file containing both would produce duplicate
+# column names after DataFrame.rename().
+COLUMN_MAP = {
+    "URN": "urn",
+    "urn": "urn",
+    "YEAR": "year",
+    "Year": "year",
+    # Primary need types — DfE abbreviated codes
+    "PT_SPEECH": "primary_need_speech_pct",       # SLCN
+    "PT_ASD": "primary_need_autism_pct",           # ASD
+    "PT_MLD": "primary_need_mld_pct",             # Moderate learning difficulty
+    "PT_SPLD": "primary_need_spld_pct",           # Specific learning difficulty
+    "PT_SEMH": "primary_need_semh_pct",           # Social, emotional, mental health
+    "PT_PHYSICAL": "primary_need_physical_pct",   # Physical/sensory
+    "PT_OTHER": "primary_need_other_pct",
+    # Alternative naming
+    "SLCN_PCT": "primary_need_speech_pct",
+    "ASD_PCT": "primary_need_autism_pct",
+    "MLD_PCT": "primary_need_mld_pct",
+    "SPLD_PCT": "primary_need_spld_pct",
+    "SEMH_PCT": "primary_need_semh_pct",
+    "PHYSICAL_PCT": "primary_need_physical_pct",
+    "OTHER_PCT": "primary_need_other_pct",
+}
+
+
+def download(data_dir: Path | None = None) -> Path:
+    dest = (data_dir / "supplementary" / "sen_detail") if data_dir else DEST_DIR
+    dest.mkdir(parents=True, exist_ok=True)
+
+    url = get_latest_csv_url(PUBLICATION_SLUG, keyword="school")
+    if not url:
+        url = get_latest_csv_url(PUBLICATION_SLUG)
+    if not url:
+        raise RuntimeError("Could not find CSV URL for SEN publication")
+
+    filename = url.split("/")[-1].split("?")[0] or "sen_latest.csv"
+    return download_csv(url, dest / filename)
+
+
+def _parse_pct(val) -> float | None:
+    if pd.isna(val):
+        return None
+    s = str(val).strip().upper().replace("%", "")
+    if s in NULL_VALUES:
+        return None
+    try:
+        return float(s)
+    except ValueError:
+        return None
+
+
+def load(path: Path | None = None, data_dir: Path | None = None) -> dict:
+    if path is None:
+        dest = (data_dir / "supplementary" / "sen_detail") if data_dir else DEST_DIR
+        files = sorted(dest.glob("*.csv"))
+        if not files:
+            raise FileNotFoundError(f"No SEN CSV found in {dest}")
+        path = files[-1]
+
+    print(f"  SEN Detail: loading {path} ...")
+    df = pd.read_csv(path, encoding="latin-1", low_memory=False)
+    df.rename(columns=COLUMN_MAP, inplace=True)
+
+    if "urn" not in df.columns:
+        raise ValueError(f"URN column not found. Available: {list(df.columns)[:20]}")
+
+    df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
+    df = df.dropna(subset=["urn"])
+    df["urn"] = df["urn"].astype(int)
+
+    year = None
+    m = re.search(r"20(\d{2})", path.stem)
+    if m:
+        year = int("20" + m.group(1))
+
+    inserted = 0
+    with get_session() as session:
+        from sqlalchemy import text
+        for _, row in df.iterrows():
+            urn = int(row["urn"])
+            row_year = int(row["year"]) if "year" in df.columns and pd.notna(row.get("year")) else year
+            if not row_year:
+                continue
+
+            session.execute(
+                text("""
+                    INSERT INTO sen_detail
+                        (urn, year, primary_need_speech_pct, primary_need_autism_pct,
+                         primary_need_mld_pct, primary_need_spld_pct, primary_need_semh_pct,
+                         primary_need_physical_pct, primary_need_other_pct)
+                    VALUES (:urn, :year, :speech, :autism, :mld, :spld, :semh, :physical, :other)
+                    ON CONFLICT (urn, year) DO UPDATE SET
+                        primary_need_speech_pct   = EXCLUDED.primary_need_speech_pct,
+                        primary_need_autism_pct   = EXCLUDED.primary_need_autism_pct,
+                        primary_need_mld_pct      = EXCLUDED.primary_need_mld_pct,
+                        primary_need_spld_pct     = EXCLUDED.primary_need_spld_pct,
+                        primary_need_semh_pct     = EXCLUDED.primary_need_semh_pct,
+                        primary_need_physical_pct = EXCLUDED.primary_need_physical_pct,
+                        primary_need_other_pct    = EXCLUDED.primary_need_other_pct
+                """),
+                {
+                    "urn": urn, "year": row_year,
+                    "speech": _parse_pct(row.get("primary_need_speech_pct")),
+                    "autism": _parse_pct(row.get("primary_need_autism_pct")),
+                    "mld": _parse_pct(row.get("primary_need_mld_pct")),
+                    "spld": _parse_pct(row.get("primary_need_spld_pct")),
+                    "semh": _parse_pct(row.get("primary_need_semh_pct")),
+                    "physical": _parse_pct(row.get("primary_need_physical_pct")),
+                    "other": _parse_pct(row.get("primary_need_other_pct")),
+                },
+            )
+            inserted += 1
+            if inserted % 5000 == 0:
+                session.flush()
+
+    print(f"  SEN Detail: upserted {inserted} records")
+    return {"inserted": inserted, "updated": 0, "skipped": 0}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--action", choices=["download", "load", "all"], default="all")
+    parser.add_argument("--data-dir", type=Path, default=None)
+    args = parser.parse_args()
+    if args.action in ("download", "all"):
+        download(args.data_dir)
+    if args.action in ("load", "all"):
+        load(data_dir=args.data_dir)
@@ -0,0 +1,70 @@
+"""
+Data integrator HTTP server.
+Kestra calls this server via HTTP tasks to trigger download/load operations.
+"""
+import importlib
+import sys
+import traceback
+from pathlib import Path
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse
+
+# Put the scripts directory first on the import path so that
+# importlib.import_module("sources.<name>") in the handlers below can resolve.
+# NOTE(review): "/app/scripts" is a hard-coded path, presumably the container
+# layout — confirm against the image.
+sys.path.insert(0, "/app/scripts")
+
+app = FastAPI(title="SchoolCompare Data Integrator", version="1.0.0")
+
+# Allow-list of source names accepted by /run/{source}; each is imported as
+# the module "sources.<name>" and must expose download() and load().
+SOURCES = {
+    "ofsted", "gias", "parent_view",
+    "census", "admissions", "sen_detail",
+    "phonics", "idaci", "finance",
+}
+
+
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+
+
+@app.post("/run/{source}")
+def run_source(source: str, action: str = "all"):
+    """
+    Trigger a data source download and/or load.
+    action: "download" | "load" | "all"
+    """
+    if source not in SOURCES:
+        raise HTTPException(status_code=404, detail=f"Unknown source '{source}'. Available: {sorted(SOURCES)}")
+    if action not in ("download", "load", "all"):
+        raise HTTPException(status_code=400, detail="action must be 'download', 'load', or 'all'")
+
+    try:
+        mod = importlib.import_module(f"sources.{source}")
+        result = {}
+
+        if action in ("download", "all"):
+            mod.download()
+
+        if action in ("load", "all"):
+            result = mod.load()
+
+        return {"source": source, "action": action, "result": result}
+
+    except Exception as e:
+        tb = traceback.format_exc()
+        raise HTTPException(status_code=500, detail={"error": str(e), "traceback": tb})
+
+
+@app.post("/run-all")
+def run_all(action: str = "all"):
+    """Trigger all sources in sequence."""
+    results = {}
+    for source in sorted(SOURCES):
+        try:
+            mod = importlib.import_module(f"sources.{source}")
+            if action in ("download", "all"):
+                mod.download()
+            if action in ("load", "all"):
+                results[source] = mod.load()
+        except Exception as e:
+            results[source] = {"error": str(e)}
+    return results
@@ -77,7 +77,7 @@ export default async function SchoolPage({ params }: SchoolPageProps) {
    notFound();
  }

-  const { school_info, yearly_data, absence_data } = data;
+  const { school_info, yearly_data, absence_data, ofsted, parent_view, census, admissions, sen_detail, phonics, deprivation, finance } = data;

  // Generate JSON-LD structured data for SEO
  const structuredData = {
@@ -116,6 +116,14 @@ export default async function SchoolPage({ params }: SchoolPageProps) {
        schoolInfo={school_info}
        yearlyData={yearly_data}
        absenceData={absence_data}
+        ofsted={ofsted ?? null}
+        parentView={parent_view ?? null}
+        census={census ?? null}
+        admissions={admissions ?? null}
+        senDetail={sen_detail ?? null}
+        phonics={phonics ?? null}
+        deprivation={deprivation ?? null}
+        finance={finance ?? null}
      />
    </>
  );
@@ -424,3 +424,120 @@
  color: var(--text-muted);
  font-style: italic;
 }
+
+/* ── Supplementary Data Sections ──────────────────────── */
+/* Throughout this block the second argument to var() is the fallback used
+   when the custom property is not defined. */
+.supplementarySection {
+  background: var(--bg-card, white);
+  border: 1px solid var(--border-color, #e5dfd5);
+  border-radius: 10px;
+  padding: 1.25rem 1.5rem;
+}
+
+.supplementarySubtitle {
+  font-size: 0.85rem;
+  color: var(--text-muted, #8a847a);
+  margin-bottom: 1rem;
+}
+
+.subSectionTitle {
+  font-size: 0.875rem;
+  font-weight: 600;
+  color: var(--text-secondary, #5c564d);
+  margin: 1.25rem 0 0.75rem;
+}
+
+/* Ofsted */
+.ofstedHeader {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  margin-bottom: 1rem;
+}
+
+.ofstedGrade {
+  display: inline-block;
+  padding: 0.3rem 0.75rem;
+  font-size: 1rem;
+  font-weight: 700;
+  border-radius: 6px;
+  white-space: nowrap;
+}
+
+/* Per-grade colours; the numeric suffix is the Ofsted grade (1–4) that the
+   component appends to "ofstedGrade" when building the class name. */
+.ofstedGrade1 { background: rgba(45, 125, 125, 0.12); color: var(--accent-teal, #2d7d7d); }
+.ofstedGrade2 { background: rgba(60, 140, 60, 0.12);  color: #3c8c3c; }
+.ofstedGrade3 { background: rgba(201, 162, 39, 0.15); color: #b8920e; }
+.ofstedGrade4 { background: rgba(224, 114, 86, 0.15); color: var(--accent-coral, #e07256); }
+
+.ofstedDate {
+  font-size: 0.85rem;
+  color: var(--text-muted, #8a847a);
+}
+
+.ofstedType {
+  font-size: 0.8rem;
+  color: var(--text-muted, #8a847a);
+  margin-top: 0.5rem;
+  font-style: italic;
+}
+
+/* Parent View */
+/* Each row is laid out as: fixed-width label, flexible bar, fixed-width percentage. */
+.parentViewGrid {
+  display: flex;
+  flex-direction: column;
+  gap: 0.5rem;
+}
+
+.parentViewRow {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  font-size: 0.875rem;
+}
+
+.parentViewLabel {
+  flex: 0 0 18rem;
+  color: var(--text-secondary, #5c564d);
+  font-size: 0.8125rem;
+}
+
+.parentViewBar {
+  flex: 1;
+  height: 0.5rem;
+  background: var(--bg-secondary, #f3ede4);
+  border-radius: 4px;
+  overflow: hidden;
+}
+
+/* No width is set here; presumably the component sets it inline per question
+   (TODO confirm), with the transition animating changes. */
+.parentViewFill {
+  height: 100%;
+  background: var(--accent-teal, #2d7d7d);
+  border-radius: 4px;
+  transition: width 0.4s ease;
+}
+
+.parentViewPct {
+  flex: 0 0 2.75rem;
+  text-align: right;
+  font-size: 0.8125rem;
+  font-weight: 600;
+  color: var(--text-primary, #1a1612);
+}
+
+/* Metric hint (small label below metricValue) */
+.metricHint {
+  font-size: 0.75rem;
+  color: var(--text-muted, #8a847a);
+  margin-top: 0.25rem;
+  font-style: italic;
+}
+
+/* ── Mobile ──────────────────────────────────────────── */
+/* At 640px and below: tighter section padding and a narrower Parent View
+   label column (10rem instead of 18rem). */
+@media (max-width: 640px) {
+  .supplementarySection {
+    padding: 1rem;
+  }
+
+  .parentViewLabel {
+    flex: 0 0 10rem;
+  }
+}
@@ -9,17 +9,37 @@ import { useRouter } from 'next/navigation';
 import { useComparison } from '@/hooks/useComparison';
 import { PerformanceChart } from './PerformanceChart';
 import { SchoolMap } from './SchoolMap';
-import type { School, SchoolResult, AbsenceData } from '@/lib/types';
+import type {
+  School, SchoolResult, AbsenceData,
+  OfstedInspection, OfstedParentView, SchoolCensus,
+  SchoolAdmissions, SenDetail, Phonics,
+  SchoolDeprivation, SchoolFinance,
+} from '@/lib/types';
 import { formatPercentage, formatProgress, calculateTrend } from '@/lib/utils';
 import styles from './SchoolDetailView.module.css';

+/** Display label for each numeric Ofsted grade (1–4). */
+const OFSTED_LABELS: Record<number, string> = {
+  1: 'Outstanding', 2: 'Good', 3: 'Requires Improvement', 4: 'Inadequate',
+};
+
+/**
+ * Props for SchoolDetailView. Each supplementary dataset (ofsted … finance)
+ * is passed as `null` when it is not available for the school.
+ */
 interface SchoolDetailViewProps {
   schoolInfo: School;
   yearlyData: SchoolResult[];
   absenceData: AbsenceData | null;
+  ofsted: OfstedInspection | null;
+  parentView: OfstedParentView | null;
+  census: SchoolCensus | null;
+  admissions: SchoolAdmissions | null;
+  senDetail: SenDetail | null;
+  phonics: Phonics | null;
+  deprivation: SchoolDeprivation | null;
+  finance: SchoolFinance | null;
 }

-export function SchoolDetailView({ schoolInfo, yearlyData, absenceData }: SchoolDetailViewProps) {
+export function SchoolDetailView({
+  schoolInfo, yearlyData, absenceData,
+  ofsted, parentView, census, admissions, senDetail, phonics, deprivation, finance,
+}: SchoolDetailViewProps) {
  const router = useRouter();
  const { addSchool, removeSchool, isSelected } = useComparison();
  const isInComparison = isSelected(schoolInfo.urn);
@@ -322,6 +342,209 @@ export function SchoolDetailView({ schoolInfo, yearlyData, absenceData }: School
          </div>
        </section>
      )}
+
+      {/* Ofsted Section: headline grade, inspection date, sub-judgement cards and inspection type */}
+      {ofsted && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Ofsted Inspection</h2>
+          <div className={styles.ofstedHeader}>
+            {/* Append the grade colour class only when it resolves; interpolating a missing
+                lookup would put the literal text "undefined" into the class attribute. */}
+            <span
+              className={[
+                styles.ofstedGrade,
+                ofsted.overall_effectiveness != null
+                  ? styles[`ofstedGrade${ofsted.overall_effectiveness}`]
+                  : undefined,
+              ].filter(Boolean).join(' ')}
+            >
+              {(ofsted.overall_effectiveness != null && OFSTED_LABELS[ofsted.overall_effectiveness]) || 'Not rated'}
+            </span>
+            {/* NOTE(review): a date-only string is parsed as UTC midnight but formatted in the
+                viewer's time zone; confirm the API format if off-by-one-day reports appear. */}
+            {ofsted.inspection_date && (
+              <span className={styles.ofstedDate}>
+                Inspected: {new Date(ofsted.inspection_date).toLocaleDateString('en-GB', { day: 'numeric', month: 'long', year: 'numeric' })}
+              </span>
+            )}
+          </div>
+          <div className={styles.metricsGrid}>
+            {[
+              { label: 'Quality of Education', value: ofsted.quality_of_education },
+              { label: 'Behaviour & Attitudes', value: ofsted.behaviour_attitudes },
+              { label: 'Personal Development', value: ofsted.personal_development },
+              { label: 'Leadership & Management', value: ofsted.leadership_management },
+              { label: 'Early Years', value: ofsted.early_years_provision },
+            ].map(({ label, value }) => {
+              const gradeLabel = value != null ? OFSTED_LABELS[value] : undefined;
+              // Skip judgements that are missing or have no known label, so a card is
+              // never rendered with an empty value.
+              if (!gradeLabel) return null;
+              return (
+                <div key={label} className={styles.metricCard}>
+                  <div className={styles.metricLabel}>{label}</div>
+                  <div className={[styles.metricValue, styles[`ofstedGrade${value}`]].filter(Boolean).join(' ')}>
+                    {gradeLabel}
+                  </div>
+                </div>
+              );
+            })}
+          </div>
+          {ofsted.inspection_type && (
+            <p className={styles.ofstedType}>{ofsted.inspection_type}</p>
+          )}
+        </section>
+      )}
+
+      {/* What Parents Think: one horizontal bar per survey question that has a value */}
+      {parentView && parentView.total_responses != null && parentView.total_responses > 0 && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>What Parents Think</h2>
+          <p className={styles.supplementarySubtitle}>
+            {/* Explicit locale keeps server and client output identical and matches the en-GB date formatting used in this view */}
+            Based on {parentView.total_responses.toLocaleString('en-GB')} parent responses to the Ofsted Parent View survey.
+          </p>
+          <div className={styles.parentViewGrid}>
+            {[
+              { label: 'My child is happy here', pct: parentView.q_happy_pct },
+              { label: 'My child feels safe here', pct: parentView.q_safe_pct },
+              { label: 'Would recommend this school', pct: parentView.q_recommend_pct },
+              { label: 'Teaching is good', pct: parentView.q_teaching_pct },
+              { label: 'My child makes good progress', pct: parentView.q_progress_pct },
+              { label: 'School looks after wellbeing', pct: parentView.q_wellbeing_pct },
+              { label: 'Led and managed effectively', pct: parentView.q_leadership_pct },
+              { label: 'Behaviour is well managed', pct: parentView.q_behaviour_pct },
+              { label: 'Communicates well with parents', pct: parentView.q_communication_pct },
+            ].filter(q => q.pct != null).map(({ label, pct }) => {
+              // Clamp the bar to 0–100 so an out-of-range value cannot overflow the track
+              // or produce a negative width; the text still shows the raw figure.
+              const barWidth = Math.min(100, Math.max(0, pct ?? 0));
+              return (
+                <div key={label} className={styles.parentViewRow}>
+                  <span className={styles.parentViewLabel}>{label}</span>
+                  <div className={styles.parentViewBar}>
+                    <div className={styles.parentViewFill} style={{ width: `${barWidth}%` }} />
+                  </div>
+                  <span className={styles.parentViewPct}>{pct}%</span>
+                </div>
+              );
+            })}
+          </div>
+        </section>
+      )}
+
+      {/* Admissions: skipped entirely when the record carries no displayable figure,
+          so the page never shows a heading above an empty grid */}
+      {admissions && (
+        admissions.published_admission_number != null ||
+        admissions.total_applications != null ||
+        admissions.first_preference_offers_pct != null ||
+        admissions.oversubscribed != null
+      ) && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Admissions ({admissions.year})</h2>
+          <div className={styles.metricsGrid}>
+            {admissions.published_admission_number != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Places available</div>
+                <div className={styles.metricValue}>{admissions.published_admission_number}</div>
+              </div>
+            )}
+            {admissions.total_applications != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Applications received</div>
+                {/* Explicit locale keeps server and client output identical */}
+                <div className={styles.metricValue}>{admissions.total_applications.toLocaleString('en-GB')}</div>
+              </div>
+            )}
+            {admissions.first_preference_offers_pct != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Got first choice</div>
+                <div className={styles.metricValue}>{admissions.first_preference_offers_pct}%</div>
+              </div>
+            )}
+            {admissions.oversubscribed != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Oversubscribed</div>
+                <div className={styles.metricValue}>{admissions.oversubscribed ? 'Yes' : 'No'}</div>
+              </div>
+            )}
+          </div>
+        </section>
+      )}
+
+      {/* Pupils & Inclusion (Census + SEN): shown when there is a class size to display
+          or a SEN record; a census row without class size no longer yields an empty section */}
+      {(census?.class_size_avg != null || senDetail) && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Pupils &amp; Inclusion</h2>
+          {/* Render the grid wrapper only when it will contain the class-size card */}
+          {census?.class_size_avg != null && (
+            <div className={styles.metricsGrid}>
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Average class size</div>
+                <div className={styles.metricValue}>{census.class_size_avg.toFixed(1)}</div>
+              </div>
+            </div>
+          )}
+          {senDetail && (
+            <>
+              <h3 className={styles.subSectionTitle}>Primary SEN Needs (latest year)</h3>
+              <div className={styles.metricsGrid}>
+                {[
+                  { label: 'Speech & Language', pct: senDetail.primary_need_speech_pct },
+                  { label: 'Autism (ASD)', pct: senDetail.primary_need_autism_pct },
+                  { label: 'Learning Difficulties', pct: senDetail.primary_need_mld_pct },
+                  { label: 'Specific Learning (Dyslexia etc.)', pct: senDetail.primary_need_spld_pct },
+                  { label: 'Social, Emotional & Mental Health', pct: senDetail.primary_need_semh_pct },
+                  { label: 'Physical / Sensory', pct: senDetail.primary_need_physical_pct },
+                ].filter(n => n.pct != null).map(({ label, pct }) => (
+                  <div key={label} className={styles.metricCard}>
+                    <div className={styles.metricLabel}>{label}</div>
+                    <div className={styles.metricValue}>{pct}%</div>
+                  </div>
+                ))}
+              </div>
+            </>
+          )}
+        </section>
+      )}
+
+      {/* Year 1 Phonics: rendered only when a Year 1 figure is available */}
+      {phonics?.year1_phonics_pct != null && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Year 1 Phonics ({phonics.year})</h2>
+          <div className={styles.metricsGrid}>
+            <div className={styles.metricCard}>
+              <div className={styles.metricLabel}>Reached expected standard</div>
+              <div className={styles.metricValue}>{formatPercentage(phonics.year1_phonics_pct)}</div>
+            </div>
+            {/* The Year 2 card is optional */}
+            {phonics.year2_phonics_pct == null ? null : (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Year 2 (re-takers) standard</div>
+                <div className={styles.metricValue}>{formatPercentage(phonics.year2_phonics_pct)}</div>
+              </div>
+            )}
+          </div>
+        </section>
+      )}
+
+      {/* Deprivation Context: IDACI decile card, plus the raw score when present */}
+      {deprivation?.idaci_decile != null && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Deprivation Context</h2>
+          <div className={styles.metricsGrid}>
+            <div className={styles.metricCard}>
+              <div className={styles.metricLabel}>Area deprivation decile</div>
+              <div className={styles.metricValue}>{deprivation.idaci_decile} / 10</div>
+              <div className={styles.metricHint}>1 = most deprived, 10 = least deprived</div>
+            </div>
+            {/* Score is shown to three decimal places */}
+            {deprivation.idaci_score == null ? null : (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>IDACI score</div>
+                <div className={styles.metricValue}>{deprivation.idaci_score.toFixed(3)}</div>
+              </div>
+            )}
+          </div>
+        </section>
+      )}
+
+      {/* Finances: shown only when a per-pupil spend figure exists */}
+      {finance && finance.per_pupil_spend != null && (
+        <section className={styles.supplementarySection}>
+          <h2 className={styles.sectionTitle}>Finances ({finance.year})</h2>
+          <div className={styles.metricsGrid}>
+            {/* per_pupil_spend is already known to be non-null from the section guard,
+                so this card needs no second check. Explicit locale keeps server and
+                client output identical. */}
+            <div className={styles.metricCard}>
+              <div className={styles.metricLabel}>Spend per pupil</div>
+              <div className={styles.metricValue}>£{Math.round(finance.per_pupil_spend).toLocaleString('en-GB')}</div>
+            </div>
+            {finance.teacher_cost_pct != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>Teacher costs</div>
+                <div className={styles.metricValue}>{finance.teacher_cost_pct.toFixed(1)}% of budget</div>
+              </div>
+            )}
+            {finance.staff_cost_pct != null && (
+              <div className={styles.metricCard}>
+                <div className={styles.metricLabel}>All staff costs</div>
+                <div className={styles.metricValue}>{finance.staff_cost_pct.toFixed(1)}% of budget</div>
+              </div>
+            )}
+          </div>
+        </section>
+      )}
    </div>
  );
 }
@@ -211,6 +211,23 @@
  color: var(--text-primary, #1a1612);
 }

+/* ── Ofsted badge ────────────────────────────────────── */
+/* Compact inline pill; colours come from the .ofsted1–.ofsted4 modifiers */
+.ofstedBadge {
+  display: inline-block;
+  flex-shrink: 0;
+  padding: 0.0625rem 0.375rem;
+  border-radius: 3px;
+  font-size: 0.6875rem;
+  font-weight: 600;
+  line-height: 1.4;
+  white-space: nowrap;
+}
+
+/* Grade 1 */
+.ofsted1 {
+  color: var(--accent-teal, #2d7d7d);
+  background: rgba(45, 125, 125, 0.12);
+}
+
+/* Grade 2 */
+.ofsted2 {
+  color: #3c8c3c;
+  background: rgba(60, 140, 60, 0.12);
+}
+
+/* Grade 3 */
+.ofsted3 {
+  color: #b8920e;
+  background: rgba(201, 162, 39, 0.15);
+}
+
+/* Grade 4 */
+.ofsted4 {
+  color: var(--accent-coral, #e07256);
+  background: rgba(224, 114, 86, 0.15);
+}
+
 /* ── Mobile ──────────────────────────────────────────── */
@media (max-width: 640px) {
  .row {
@@ -12,6 +12,13 @@ import { formatPercentage, formatProgress, calculateTrend } from '@/lib/utils';
 import { progressBand } from '@/lib/metrics';
 import styles from './SchoolRow.module.css';

+/** Short Ofsted grade labels for the list-row badge, keyed by numeric grade. */
+const OFSTED_LABELS: Record<number, string> = {
+  1: 'Outstanding', 2: 'Good', 3: 'Req. Improvement', 4: 'Inadequate',
+};
+
 interface SchoolRowProps {
  school: School;
  isLocationSearch?: boolean;
@@ -46,7 +53,7 @@ export function SchoolRow({
      {/* Left: three content lines */}
      <div className={styles.rowContent}>

-        {/* Line 1: School name + type */}
+        {/* Line 1: School name + type + Ofsted badge */}
        <div className={styles.line1}>
          <a href={`/school/${school.urn}`} className={styles.schoolName}>
            {school.school_name}
@@ -54,6 +61,11 @@ export function SchoolRow({
          {school.school_type && (
            <span className={styles.schoolType}>{school.school_type}</span>
          )}
+          {/* Ofsted badge: an explicit ternary is used instead of `grade && …` so that a
+              falsy numeric grade can never be rendered by React as a stray "0" */}
+          {school.ofsted_grade ? (
+            <span className={`${styles.ofstedBadge} ${styles[`ofsted${school.ofsted_grade}`]}`}>
+              {OFSTED_LABELS[school.ofsted_grade]}
+            </span>
+          ) : null}
        </div>

        {/* Line 2: Key stats */}
@@ -47,6 +47,102 @@ export interface School {

  // Location search fields
  distance?: number | null;
+
+  // GIAS enrichment fields
+  website?: string | null;
+  headteacher_name?: string | null;
+  capacity?: number | null;
+  trust_name?: string | null;
+  gender?: string | null;
+
+  // Ofsted (for list view — summary only)
+  ofsted_grade?: 1 | 2 | 3 | 4 | null;
+  ofsted_date?: string | null;
+}
+
+// ============================================================================
+// Supplementary Data Types (populated by Kestra data integrator)
+// ============================================================================
+
+export interface OfstedInspection { // One Ofsted inspection record for a school
+  overall_effectiveness: 1 | 2 | 3 | 4 | null; // 1 = Outstanding, 2 = Good, 3 = Requires Improvement, 4 = Inadequate
+  quality_of_education: number | null; // sub-judgement grade; the detail view labels it on the same 1–4 scale
+  behaviour_attitudes: number | null; // sub-judgement grade
+  personal_development: number | null; // sub-judgement grade
+  leadership_management: number | null; // sub-judgement grade
+  early_years_provision: number | null; // sub-judgement grade; the detail view shows it only when present
+  previous_overall: number | null; // presumably the overall grade from the prior inspection — TODO confirm
+  inspection_date: string | null; // date string; the detail view parses it with `new Date(...)`
+  inspection_type: string | null; // free-text inspection type shown beneath the grades
+}
+
+export interface OfstedParentView { // Ofsted Parent View survey results for a school
+  survey_date: string | null;
+  total_responses: number | null; // number of parent responses; the detail view hides the section unless > 0
+  q_happy_pct: number | null; // shown as "My child is happy here"; q_*_pct values are rendered as percentage bars
+  q_safe_pct: number | null; // shown as "My child feels safe here"
+  q_behaviour_pct: number | null; // shown as "Behaviour is well managed"
+  q_bullying_pct: number | null; // not rendered by the detail view
+  q_communication_pct: number | null; // shown as "Communicates well with parents"
+  q_progress_pct: number | null; // shown as "My child makes good progress"
+  q_teaching_pct: number | null; // shown as "Teaching is good"
+  q_information_pct: number | null; // not rendered by the detail view
+  q_curriculum_pct: number | null; // not rendered by the detail view
+  q_future_pct: number | null; // not rendered by the detail view
+  q_leadership_pct: number | null; // shown as "Led and managed effectively"
+  q_wellbeing_pct: number | null; // shown as "School looks after wellbeing"
+  q_recommend_pct: number | null; // shown as "Would recommend this school"
+  q_sen_pct: number | null; // not rendered by the detail view
+}
+
+export interface SchoolCensus { // School census figures for one year
+  year: number;
+  class_size_avg: number | null; // shown as "Average class size" to one decimal place
+  ethnicity_white_pct: number | null; // ethnicity_* fields are not rendered by the detail view
+  ethnicity_asian_pct: number | null;
+  ethnicity_black_pct: number | null;
+  ethnicity_mixed_pct: number | null;
+  ethnicity_other_pct: number | null;
+}
+
+export interface SchoolAdmissions { // Admissions figures for one year
+  year: number; // shown in the "Admissions (year)" heading
+  published_admission_number: number | null; // shown as "Places available"
+  total_applications: number | null; // shown as "Applications received"
+  first_preference_offers_pct: number | null; // shown as "Got first choice", with a % suffix
+  oversubscribed: boolean | null; // shown as Yes / No
+}
+
+export interface SenDetail { // Breakdown of primary SEN needs for one year; values are displayed with a % suffix
+  year: number;
+  primary_need_speech_pct: number | null; // shown as "Speech & Language"
+  primary_need_autism_pct: number | null; // shown as "Autism (ASD)"
+  primary_need_mld_pct: number | null; // shown as "Learning Difficulties"
+  primary_need_spld_pct: number | null; // shown as "Specific Learning (Dyslexia etc.)"
+  primary_need_semh_pct: number | null; // shown as "Social, Emotional & Mental Health"
+  primary_need_physical_pct: number | null; // shown as "Physical / Sensory"
+  primary_need_other_pct: number | null; // not rendered by the detail view
+}
+
+export interface Phonics { // Phonics results for one year
+  year: number; // shown in the "Year 1 Phonics (year)" heading
+  year1_phonics_pct: number | null; // shown as "Reached expected standard"; the section is hidden when null
+  year2_phonics_pct: number | null; // shown as "Year 2 (re-takers) standard" when present
+}
+
+export interface SchoolDeprivation { // Area deprivation context for a school
+  lsoa_code: string | null; // not rendered by the detail view
+  idaci_score: number | null; // shown as "IDACI score" to three decimal places
+  idaci_decile: number | null; // shown as "N / 10" with the hint "1 = most deprived, 10 = least deprived"
+}
+
+export interface SchoolFinance { // Finance figures for one year
+  year: number; // shown in the "Finances (year)" heading
+  per_pupil_spend: number | null; // shown rounded with a £ prefix; the section is hidden when null
+  staff_cost_pct: number | null; // shown as "All staff costs", "% of budget"
+  teacher_cost_pct: number | null; // shown as "Teacher costs", "% of budget"
+  support_staff_cost_pct: number | null; // not rendered by the detail view
+  premises_cost_pct: number | null; // not rendered by the detail view
 }

 // ============================================================================
@@ -152,6 +248,15 @@ export interface SchoolDetailsResponse {
  school_info: School;
  yearly_data: SchoolResult[];
  absence_data: AbsenceData | null;
+  // Supplementary data (null until Kestra populates)
+  ofsted: OfstedInspection | null;
+  parent_view: OfstedParentView | null;
+  census: SchoolCensus | null;
+  admissions: SchoolAdmissions | null;
+  sen_detail: SenDetail | null;
+  phonics: Phonics | null;
+  deprivation: SchoolDeprivation | null;
+  finance: SchoolFinance | null;
 }

 export interface ComparisonData {