feat(pipeline): add Meltano + dbt + Airflow ELT pipeline scaffold

Replaces the hand-rolled integrator with a production-grade ELT pipeline using Meltano (Singer taps), dbt Core (medallion architecture), and Apache Airflow (orchestration). Adds Typesense for search and PostGIS for geospatial queries. - 6 custom Singer taps (GIAS, EES, Ofsted, Parent View, FBIT, IDACI) - dbt project: 12 staging, 5 intermediate, 12 mart models - 3 Airflow DAGs (daily/monthly/annual schedules) - Typesense sync + batch geocoding scripts - docker-compose: add Airflow, Typesense; upgrade to PostGIS - Portainer stack definition matching live deployment topology Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 08:37:53 +00:00
parent 8aca0a7a53
commit 8f02b5125e
65 changed files with 2822 additions and 72 deletions
@@ -0,0 +1,37 @@
+FROM python:3.12-slim
+
+WORKDIR /opt/pipeline
+
+# System dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    libpq-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install custom Singer taps
+COPY plugins/ plugins/
+RUN pip install --no-cache-dir \
+    ./plugins/extractors/tap-uk-gias \
+    ./plugins/extractors/tap-uk-ees \
+    ./plugins/extractors/tap-uk-ofsted \
+    ./plugins/extractors/tap-uk-parent-view \
+    ./plugins/extractors/tap-uk-fbit \
+    ./plugins/extractors/tap-uk-idaci
+
+# Copy pipeline code
+COPY meltano.yml .
+COPY transform/ transform/
+COPY scripts/ scripts/
+COPY dags/ dags/
+
+# dbt deps
+RUN cd transform && dbt deps --profiles-dir . 2>/dev/null || true
+
+ENV AIRFLOW_HOME=/opt/airflow
+ENV PYTHONPATH=/opt/pipeline
+
+CMD ["airflow", "webserver"]
@@ -0,0 +1,163 @@
+"""
+School Data Pipeline — Airflow DAG
+
+Orchestrates the full ELT pipeline:
+  Extract (Meltano) → Validate → Transform (dbt) → Geocode → Sync Typesense → Invalidate Cache
+
+Schedule:
+  - GIAS: Daily at 03:00
+  - Ofsted: 1st of month at 02:00
+  - EES datasets: Annual (triggered manually or on detected release)
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+from airflow.operators.python import PythonOperator
+from airflow.utils.task_group import TaskGroup
+
+PIPELINE_DIR = "/opt/pipeline"
+MELTANO_BIN = f"{PIPELINE_DIR}/.venv/bin/meltano"
+DBT_BIN = f"{PIPELINE_DIR}/.venv/bin/dbt"
+
+default_args = {
+    "owner": "school-compare",
+    "depends_on_past": False,
+    "email_on_failure": False,
+    "retries": 1,
+    "retry_delay": timedelta(minutes=5),
+}
+
+
+# ── Daily DAG (GIAS + downstream) ──────────────────────────────────────
+
+with DAG(
+    dag_id="school_data_daily",
+    default_args=default_args,
+    description="Daily school data pipeline (GIAS extract → full transform)",
+    schedule="0 3 * * *",
+    start_date=datetime(2025, 1, 1),
+    catchup=False,
+    tags=["school-compare", "daily"],
+) as daily_dag:
+
+    with TaskGroup("extract") as extract_group:
+        extract_gias = BashOperator(
+            task_id="extract_gias",
+            bash_command=f"cd {PIPELINE_DIR} && {MELTANO_BIN} elt tap-uk-gias target-postgres",
+        )
+
+    validate_raw = BashOperator(
+        task_id="validate_raw",
+        bash_command=f"""
+            cd {PIPELINE_DIR} && python -c "
+import psycopg2, os, sys
+conn = psycopg2.connect(
+    host=os.environ.get('PG_HOST', 'localhost'),
+    port=os.environ.get('PG_PORT', '5432'),
+    user=os.environ.get('PG_USER', 'postgres'),
+    password=os.environ.get('PG_PASSWORD', 'postgres'),
+    dbname=os.environ.get('PG_DATABASE', 'school_compare'),
+)
+cur = conn.cursor()
+cur.execute('SELECT count(*) FROM raw.gias_establishments')
+count = cur.fetchone()[0]
+conn.close()
+if count < 20000:
+    print(f'WARN: GIAS only has {{count}} rows, expected 60k+', file=sys.stderr)
+    sys.exit(1)
+print(f'Validation passed: {{count}} GIAS rows')
+"
+        """,
+    )
+
+    dbt_build = BashOperator(
+        task_id="dbt_build",
+        bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production",
+    )
+
+    dbt_test = BashOperator(
+        task_id="dbt_test",
+        bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} test --profiles-dir . --target production",
+    )
+
+    geocode_new = BashOperator(
+        task_id="geocode_new",
+        bash_command=f"cd {PIPELINE_DIR} && python scripts/geocode_postcodes.py",
+    )
+
+    sync_typesense = BashOperator(
+        task_id="sync_typesense",
+        bash_command=f"cd {PIPELINE_DIR} && python scripts/sync_typesense.py",
+    )
+
+    extract_group >> validate_raw >> dbt_build >> dbt_test >> geocode_new >> sync_typesense
+
+
+# ── Monthly DAG (Ofsted) ───────────────────────────────────────────────
+
+with DAG(
+    dag_id="school_data_monthly_ofsted",
+    default_args=default_args,
+    description="Monthly Ofsted MI extraction and transform",
+    schedule="0 2 1 * *",
+    start_date=datetime(2025, 1, 1),
+    catchup=False,
+    tags=["school-compare", "monthly"],
+) as monthly_ofsted_dag:
+
+    extract_ofsted = BashOperator(
+        task_id="extract_ofsted",
+        bash_command=f"cd {PIPELINE_DIR} && {MELTANO_BIN} elt tap-uk-ofsted target-postgres",
+    )
+
+    dbt_build_ofsted = BashOperator(
+        task_id="dbt_build",
+        bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production --select stg_ofsted_inspections+ int_ofsted_latest+ fact_ofsted_inspection+ dim_school+",
+    )
+
+    sync_typesense_ofsted = BashOperator(
+        task_id="sync_typesense",
+        bash_command=f"cd {PIPELINE_DIR} && python scripts/sync_typesense.py",
+    )
+
+    extract_ofsted >> dbt_build_ofsted >> sync_typesense_ofsted
+
+
+# ── Annual DAG (EES: KS2, KS4, Census, Admissions, Phonics) ───────────
+
+with DAG(
+    dag_id="school_data_annual_ees",
+    default_args=default_args,
+    description="Annual EES data extraction (KS2, KS4, Census, Admissions, Phonics)",
+    schedule=None,  # Triggered manually when new releases are published
+    start_date=datetime(2025, 1, 1),
+    catchup=False,
+    tags=["school-compare", "annual"],
+) as annual_ees_dag:
+
+    with TaskGroup("extract_ees") as extract_ees_group:
+        extract_ees = BashOperator(
+            task_id="extract_ees",
+            bash_command=f"cd {PIPELINE_DIR} && {MELTANO_BIN} elt tap-uk-ees target-postgres",
+        )
+
+    dbt_build_ees = BashOperator(
+        task_id="dbt_build",
+        bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} build --profiles-dir . --target production",
+    )
+
+    dbt_test_ees = BashOperator(
+        task_id="dbt_test",
+        bash_command=f"cd {PIPELINE_DIR}/transform && {DBT_BIN} test --profiles-dir . --target production",
+    )
+
+    sync_typesense_ees = BashOperator(
+        task_id="sync_typesense",
+        bash_command=f"cd {PIPELINE_DIR} && python scripts/sync_typesense.py",
+    )
+
+    extract_ees_group >> dbt_build_ees >> dbt_test_ees >> sync_typesense_ees
@@ -0,0 +1,114 @@
+version: 1
+project_id: school-compare-pipeline
+
+plugins:
+  extractors:
+    - name: tap-uk-gias
+      namespace: uk_gias
+      pip_url: ./plugins/extractors/tap-uk-gias
+      executable: tap-uk-gias
+      capabilities:
+        - catalog
+        - state
+      settings:
+        - name: download_url
+          kind: string
+          description: GIAS bulk CSV download URL
+          value: https://ea-edubase-api-prod.azurewebsites.net/edubase/downloads/public/edubasealldata.csv
+
+    - name: tap-uk-ees
+      namespace: uk_ees
+      pip_url: ./plugins/extractors/tap-uk-ees
+      executable: tap-uk-ees
+      capabilities:
+        - catalog
+        - state
+      settings:
+        - name: base_url
+          kind: string
+          value: https://content.explore-education-statistics.service.gov.uk/api/v1
+        - name: datasets
+          kind: array
+          description: List of EES dataset configs to extract
+
+    - name: tap-uk-ofsted
+      namespace: uk_ofsted
+      pip_url: ./plugins/extractors/tap-uk-ofsted
+      executable: tap-uk-ofsted
+      capabilities:
+        - catalog
+        - state
+      settings:
+        - name: mi_url
+          kind: string
+          description: Ofsted Management Information download URL
+
+    - name: tap-uk-parent-view
+      namespace: uk_parent_view
+      pip_url: ./plugins/extractors/tap-uk-parent-view
+      executable: tap-uk-parent-view
+      capabilities:
+        - catalog
+
+    - name: tap-uk-fbit
+      namespace: uk_fbit
+      pip_url: ./plugins/extractors/tap-uk-fbit
+      executable: tap-uk-fbit
+      capabilities:
+        - catalog
+        - state
+      settings:
+        - name: base_url
+          kind: string
+          value: https://financial-benchmarking-and-insights-tool.education.gov.uk/api
+
+    - name: tap-uk-idaci
+      namespace: uk_idaci
+      pip_url: ./plugins/extractors/tap-uk-idaci
+      executable: tap-uk-idaci
+      capabilities:
+        - catalog
+
+  loaders:
+    - name: target-postgres
+      variant: transferwise
+      pip_url: pipelinewise-target-postgres
+      config:
+        host: $PG_HOST
+        port: $PG_PORT
+        user: $PG_USER
+        password: $PG_PASSWORD
+        dbname: $PG_DATABASE
+        default_target_schema: raw
+
+  utilities:
+    - name: dbt-postgres
+      variant: dbt-labs
+      pip_url: dbt-postgres~=1.8
+      config:
+        project_dir: $MELTANO_PROJECT_ROOT/transform
+        profiles_dir: $MELTANO_PROJECT_ROOT/transform
+
+environments:
+  - name: dev
+    config:
+      plugins:
+        loaders:
+          - name: target-postgres
+            config:
+              host: localhost
+              port: 5432
+              user: postgres
+              password: postgres
+              dbname: school_compare
+  - name: production
+    config:
+      plugins:
+        loaders:
+          - name: target-postgres
+            config:
+              host: ${PG_HOST}
+              port: ${PG_PORT}
+              user: ${PG_USER}
+              password: ${PG_PASSWORD}
+              dbname: ${PG_DATABASE}
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-ees"
+version = "0.1.0"
+description = "Singer tap for UK Explore Education Statistics (KS2, KS4, Census, Admissions, Phonics)"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+    "pandas>=2.0",
+]
+
+[project.scripts]
+tap-uk-ees = "tap_uk_ees.tap:TapUKEES.cli"
@@ -0,0 +1 @@
+"""tap-uk-ees: Singer tap for Explore Education Statistics API."""
@@ -0,0 +1,154 @@
+"""EES Singer tap — extracts KS2, KS4, Census, Admissions, Phonics data."""
+
+from __future__ import annotations
+
+import io
+import zipfile
+
+import requests
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+CONTENT_API_BASE = (
+    "https://content.explore-education-statistics.service.gov.uk/api"
+)
+STATS_API_BASE = "https://api.education.gov.uk/statistics/v1"
+TIMEOUT = 120
+
+
+def get_content_release_id(publication_slug: str) -> str:
+    """Return the latest release ID via the EES content API."""
+    url = f"{CONTENT_API_BASE}/publications/{publication_slug}/releases/latest"
+    resp = requests.get(url, timeout=TIMEOUT)
+    resp.raise_for_status()
+    return resp.json()["id"]
+
+
+def download_release_zip(release_id: str) -> zipfile.ZipFile:
+    """Download all data files for a release as a ZIP."""
+    url = f"{CONTENT_API_BASE}/releases/{release_id}/files"
+    resp = requests.get(url, timeout=300, stream=True)
+    resp.raise_for_status()
+    return zipfile.ZipFile(io.BytesIO(resp.content))
+
+
+class EESDatasetStream(Stream):
+    """Base stream for an EES dataset extracted from a release ZIP."""
+
+    replication_key = None
+    _publication_slug: str = ""
+    _file_keyword: str = ""
+
+    def get_records(self, context):
+        import pandas as pd
+
+        release_id = get_content_release_id(self._publication_slug)
+        self.logger.info(
+            "Downloading release %s for %s",
+            release_id,
+            self._publication_slug,
+        )
+        zf = download_release_zip(release_id)
+
+        # Find the CSV matching our keyword
+        csv_names = [n for n in zf.namelist() if n.endswith(".csv")]
+        target = None
+        for name in csv_names:
+            if self._file_keyword.lower() in name.lower():
+                target = name
+                break
+        if not target and csv_names:
+            target = csv_names[0]
+
+        if not target:
+            self.logger.warning("No CSV found in release ZIP")
+            return
+
+        self.logger.info("Reading %s from ZIP", target)
+        with zf.open(target) as f:
+            df = pd.read_csv(f, dtype=str, keep_default_na=False)
+
+        # Filter to school-level data
+        if "geographic_level" in df.columns:
+            df = df[df["geographic_level"] == "School"]
+
+        for _, row in df.iterrows():
+            yield row.to_dict()
+
+
+class EESKS2Stream(EESDatasetStream):
+    name = "ees_ks2"
+    primary_keys = ["urn", "time_period"]
+    _publication_slug = "key-stage-2-attainment"
+    _file_keyword = "school"
+    schema = th.PropertiesList(
+        th.Property("urn", th.StringType, required=True),
+        th.Property("time_period", th.StringType, required=True),
+    ).to_dict()
+
+
+class EESKS4Stream(EESDatasetStream):
+    name = "ees_ks4"
+    primary_keys = ["urn", "time_period"]
+    _publication_slug = "key-stage-4-performance-revised"
+    _file_keyword = "school"
+    schema = th.PropertiesList(
+        th.Property("urn", th.StringType, required=True),
+        th.Property("time_period", th.StringType, required=True),
+    ).to_dict()
+
+
+class EESCensusStream(EESDatasetStream):
+    name = "ees_census"
+    primary_keys = ["urn", "time_period"]
+    _publication_slug = "school-pupils-and-their-characteristics"
+    _file_keyword = "school"
+    schema = th.PropertiesList(
+        th.Property("urn", th.StringType, required=True),
+        th.Property("time_period", th.StringType, required=True),
+    ).to_dict()
+
+
+class EESAdmissionsStream(EESDatasetStream):
+    name = "ees_admissions"
+    primary_keys = ["urn", "time_period"]
+    _publication_slug = "secondary-and-primary-school-applications-and-offers"
+    _file_keyword = "school"
+    schema = th.PropertiesList(
+        th.Property("urn", th.StringType, required=True),
+        th.Property("time_period", th.StringType, required=True),
+    ).to_dict()
+
+
+class EESPhonicsStream(EESDatasetStream):
+    name = "ees_phonics"
+    primary_keys = ["urn", "time_period"]
+    _publication_slug = "phonics-screening-check-and-key-stage-1-assessments"
+    _file_keyword = "school"
+    schema = th.PropertiesList(
+        th.Property("urn", th.StringType, required=True),
+        th.Property("time_period", th.StringType, required=True),
+    ).to_dict()
+
+
+class TapUKEES(Tap):
+    """Singer tap for UK Explore Education Statistics."""
+
+    name = "tap-uk-ees"
+
+    config_jsonschema = th.PropertiesList(
+        th.Property("base_url", th.StringType, description="EES API base URL"),
+    ).to_dict()
+
+    def discover_streams(self):
+        return [
+            EESKS2Stream(self),
+            EESKS4Stream(self),
+            EESCensusStream(self),
+            EESAdmissionsStream(self),
+            EESPhonicsStream(self),
+        ]
+
+
+if __name__ == "__main__":
+    TapUKEES.cli()
@@ -0,0 +1,16 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-fbit"
+version = "0.1.0"
+description = "Singer tap for UK FBIT (Financial Benchmarking and Insights Tool)"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+]
+
+[project.scripts]
+tap-uk-fbit = "tap_uk_fbit.tap:TapUKFBIT.cli"
@@ -0,0 +1 @@
+"""tap-uk-fbit: Singer tap for Financial Benchmarking and Insights Tool API."""
@@ -0,0 +1,53 @@
+"""FBIT Singer tap — extracts financial data from the FBIT REST API."""
+
+from __future__ import annotations
+
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+
+class FBITFinanceStream(Stream):
+    """Stream: School financial benchmarking data."""
+
+    name = "fbit_finance"
+    primary_keys = ["urn", "year"]
+    replication_key = None
+
+    schema = th.PropertiesList(
+        th.Property("urn", th.IntegerType, required=True),
+        th.Property("year", th.IntegerType, required=True),
+        th.Property("per_pupil_spend", th.NumberType),
+        th.Property("staff_cost_pct", th.NumberType),
+        th.Property("teacher_cost_pct", th.NumberType),
+        th.Property("support_staff_cost_pct", th.NumberType),
+        th.Property("premises_cost_pct", th.NumberType),
+    ).to_dict()
+
+    def get_records(self, context):
+        # TODO: Implement FBIT API extraction
+        # The FBIT API requires per-URN requests with rate limiting.
+        # Implementation will batch URNs from dim_school and request
+        # financial data for each.
+        self.logger.warning("FBIT extraction not yet implemented")
+        return iter([])
+
+
+class TapUKFBIT(Tap):
+    """Singer tap for UK FBIT financial data."""
+
+    name = "tap-uk-fbit"
+
+    config_jsonschema = th.PropertiesList(
+        th.Property(
+            "base_url",
+            th.StringType,
+            default="https://financial-benchmarking-and-insights-tool.education.gov.uk/api",
+        ),
+    ).to_dict()
+
+    def discover_streams(self):
+        return [FBITFinanceStream(self)]
+
+
+if __name__ == "__main__":
+    TapUKFBIT.cli()
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-gias"
+version = "0.1.0"
+description = "Singer tap for UK GIAS (Get Information About Schools) bulk data"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+    "pandas>=2.0",
+]
+
+[project.scripts]
+tap-uk-gias = "tap_uk_gias.tap:TapUKGIAS.cli"
@@ -0,0 +1 @@
+"""tap-uk-gias: Singer tap for GIAS bulk establishment data."""
@@ -0,0 +1,135 @@
+"""GIAS Singer tap — extracts bulk establishment CSV from GIAS API."""
+
+from __future__ import annotations
+
+from datetime import date
+
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+GIAS_URL_TEMPLATE = (
+    "https://ea-edubase-api-prod.azurewebsites.net"
+    "/edubase/downloads/public/edubasealldata{date}.csv"
+)
+
+
+class GIASEstablishmentsStream(Stream):
+    """Stream: GIAS establishments (one row per URN)."""
+
+    name = "gias_establishments"
+    primary_keys = ["URN"]
+    replication_key = None
+
+    # Schema is wide (~250 columns); we declare key columns and pass through the rest
+    schema = th.PropertiesList(
+        th.Property("URN", th.IntegerType, required=True),
+        th.Property("EstablishmentName", th.StringType),
+        th.Property("TypeOfEstablishment (name)", th.StringType),
+        th.Property("PhaseOfEducation (name)", th.StringType),
+        th.Property("LA (code)", th.IntegerType),
+        th.Property("LA (name)", th.StringType),
+        th.Property("EstablishmentNumber", th.IntegerType),
+        th.Property("EstablishmentStatus (name)", th.StringType),
+        th.Property("Postcode", th.StringType),
+    ).to_dict()
+
+    def get_records(self, context):
+        """Download GIAS CSV and yield rows."""
+        import io
+
+        import pandas as pd
+        import requests
+
+        today = date.today().strftime("%Y%m%d")
+        url = GIAS_URL_TEMPLATE.format(date=today)
+
+        self.logger.info("Downloading GIAS bulk CSV: %s", url)
+        resp = requests.get(url, timeout=120)
+        resp.raise_for_status()
+
+        df = pd.read_csv(
+            io.StringIO(resp.text),
+            encoding="utf-8-sig",
+            dtype=str,
+            keep_default_na=False,
+        )
+
+        for _, row in df.iterrows():
+            record = row.to_dict()
+            # Cast URN to int
+            try:
+                record["URN"] = int(record["URN"])
+            except (ValueError, KeyError):
+                continue
+            yield record
+
+
+class GIASLinksStream(Stream):
+    """Stream: GIAS school links (predecessor/successor)."""
+
+    name = "gias_links"
+    primary_keys = ["URN", "LinkURN"]
+    replication_key = None
+
+    schema = th.PropertiesList(
+        th.Property("URN", th.IntegerType, required=True),
+        th.Property("LinkURN", th.IntegerType, required=True),
+        th.Property("LinkType", th.StringType),
+        th.Property("LinkEstablishedDate", th.StringType),
+    ).to_dict()
+
+    def get_records(self, context):
+        """Download GIAS links CSV and yield rows."""
+        import io
+
+        import pandas as pd
+        import requests
+
+        url = (
+            "https://ea-edubase-api-prod.azurewebsites.net"
+            "/edubase/downloads/public/links_edubasealldata.csv"
+        )
+
+        self.logger.info("Downloading GIAS links CSV: %s", url)
+        resp = requests.get(url, timeout=120)
+        resp.raise_for_status()
+
+        df = pd.read_csv(
+            io.StringIO(resp.text),
+            encoding="utf-8-sig",
+            dtype=str,
+            keep_default_na=False,
+        )
+
+        for _, row in df.iterrows():
+            record = row.to_dict()
+            try:
+                record["URN"] = int(record["URN"])
+                record["LinkURN"] = int(record["LinkURN"])
+            except (ValueError, KeyError):
+                continue
+            yield record
+
+
+class TapUKGIAS(Tap):
+    """Singer tap for UK GIAS data."""
+
+    name = "tap-uk-gias"
+
+    config_jsonschema = th.PropertiesList(
+        th.Property(
+            "download_url",
+            th.StringType,
+            description="Override GIAS CSV download URL",
+        ),
+    ).to_dict()
+
+    def discover_streams(self):
+        return [
+            GIASEstablishmentsStream(self),
+            GIASLinksStream(self),
+        ]
+
+
+if __name__ == "__main__":
+    TapUKGIAS.cli()
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-idaci"
+version = "0.1.0"
+description = "Singer tap for UK IDACI deprivation index"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+    "pandas>=2.0",
+]
+
+[project.scripts]
+tap-uk-idaci = "tap_uk_idaci.tap:TapUKIDACI.cli"
@@ -0,0 +1 @@
+"""tap-uk-idaci: Singer tap for IDACI (Income Deprivation Affecting Children Index)."""
@@ -0,0 +1,41 @@
+"""IDACI Singer tap — extracts deprivation index lookup data."""
+
+from __future__ import annotations
+
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+
+class IDACIStream(Stream):
+    """Stream: IDACI scores by LSOA."""
+
+    name = "idaci"
+    primary_keys = ["lsoa_code"]
+    replication_key = None
+
+    schema = th.PropertiesList(
+        th.Property("lsoa_code", th.StringType, required=True),
+        th.Property("idaci_score", th.NumberType),
+        th.Property("idaci_decile", th.IntegerType),
+    ).to_dict()
+
+    def get_records(self, context):
+        # TODO: Implement IDACI extraction
+        # Source: MHCLG IoD 2019 LSOA-level data
+        # Available as a static CSV download
+        self.logger.warning("IDACI extraction not yet implemented")
+        return iter([])
+
+
+class TapUKIDACI(Tap):
+    """Singer tap for UK IDACI data."""
+
+    name = "tap-uk-idaci"
+    config_jsonschema = th.PropertiesList().to_dict()
+
+    def discover_streams(self):
+        return [IDACIStream(self)]
+
+
+if __name__ == "__main__":
+    TapUKIDACI.cli()
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-ofsted"
+version = "0.1.0"
+description = "Singer tap for UK Ofsted Management Information"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+    "pandas>=2.0",
+    "odfpy>=1.4",
+]
+
+[project.scripts]
+tap-uk-ofsted = "tap_uk_ofsted.tap:TapUKOfsted.cli"
@@ -0,0 +1 @@
+"""tap-uk-ofsted: Singer tap for Ofsted Management Information."""
@@ -0,0 +1,176 @@
+"""Ofsted MI Singer tap — extracts inspection records from GOV.UK CSV/ODS."""
+
+from __future__ import annotations
+
+import io
+import re
+
+import requests
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+GOV_UK_PAGE = (
+    "https://www.gov.uk/government/statistical-data-sets/"
+    "monthly-management-information-ofsteds-school-inspections-outcomes"
+)
+
+# Column name → internal field, in priority order (first match wins).
+# Handles both current and older file formats.
+COLUMN_PRIORITY = {
+    "urn": ["URN", "Urn", "urn"],
+    "inspection_date": [
+        "Inspection start date of latest OEIF graded inspection",
+        "Inspection start date",
+        "Inspection date",
+    ],
+    "inspection_type": [
+        "Inspection type of latest OEIF graded inspection",
+        "Inspection type",
+    ],
+    "event_type_grouping": [
+        "Event type grouping",
+        "Inspection type grouping",
+    ],
+    "overall_effectiveness": [
+        "Latest OEIF overall effectiveness",
+        "Overall effectiveness",
+    ],
+    "quality_of_education": [
+        "Latest OEIF quality of education",
+        "Quality of education",
+    ],
+    "behaviour_and_attitudes": [
+        "Latest OEIF behaviour and attitudes",
+        "Behaviour and attitudes",
+    ],
+    "personal_development": [
+        "Latest OEIF personal development",
+        "Personal development",
+    ],
+    "effectiveness_of_leadership_and_management": [
+        "Latest OEIF effectiveness of leadership and management",
+        "Effectiveness of leadership and management",
+    ],
+    "early_years_provision": [
+        "Latest OEIF early years provision",
+        "Early years provision (where applicable)",
+    ],
+    "sixth_form_provision": [
+        "Latest OEIF sixth form provision",
+        "Sixth form provision (where applicable)",
+    ],
+}
+
+
+def discover_csv_url() -> str | None:
+    """Scrape GOV.UK page to find the latest MI CSV download link."""
+    resp = requests.get(GOV_UK_PAGE, timeout=30)
+    resp.raise_for_status()
+    # Look for CSV attachment links
+    matches = re.findall(
+        r'href="(https://assets\.publishing\.service\.gov\.uk/[^"]+\.csv)"',
+        resp.text,
+    )
+    if matches:
+        return matches[0]
+    # Fall back to ODS
+    matches = re.findall(
+        r'href="(https://assets\.publishing\.service\.gov\.uk/[^"]+\.ods)"',
+        resp.text,
+    )
+    return matches[0] if matches else None
+
+
+class OfstedInspectionsStream(Stream):
+    """Stream: Ofsted inspection records."""
+
+    name = "ofsted_inspections"
+    primary_keys = ["urn", "inspection_date"]
+    replication_key = None
+
+    schema = th.PropertiesList(
+        th.Property("urn", th.IntegerType, required=True),
+        th.Property("inspection_date", th.StringType),
+        th.Property("inspection_type", th.StringType),
+        th.Property("event_type_grouping", th.StringType),
+        th.Property("overall_effectiveness", th.StringType),
+        th.Property("quality_of_education", th.StringType),
+        th.Property("behaviour_and_attitudes", th.StringType),
+        th.Property("personal_development", th.StringType),
+        th.Property("effectiveness_of_leadership_and_management", th.StringType),
+        th.Property("early_years_provision", th.StringType),
+        th.Property("sixth_form_provision", th.StringType),
+        th.Property("report_url", th.StringType),
+    ).to_dict()
+
+    def _resolve_columns(self, df_columns: list[str]) -> dict[str, str]:
+        """Map internal field names to actual CSV column names."""
+        mapping = {}
+        for field, candidates in COLUMN_PRIORITY.items():
+            for candidate in candidates:
+                if candidate in df_columns:
+                    mapping[field] = candidate
+                    break
+        return mapping
+
+    def get_records(self, context):
+        import pandas as pd
+
+        url = self.config.get("mi_url") or discover_csv_url()
+        if not url:
+            self.logger.error("Could not discover Ofsted MI download URL")
+            return
+
+        self.logger.info("Downloading Ofsted MI: %s", url)
+        resp = requests.get(url, timeout=120)
+        resp.raise_for_status()
+
+        if url.endswith(".ods"):
+            df = pd.read_excel(io.BytesIO(resp.content), engine="odf", dtype=str)
+        else:
+            # Detect header row (may not be row 0)
+            text = resp.content.decode("utf-8-sig", errors="replace")
+            lines = text.split("\n")
+            header_idx = 0
+            for i, line in enumerate(lines[:20]):
+                if "URN" in line or "urn" in line.lower():
+                    header_idx = i
+                    break
+            df = pd.read_csv(
+                io.StringIO(text),
+                skiprows=header_idx,
+                dtype=str,
+                keep_default_na=False,
+            )
+
+        col_map = self._resolve_columns(list(df.columns))
+
+        for _, row in df.iterrows():
+            record = {}
+            for field, col in col_map.items():
+                record[field] = row.get(col, None)
+
+            # Cast URN
+            try:
+                record["urn"] = int(record["urn"])
+            except (ValueError, KeyError, TypeError):
+                continue
+
+            yield record
+
+
+class TapUKOfsted(Tap):
+    """Singer tap for UK Ofsted Management Information."""
+
+    name = "tap-uk-ofsted"
+
+    config_jsonschema = th.PropertiesList(
+        th.Property("mi_url", th.StringType, description="Direct URL to Ofsted MI file"),
+    ).to_dict()
+
+    def discover_streams(self):
+        return [OfstedInspectionsStream(self)]
+
+
+if __name__ == "__main__":
+    TapUKOfsted.cli()
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tap-uk-parent-view"
+version = "0.1.0"
+description = "Singer tap for UK Ofsted Parent View survey data"
+requires-python = ">=3.10"
+dependencies = [
+    "singer-sdk~=0.39",
+    "requests>=2.31",
+    "pandas>=2.0",
+    "openpyxl>=3.1",
+]
+
+[project.scripts]
+tap-uk-parent-view = "tap_uk_parent_view.tap:TapUKParentView.cli"
@@ -0,0 +1 @@
+"""tap-uk-parent-view: Singer tap for Ofsted Parent View survey data."""
@@ -0,0 +1,49 @@
+"""Parent View Singer tap — extracts survey data from Ofsted Parent View portal."""
+
+from __future__ import annotations
+
+from singer_sdk import Stream, Tap
+from singer_sdk import typing as th
+
+
+class ParentViewStream(Stream):
+    """Stream: Parent View survey responses per school."""
+
+    name = "parent_view"
+    primary_keys = ["urn"]
+    replication_key = None
+
+    schema = th.PropertiesList(
+        th.Property("urn", th.IntegerType, required=True),
+        th.Property("survey_date", th.StringType),
+        th.Property("total_responses", th.IntegerType),
+        th.Property("q_happy_pct", th.NumberType),
+        th.Property("q_safe_pct", th.NumberType),
+        th.Property("q_progress_pct", th.NumberType),
+        th.Property("q_well_taught_pct", th.NumberType),
+        th.Property("q_well_led_pct", th.NumberType),
+        th.Property("q_behaviour_pct", th.NumberType),
+        th.Property("q_bullying_pct", th.NumberType),
+        th.Property("q_recommend_pct", th.NumberType),
+    ).to_dict()
+
+    def get_records(self, context):
+        # TODO: Implement Parent View data extraction
+        # Source: Ofsted Parent View portal XLSX/CSV download
+        # URL discovery requires scraping parentview.ofsted.gov.uk
+        self.logger.warning("Parent View extraction not yet implemented")
+        return iter([])
+
+
+class TapUKParentView(Tap):
+    """Singer tap for UK Ofsted Parent View."""
+
+    name = "tap-uk-parent-view"
+    config_jsonschema = th.PropertiesList().to_dict()
+
+    def discover_streams(self):
+        return [ParentViewStream(self)]
+
+
+if __name__ == "__main__":
+    TapUKParentView.cli()
@@ -0,0 +1,10 @@
+# Pipeline dependencies
+meltano==3.5.*
+dbt-postgres~=1.8
+apache-airflow==2.10.*
+apache-airflow-providers-postgres>=5.0
+typesense>=0.21
+requests>=2.31
+openpyxl>=3.1
+odfpy>=1.4
+psycopg2-binary>=2.9
@@ -0,0 +1,118 @@
+"""
+Batch geocode postcodes via Postcodes.io and update dim_location with lat/lng + PostGIS geometry.
+
+Usage:
+    python geocode_postcodes.py [--batch-size 100]
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import time
+
+import psycopg2
+import psycopg2.extras
+import requests
+
+POSTCODES_IO_BULK = "https://api.postcodes.io/postcodes"
+BATCH_SIZE = 100  # Postcodes.io max per request
+
+
+def get_db_connection():
+    return psycopg2.connect(
+        host=os.environ.get("PG_HOST", "localhost"),
+        port=os.environ.get("PG_PORT", "5432"),
+        user=os.environ.get("PG_USER", "postgres"),
+        password=os.environ.get("PG_PASSWORD", "postgres"),
+        dbname=os.environ.get("PG_DATABASE", "school_compare"),
+    )
+
+
+def fetch_ungeooded_postcodes(conn, limit: int = 5000) -> list[dict]:
+    """Get postcodes from dim_location that don't have lat/lng yet."""
+    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
+        cur.execute("""
+            SELECT urn, postcode
+            FROM marts.dim_location
+            WHERE geom IS NULL
+              AND postcode IS NOT NULL
+            LIMIT %s
+        """, (limit,))
+        return cur.fetchall()
+
+
+def bulk_geocode(postcodes: list[str]) -> dict[str, tuple[float, float]]:
+    """Geocode a batch of postcodes via Postcodes.io bulk API."""
+    resp = requests.post(
+        POSTCODES_IO_BULK,
+        json={"postcodes": postcodes},
+        timeout=30,
+    )
+    resp.raise_for_status()
+
+    results = {}
+    for item in resp.json().get("result", []):
+        if item["result"]:
+            pc = item["query"].upper().replace(" ", "")
+            results[pc] = (item["result"]["latitude"], item["result"]["longitude"])
+    return results
+
+
+def update_locations(conn, updates: list[tuple[float, float, int]]):
+    """Update dim_location with lat/lng and PostGIS geometry."""
+    with conn.cursor() as cur:
+        psycopg2.extras.execute_batch(cur, """
+            UPDATE marts.dim_location
+            SET geom = ST_SetSRID(ST_MakePoint(%s, %s), 4326)
+            WHERE urn = %s
+        """, [(lng, lat, urn) for lat, lng, urn in updates])
+    conn.commit()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Batch geocode school postcodes")
+    parser.add_argument("--batch-size", type=int, default=BATCH_SIZE)
+    args = parser.parse_args()
+
+    conn = get_db_connection()
+
+    rows = fetch_ungeooded_postcodes(conn)
+    if not rows:
+        print("All postcodes already geocoded.")
+        return
+
+    print(f"Geocoding {len(rows)} postcodes...")
+
+    total_updated = 0
+    for i in range(0, len(rows), args.batch_size):
+        batch = rows[i : i + args.batch_size]
+        postcodes = [r["postcode"] for r in batch if r["postcode"]]
+        urn_by_pc = {}
+        for r in batch:
+            if r["postcode"]:
+                pc_key = r["postcode"].upper().replace(" ", "")
+                urn_by_pc.setdefault(pc_key, []).append(r["urn"])
+
+        results = bulk_geocode(postcodes)
+
+        updates = []
+        for pc, (lat, lng) in results.items():
+            for urn in urn_by_pc.get(pc, []):
+                updates.append((lat, lng, urn))
+
+        if updates:
+            update_locations(conn, updates)
+            total_updated += len(updates)
+
+        print(f"  Batch {i // args.batch_size + 1}: geocoded {len(results)}/{len(postcodes)} postcodes")
+
+        # Rate limit: Postcodes.io is generous but be polite
+        time.sleep(0.2)
+
+    conn.close()
+    print(f"Done. Updated {total_updated} locations.")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,177 @@
+"""
+Sync dbt marts → Typesense search index.
+
+Reads dim_school + dim_location + latest fact data from PostgreSQL marts,
+then upserts into a Typesense collection with zero-downtime alias swapping.
+
+Usage:
+    python sync_typesense.py [--typesense-url http://localhost:8108] [--api-key xyz]
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+import time
+
+import psycopg2
+import psycopg2.extras
+import typesense
+
+COLLECTION_SCHEMA = {
+    "fields": [
+        {"name": "urn", "type": "int32"},
+        {"name": "school_name", "type": "string"},
+        {"name": "phase", "type": "string", "facet": True},
+        {"name": "school_type", "type": "string", "facet": True},
+        {"name": "local_authority", "type": "string", "facet": True},
+        {"name": "religious_character", "type": "string", "facet": True, "optional": True},
+        {"name": "ofsted_rating", "type": "string", "facet": True, "optional": True},
+        {"name": "postcode", "type": "string"},
+        {"name": "location", "type": "geopoint", "optional": True},
+        {"name": "headteacher_name", "type": "string", "optional": True},
+        {"name": "rwm_expected_pct", "type": "float", "optional": True},
+        {"name": "progress_8_score", "type": "float", "optional": True},
+        {"name": "total_pupils", "type": "int32", "optional": True},
+    ],
+    "default_sorting_field": "school_name",
+}
+
+OFSTED_LABELS = {1: "Outstanding", 2: "Good", 3: "Requires Improvement", 4: "Inadequate"}
+
+QUERY = """
+    SELECT
+        s.urn,
+        s.school_name,
+        s.phase,
+        s.school_type,
+        l.local_authority_name as local_authority,
+        s.religious_character,
+        s.ofsted_grade,
+        l.postcode,
+        s.headteacher_name,
+        s.total_pupils,
+        -- Latest KS2
+        ks2.rwm_expected_pct,
+        -- Latest KS4
+        ks4.progress_8_score
+    FROM marts.dim_school s
+    LEFT JOIN marts.dim_location l ON s.urn = l.urn
+    LEFT JOIN LATERAL (
+        SELECT rwm_expected_pct
+        FROM marts.fact_ks2_performance
+        WHERE urn = s.urn
+        ORDER BY year DESC
+        LIMIT 1
+    ) ks2 ON true
+    LEFT JOIN LATERAL (
+        SELECT progress_8_score
+        FROM marts.fact_ks4_performance
+        WHERE urn = s.urn
+        ORDER BY year DESC
+        LIMIT 1
+    ) ks4 ON true
+"""
+
+
+def get_db_connection():
+    return psycopg2.connect(
+        host=os.environ.get("PG_HOST", "localhost"),
+        port=os.environ.get("PG_PORT", "5432"),
+        user=os.environ.get("PG_USER", "postgres"),
+        password=os.environ.get("PG_PASSWORD", "postgres"),
+        dbname=os.environ.get("PG_DATABASE", "school_compare"),
+    )
+
+
+def build_document(row: dict) -> dict:
+    """Convert a DB row to a Typesense document."""
+    doc = {
+        "id": str(row["urn"]),
+        "urn": row["urn"],
+        "school_name": row["school_name"] or "",
+        "phase": row["phase"] or "",
+        "school_type": row["school_type"] or "",
+        "local_authority": row["local_authority"] or "",
+        "postcode": row["postcode"] or "",
+    }
+
+    if row.get("religious_character"):
+        doc["religious_character"] = row["religious_character"]
+    if row.get("ofsted_grade"):
+        doc["ofsted_rating"] = OFSTED_LABELS.get(row["ofsted_grade"], "")
+    if row.get("headteacher_name"):
+        doc["headteacher_name"] = row["headteacher_name"]
+    if row.get("total_pupils"):
+        doc["total_pupils"] = row["total_pupils"]
+    if row.get("rwm_expected_pct") is not None:
+        doc["rwm_expected_pct"] = float(row["rwm_expected_pct"])
+    if row.get("progress_8_score") is not None:
+        doc["progress_8_score"] = float(row["progress_8_score"])
+
+    # Geo: location field expects [lat, lng] — will be populated once
+    # dim_location has lat/lng from PostGIS geocoding
+
+    return doc
+
+
+def sync(typesense_url: str, api_key: str):
+    client = typesense.Client({
+        "nodes": [{"host": typesense_url.split("//")[-1].split(":")[0],
+                    "port": typesense_url.split(":")[-1],
+                    "protocol": "http"}],
+        "api_key": api_key,
+        "connection_timeout_seconds": 10,
+    })
+
+    # Create timestamped collection for zero-downtime swap
+    ts = int(time.time())
+    collection_name = f"schools_{ts}"
+
+    print(f"Creating collection: {collection_name}")
+    schema = {**COLLECTION_SCHEMA, "name": collection_name}
+    client.collections.create(schema)
+
+    # Fetch data from marts
+    conn = get_db_connection()
+    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
+        cur.execute(QUERY)
+        rows = cur.fetchall()
+    conn.close()
+
+    print(f"Indexing {len(rows)} schools...")
+
+    # Batch import
+    batch_size = 500
+    for i in range(0, len(rows), batch_size):
+        batch = [build_document(r) for r in rows[i : i + batch_size]]
+        client.collections[collection_name].documents.import_(batch, {"action": "upsert"})
+        print(f"  Indexed {min(i + batch_size, len(rows))}/{len(rows)}")
+
+    # Swap alias
+    print("Swapping alias 'schools' → new collection")
+    try:
+        client.aliases.upsert("schools", {"collection_name": collection_name})
+    except Exception:
+        # If alias doesn't exist yet, create it
+        client.aliases.upsert("schools", {"collection_name": collection_name})
+
+    print("Done.")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Sync marts to Typesense")
+    parser.add_argument("--typesense-url", default=os.environ.get("TYPESENSE_URL", "http://localhost:8108"))
+    parser.add_argument("--api-key", default=os.environ.get("TYPESENSE_API_KEY", ""))
+    args = parser.parse_args()
+
+    if not args.api_key:
+        print("Error: --api-key or TYPESENSE_API_KEY required", file=sys.stderr)
+        sys.exit(1)
+
+    sync(args.typesense_url, args.api_key)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,28 @@
+name: school_compare
+version: "1.0.0"
+config-version: 2
+
+profile: school_compare
+
+model-paths: ["models"]
+macro-paths: ["macros"]
+test-paths: ["tests"]
+seed-paths: ["seeds"]
+target-path: "target"
+clean-targets: ["target", "dbt_packages"]
+
+models:
+  school_compare:
+    staging:
+      +materialized: view
+      +schema: staging
+    intermediate:
+      +materialized: view
+      +schema: intermediate
+    marts:
+      +materialized: table
+      +schema: marts
+
+seeds:
+  school_compare:
+    +schema: seeds
@@ -0,0 +1,36 @@
+-- Macro: Generate a CTE that unions current and predecessor data for a given source
+
+{% macro chain_lineage(source_ref, urn_col='urn', year_col='year') %}
+
+with current_data as (
+    select
+        {{ urn_col }} as current_urn,
+        {{ urn_col }} as source_urn,
+        *
+    from {{ source_ref }}
+),
+
+predecessor_data as (
+    select
+        lin.current_urn,
+        src.{{ urn_col }} as source_urn,
+        src.*
+    from {{ source_ref }} src
+    inner join {{ ref('int_school_lineage') }} lin
+        on src.{{ urn_col }} = lin.predecessor_urn
+    where not exists (
+        select 1 from {{ source_ref }} curr
+        where curr.{{ urn_col }} = lin.current_urn
+          and curr.{{ year_col }} = src.{{ year_col }}
+    )
+),
+
+combined as (
+    select * from current_data
+    union all
+    select * from predecessor_data
+)
+
+select * from combined
+
+{% endmacro %}
@@ -0,0 +1,13 @@
+-- Macro: Parse Ofsted grade from various text/numeric representations
+
+{% macro parse_ofsted_grade(column) %}
+    case
+        when {{ column }}::text in ('1', 'Outstanding') then 1
+        when {{ column }}::text in ('2', 'Good') then 2
+        when {{ column }}::text in ('3', 'Requires improvement', 'Requires Improvement', 'Satisfactory') then 3
+        when {{ column }}::text in ('4', 'Inadequate') then 4
+        when {{ column }}::text in ('9', 'SWK', 'Serious Weaknesses') then 4
+        when {{ column }}::text in ('SM', 'Special Measures') then 4
+        else null
+    end
+{% endmacro %}
@@ -0,0 +1,8 @@
+-- Macro: Validate that latitude/longitude fall within UK bounding box
+
+{% macro validate_uk_coordinates(lat_col, lng_col) %}
+    (
+        {{ lat_col }} between 49.0 and 61.0
+        and {{ lng_col }} between -8.0 and 2.0
+    )
+{% endmacro %}
@@ -0,0 +1,62 @@
+-- Intermediate model: KS2 data chained across academy conversions
+-- Maps predecessor URN data to the current active URN
+
+with current_ks2 as (
+    select
+        urn as current_urn,
+        urn as source_urn,
+        year,
+        total_pupils,
+        rwm_expected_pct,
+        reading_expected_pct,
+        writing_expected_pct,
+        maths_expected_pct,
+        rwm_high_pct,
+        reading_high_pct,
+        writing_high_pct,
+        maths_high_pct,
+        reading_progress,
+        writing_progress,
+        maths_progress,
+        reading_avg_score,
+        maths_avg_score
+    from {{ ref('stg_ees_ks2') }}
+),
+
+predecessor_ks2 as (
+    select
+        lin.current_urn,
+        ks2.urn as source_urn,
+        ks2.year,
+        ks2.total_pupils,
+        ks2.rwm_expected_pct,
+        ks2.reading_expected_pct,
+        ks2.writing_expected_pct,
+        ks2.maths_expected_pct,
+        ks2.rwm_high_pct,
+        ks2.reading_high_pct,
+        ks2.writing_high_pct,
+        ks2.maths_high_pct,
+        ks2.reading_progress,
+        ks2.writing_progress,
+        ks2.maths_progress,
+        ks2.reading_avg_score,
+        ks2.maths_avg_score
+    from {{ ref('stg_ees_ks2') }} ks2
+    inner join {{ ref('int_school_lineage') }} lin
+        on ks2.urn = lin.predecessor_urn
+    -- Only include predecessor data for years before the current URN has data
+    where not exists (
+        select 1 from {{ ref('stg_ees_ks2') }} curr
+        where curr.urn = lin.current_urn
+          and curr.year = ks2.year
+    )
+),
+
+combined as (
+    select * from current_ks2
+    union all
+    select * from predecessor_ks2
+)
+
+select * from combined
@@ -0,0 +1,50 @@
+-- Intermediate model: KS4 data chained across academy conversions
+
+with current_ks4 as (
+    select
+        urn as current_urn,
+        urn as source_urn,
+        year,
+        total_pupils,
+        progress_8_score,
+        attainment_8_score,
+        ebacc_entry_pct,
+        ebacc_achievement_pct,
+        english_strong_pass_pct,
+        maths_strong_pass_pct,
+        english_maths_strong_pass_pct,
+        staying_in_education_pct
+    from {{ ref('stg_ees_ks4') }}
+),
+
+predecessor_ks4 as (
+    select
+        lin.current_urn,
+        ks4.urn as source_urn,
+        ks4.year,
+        ks4.total_pupils,
+        ks4.progress_8_score,
+        ks4.attainment_8_score,
+        ks4.ebacc_entry_pct,
+        ks4.ebacc_achievement_pct,
+        ks4.english_strong_pass_pct,
+        ks4.maths_strong_pass_pct,
+        ks4.english_maths_strong_pass_pct,
+        ks4.staying_in_education_pct
+    from {{ ref('stg_ees_ks4') }} ks4
+    inner join {{ ref('int_school_lineage') }} lin
+        on ks4.urn = lin.predecessor_urn
+    where not exists (
+        select 1 from {{ ref('stg_ees_ks4') }} curr
+        where curr.urn = lin.current_urn
+          and curr.year = ks4.year
+    )
+),
+
+combined as (
+    select * from current_ks4
+    union all
+    select * from predecessor_ks4
+)
+
+select * from combined
@@ -0,0 +1,37 @@
+-- Intermediate model: Latest Ofsted inspection per URN
+-- Picks the most recent inspection for each school
+
+with ranked as (
+    select
+        *,
+        row_number() over (
+            partition by urn
+            order by inspection_date desc
+        ) as rn
+    from {{ ref('stg_ofsted_inspections') }}
+)
+
+select
+    urn,
+    inspection_date,
+    inspection_type,
+    framework,
+    overall_effectiveness,
+    quality_of_education,
+    behaviour_attitudes,
+    personal_development,
+    leadership_management,
+    early_years_provision,
+    sixth_form_provision,
+    rc_safeguarding_met,
+    rc_inclusion,
+    rc_curriculum_teaching,
+    rc_achievement,
+    rc_attendance_behaviour,
+    rc_personal_development,
+    rc_leadership_governance,
+    rc_early_years,
+    rc_sixth_form,
+    report_url
+from ranked
+where rn = 1
@@ -0,0 +1,18 @@
+-- Intermediate model: Merged pupil characteristics from census data
+
+select
+    urn,
+    year,
+    fsm_pct,
+    sen_support_pct,
+    sen_ehcp_pct,
+    eal_pct,
+    disadvantaged_pct,
+    ethnicity_white_pct,
+    ethnicity_asian_pct,
+    ethnicity_black_pct,
+    ethnicity_mixed_pct,
+    ethnicity_other_pct,
+    class_size_avg,
+    stability_pct
+from {{ ref('stg_ees_census') }}
@@ -0,0 +1,48 @@
+-- Intermediate model: Recursive predecessor mapping
+-- Resolves academy conversion chains so historical data can be attributed
+-- to the current (active) URN.
+
+with recursive lineage as (
+    -- Base: schools that are predecessors (linked via academy conversion, amalgamation, etc.)
+    select
+        urn,
+        linked_urn as predecessor_urn,
+        link_type,
+        link_date,
+        1 as depth
+    from {{ ref('stg_gias_links') }}
+    where link_type in (
+        'Predecessor',
+        'Predecessor - Loss of academy converter',
+        'Predecessor - amalgamated',
+        'Predecessor - Fresh Start'
+    )
+
+    union all
+
+    -- Recursive step: follow the chain
+    select
+        l.urn,
+        links.linked_urn as predecessor_urn,
+        links.link_type,
+        links.link_date,
+        l.depth + 1
+    from lineage l
+    inner join {{ ref('stg_gias_links') }} links
+        on l.predecessor_urn = links.urn
+    where links.link_type in (
+        'Predecessor',
+        'Predecessor - Loss of academy converter',
+        'Predecessor - amalgamated',
+        'Predecessor - Fresh Start'
+    )
+    and l.depth < 5  -- safety limit
+)
+
+select
+    urn as current_urn,
+    predecessor_urn,
+    link_type,
+    link_date,
+    depth
+from lineage
@@ -0,0 +1,110 @@
+version: 2
+
+models:
+  - name: dim_school
+    description: Canonical school dimension — one row per active URN
+    columns:
+      - name: urn
+        tests: [not_null, unique]
+      - name: school_name
+        tests: [not_null]
+      - name: phase
+        tests: [not_null]
+      - name: status
+        tests:
+          - accepted_values:
+              values: ["Open"]
+
+  - name: dim_location
+    description: School location dimension with PostGIS geometry
+    columns:
+      - name: urn
+        tests:
+          - not_null
+          - unique
+          - relationships:
+              to: ref('dim_school')
+              field: urn
+      - name: postcode
+        tests: [not_null]
+
+  - name: map_school_lineage
+    description: Predecessor/successor lineage map
+    columns:
+      - name: urn
+        tests:
+          - not_null
+          - relationships:
+              to: ref('dim_school')
+              field: urn
+
+  - name: fact_ks2_performance
+    description: KS2 attainment — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+    tests:
+      - unique:
+          column_name: "urn || '-' || year"
+
+  - name: fact_ks4_performance
+    description: KS4 attainment — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+    tests:
+      - unique:
+          column_name: "urn || '-' || year"
+
+  - name: fact_ofsted_inspection
+    description: Full Ofsted inspection history
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: inspection_date
+        tests: [not_null]
+
+  - name: fact_pupil_characteristics
+    description: Pupil demographics — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+
+  - name: fact_admissions
+    description: School admissions — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+
+  - name: fact_finance
+    description: School financial data — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+
+  - name: fact_phonics
+    description: Phonics screening results — one row per URN per year
+    columns:
+      - name: urn
+        tests: [not_null]
+      - name: year
+        tests: [not_null]
+
+  - name: fact_parent_view
+    description: Parent View survey responses
+    columns:
+      - name: urn
+        tests: [not_null]
+
+  - name: fact_deprivation
+    description: IDACI deprivation index
@@ -0,0 +1,19 @@
+-- Mart: School location dimension — one row per URN, PostGIS-enabled
+-- The geom column is populated by a post-hook or the geocode script.
+
+select
+    s.urn,
+    s.address_line1,
+    s.address_line2,
+    s.town,
+    s.county,
+    s.postcode,
+    s.local_authority_code,
+    s.local_authority_name,
+    s.parliamentary_constituency,
+    s.urban_rural,
+    s.easting,
+    s.northing
+from {{ ref('stg_gias_establishments') }} s
+where s.status = 'Open'
+  and s.postcode is not null
@@ -0,0 +1,40 @@
+-- Mart: Canonical school dimension — one row per active URN
+
+with schools as (
+    select * from {{ ref('stg_gias_establishments') }}
+),
+
+latest_ofsted as (
+    select * from {{ ref('int_ofsted_latest') }}
+)
+
+select
+    s.urn,
+    s.local_authority_code * 1000 + s.establishment_number as laestab,
+    s.school_name,
+    s.phase,
+    s.school_type,
+    s.academy_trust_name,
+    s.academy_trust_uid,
+    s.religious_character,
+    s.gender,
+    s.statutory_low_age || '-' || s.statutory_high_age as age_range,
+    s.capacity,
+    s.total_pupils,
+    concat_ws(' ', s.head_title, s.head_first_name, s.head_last_name) as headteacher_name,
+    s.website,
+    s.telephone,
+    s.open_date,
+    s.close_date,
+    s.status,
+    s.nursery_provision,
+    s.admissions_policy,
+
+    -- Latest Ofsted
+    o.overall_effectiveness as ofsted_grade,
+    o.inspection_date as ofsted_date,
+    o.framework as ofsted_framework
+
+from schools s
+left join latest_ofsted o on s.urn = o.urn
+where s.status = 'Open'
@@ -0,0 +1,10 @@
+-- Mart: School admissions — one row per URN per year
+
+select
+    urn,
+    year,
+    published_admission_number,
+    total_applications,
+    first_preference_offers_pct,
+    oversubscribed
+from {{ ref('stg_ees_admissions') }}
@@ -0,0 +1,22 @@
+-- Mart: Deprivation index — one row per URN
+-- Joins school postcode → LSOA → IDACI score
+
+with school_postcodes as (
+    select
+        urn,
+        postcode
+    from {{ ref('stg_gias_establishments') }}
+    where status = 'Open'
+      and postcode is not null
+)
+
+-- Note: The join between postcode and LSOA requires a postcode-to-LSOA
+-- lookup table. This will be populated by the geocode script or a seed.
+-- For now, this model serves as a placeholder that will be completed
+-- once the IDACI tap provides the postcode→LSOA mapping.
+
+select
+    i.lsoa_code,
+    i.idaci_score,
+    i.idaci_decile
+from {{ ref('stg_idaci') }} i
@@ -0,0 +1,11 @@
+-- Mart: School financial data — one row per URN per year
+
+select
+    urn,
+    year,
+    per_pupil_spend,
+    staff_cost_pct,
+    teacher_cost_pct,
+    support_staff_cost_pct,
+    premises_cost_pct
+from {{ ref('stg_fbit_finance') }}
@@ -0,0 +1,22 @@
+-- Mart: KS2 performance fact table — one row per URN per year
+-- Includes predecessor data via lineage resolution
+
+select
+    current_urn as urn,
+    source_urn,
+    year,
+    total_pupils,
+    rwm_expected_pct,
+    reading_expected_pct,
+    writing_expected_pct,
+    maths_expected_pct,
+    rwm_high_pct,
+    reading_high_pct,
+    writing_high_pct,
+    maths_high_pct,
+    reading_progress,
+    writing_progress,
+    maths_progress,
+    reading_avg_score,
+    maths_avg_score
+from {{ ref('int_ks2_with_lineage') }}
@@ -0,0 +1,16 @@
+-- Mart: KS4 performance fact table — one row per URN per year
+
+select
+    current_urn as urn,
+    source_urn,
+    year,
+    total_pupils,
+    progress_8_score,
+    attainment_8_score,
+    ebacc_entry_pct,
+    ebacc_achievement_pct,
+    english_strong_pass_pct,
+    maths_strong_pass_pct,
+    english_maths_strong_pass_pct,
+    staying_in_education_pct
+from {{ ref('int_ks4_with_lineage') }}
@@ -0,0 +1,25 @@
+-- Mart: Full Ofsted inspection history — one row per inspection
+
+select
+    urn,
+    inspection_date,
+    inspection_type,
+    framework,
+    overall_effectiveness,
+    quality_of_education,
+    behaviour_attitudes,
+    personal_development,
+    leadership_management,
+    early_years_provision,
+    sixth_form_provision,
+    rc_safeguarding_met,
+    rc_inclusion,
+    rc_curriculum_teaching,
+    rc_achievement,
+    rc_attendance_behaviour,
+    rc_personal_development,
+    rc_leadership_governance,
+    rc_early_years,
+    rc_sixth_form,
+    report_url
+from {{ ref('stg_ofsted_inspections') }}
@@ -0,0 +1,15 @@
+-- Mart: Parent View survey responses — one row per URN (latest survey)
+
+select
+    urn,
+    survey_date,
+    total_responses,
+    q_happy_pct,
+    q_safe_pct,
+    q_progress_pct,
+    q_well_taught_pct,
+    q_well_led_pct,
+    q_behaviour_pct,
+    q_bullying_pct,
+    q_recommend_pct
+from {{ ref('stg_parent_view') }}
@@ -0,0 +1,8 @@
+-- Mart: Phonics screening results — one row per URN per year
+
+select
+    urn,
+    year,
+    year1_phonics_pct,
+    year2_phonics_pct
+from {{ ref('stg_ees_phonics') }}
@@ -0,0 +1,18 @@
+-- Mart: Pupil characteristics — one row per URN per year
+
+select
+    urn,
+    year,
+    fsm_pct,
+    sen_support_pct,
+    sen_ehcp_pct,
+    eal_pct,
+    disadvantaged_pct,
+    ethnicity_white_pct,
+    ethnicity_asian_pct,
+    ethnicity_black_pct,
+    ethnicity_mixed_pct,
+    ethnicity_other_pct,
+    class_size_avg,
+    stability_pct
+from {{ ref('int_pupil_chars_merged') }}
@@ -0,0 +1,9 @@
+-- Mart: School predecessor/successor lineage map
+
+select
+    current_urn as urn,
+    predecessor_urn,
+    link_type,
+    link_date,
+    depth
+from {{ ref('int_school_lineage') }}
@@ -0,0 +1,69 @@
+version: 2
+
+sources:
+  - name: raw
+    description: Raw data loaded by Meltano Singer taps into the raw schema
+    schema: raw
+    tables:
+      - name: gias_establishments
+        description: GIAS bulk establishment data (one row per URN)
+        columns:
+          - name: urn
+            tests: [not_null, unique]
+
+      - name: gias_links
+        description: GIAS predecessor/successor links between schools
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ofsted_inspections
+        description: Ofsted Management Information inspection records
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ees_ks2
+        description: KS2 attainment data from Explore Education Statistics
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ees_ks4
+        description: KS4 attainment data from Explore Education Statistics
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ees_census
+        description: School census pupil characteristics
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ees_admissions
+        description: Primary and secondary school admissions data
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: ees_phonics
+        description: Phonics screening check results
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: parent_view
+        description: Ofsted Parent View survey responses
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: fbit_finance
+        description: Financial benchmarking data from FBIT API
+        columns:
+          - name: urn
+            tests: [not_null]
+
+      - name: idaci
+        description: Income Deprivation Affecting Children Index lookups
@@ -0,0 +1,19 @@
+-- Staging model: Primary and secondary school admissions from EES
+
+with source as (
+    select * from {{ source('raw', 'ees_admissions') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                              as urn,
+        cast(time_period as integer)                      as year,
+        cast(published_admission_number as integer)       as published_admission_number,
+        cast(total_applications as integer)               as total_applications,
+        cast(first_preference_offers_pct as numeric)      as first_preference_offers_pct,
+        cast(oversubscribed as boolean)                   as oversubscribed
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,27 @@
+-- Staging model: School census pupil characteristics from EES
+
+with source as (
+    select * from {{ source('raw', 'ees_census') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                        as urn,
+        cast(time_period as integer)                as year,
+        cast(fsm_pct as numeric)                    as fsm_pct,
+        cast(sen_support_pct as numeric)            as sen_support_pct,
+        cast(sen_ehcp_pct as numeric)               as sen_ehcp_pct,
+        cast(eal_pct as numeric)                    as eal_pct,
+        cast(disadvantaged_pct as numeric)          as disadvantaged_pct,
+        cast(ethnicity_white_pct as numeric)        as ethnicity_white_pct,
+        cast(ethnicity_asian_pct as numeric)        as ethnicity_asian_pct,
+        cast(ethnicity_black_pct as numeric)        as ethnicity_black_pct,
+        cast(ethnicity_mixed_pct as numeric)        as ethnicity_mixed_pct,
+        cast(ethnicity_other_pct as numeric)        as ethnicity_other_pct,
+        cast(class_size_avg as numeric)             as class_size_avg,
+        cast(stability_pct as numeric)              as stability_pct
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,31 @@
+-- Staging model: KS2 attainment data from EES
+-- Column names depend on the EES dataset schema; these will be finalised
+-- once the tap-uk-ees extractor resolves the actual column names.
+
+with source as (
+    select * from {{ source('raw', 'ees_ks2') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                    as urn,
+        cast(time_period as integer)            as year,
+        cast(t_pupils as integer)               as total_pupils,
+        cast(pt_rwm_met_expected_standard as numeric) as rwm_expected_pct,
+        cast(pt_read_met_expected_standard as numeric) as reading_expected_pct,
+        cast(pt_write_met_expected_standard as numeric) as writing_expected_pct,
+        cast(pt_maths_met_expected_standard as numeric) as maths_expected_pct,
+        cast(pt_rwm_met_higher_standard as numeric) as rwm_high_pct,
+        cast(pt_read_met_higher_standard as numeric) as reading_high_pct,
+        cast(pt_write_met_higher_standard as numeric) as writing_high_pct,
+        cast(pt_maths_met_higher_standard as numeric) as maths_high_pct,
+        cast(read_progress as numeric)          as reading_progress,
+        cast(write_progress as numeric)         as writing_progress,
+        cast(maths_progress as numeric)         as maths_progress,
+        cast(read_average_score as numeric)     as reading_avg_score,
+        cast(maths_average_score as numeric)    as maths_avg_score
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,24 @@
+-- Staging model: KS4 attainment data from EES (secondary schools — NEW)
+
+with source as (
+    select * from {{ source('raw', 'ees_ks4') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                        as urn,
+        cast(time_period as integer)                as year,
+        cast(t_pupils as integer)                   as total_pupils,
+        cast(progress_8_score as numeric)           as progress_8_score,
+        cast(attainment_8_score as numeric)         as attainment_8_score,
+        cast(ebacc_entry_pct as numeric)            as ebacc_entry_pct,
+        cast(ebacc_achievement_pct as numeric)      as ebacc_achievement_pct,
+        cast(english_strong_pass_pct as numeric)    as english_strong_pass_pct,
+        cast(maths_strong_pass_pct as numeric)      as maths_strong_pass_pct,
+        cast(english_maths_strong_pass_pct as numeric) as english_maths_strong_pass_pct,
+        cast(staying_in_education_pct as numeric)   as staying_in_education_pct
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,17 @@
+-- Staging model: Phonics screening check results from EES
+
+with source as (
+    select * from {{ source('raw', 'ees_phonics') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                    as urn,
+        cast(time_period as integer)            as year,
+        cast(year1_phonics_pct as numeric)      as year1_phonics_pct,
+        cast(year2_phonics_pct as numeric)      as year2_phonics_pct
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,20 @@
+-- Staging model: Financial benchmarking data from FBIT API
+
+with source as (
+    select * from {{ source('raw', 'fbit_finance') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                    as urn,
+        cast(year as integer)                   as year,
+        cast(per_pupil_spend as numeric)        as per_pupil_spend,
+        cast(staff_cost_pct as numeric)         as staff_cost_pct,
+        cast(teacher_cost_pct as numeric)       as teacher_cost_pct,
+        cast(support_staff_cost_pct as numeric) as support_staff_cost_pct,
+        cast(premises_cost_pct as numeric)      as premises_cost_pct
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,49 @@
+-- Staging model: GIAS establishments
+-- Light cleaning, type casting, column renaming from raw GIAS bulk CSV
+
+with source as (
+    select * from {{ source('raw', 'gias_establishments') }}
+),
+
+renamed as (
+    select
+        cast("URN" as integer)                          as urn,
+        cast("LA (code)" as integer)                    as local_authority_code,
+        "LA (name)"                                     as local_authority_name,
+        cast("EstablishmentNumber" as integer)          as establishment_number,
+        "EstablishmentName"                             as school_name,
+        "TypeOfEstablishment (name)"                    as school_type,
+        "PhaseOfEducation (name)"                       as phase,
+        "Gender (name)"                                 as gender,
+        "ReligiousCharacter (name)"                     as religious_character,
+        "AdmissionsPolicy (name)"                       as admissions_policy,
+        "SchoolCapacity"                                as capacity,
+        cast("NumberOfPupils" as integer)               as total_pupils,
+        "HeadTitle (name)"                              as head_title,
+        "HeadFirstName"                                 as head_first_name,
+        "HeadLastName"                                  as head_last_name,
+        "TelephoneNum"                                  as telephone,
+        "SchoolWebsite"                                 as website,
+        "Street"                                        as address_line1,
+        "Locality"                                      as address_line2,
+        "Town"                                          as town,
+        "County (name)"                                 as county,
+        "Postcode"                                      as postcode,
+        "EstablishmentStatus (name)"                    as status,
+        cast("OpenDate" as date)                        as open_date,
+        cast("CloseDate" as date)                       as close_date,
+        "Trusts (name)"                                 as academy_trust_name,
+        cast("Trusts (code)" as integer)                as academy_trust_uid,
+        "UrbanRural (name)"                             as urban_rural,
+        "ParliamentaryConstituency (name)"              as parliamentary_constituency,
+        "NurseryProvision (name)"                       as nursery_provision,
+        cast("Easting" as integer)                      as easting,
+        cast("Northing" as integer)                     as northing,
+        -- Age range
+        cast("StatutoryLowAge" as integer)              as statutory_low_age,
+        cast("StatutoryHighAge" as integer)             as statutory_high_age
+    from source
+    where "URN" is not null
+)
+
+select * from renamed
@@ -0,0 +1,18 @@
+-- Staging model: GIAS school links (predecessor/successor chains)
+
+with source as (
+    select * from {{ source('raw', 'gias_links') }}
+),
+
+renamed as (
+    select
+        cast("URN" as integer)              as urn,
+        cast("LinkURN" as integer)          as linked_urn,
+        "LinkType"                          as link_type,
+        cast("LinkEstablishedDate" as date) as link_date
+    from source
+    where "URN" is not null
+      and "LinkURN" is not null
+)
+
+select * from renamed
@@ -0,0 +1,15 @@
+-- Staging model: Income Deprivation Affecting Children Index
+
+with source as (
+    select * from {{ source('raw', 'idaci') }}
+),
+
+renamed as (
+    select
+        lsoa_code,
+        cast(idaci_score as numeric)    as idaci_score,
+        cast(idaci_decile as integer)   as idaci_decile
+    from source
+)
+
+select * from renamed
@@ -0,0 +1,40 @@
+-- Staging model: Ofsted inspection records
+-- Handles both OEIF (pre-Nov 2025) and Report Card (post-Nov 2025) frameworks
+
+with source as (
+    select * from {{ source('raw', 'ofsted_inspections') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                                as urn,
+        cast(inspection_date as date)                       as inspection_date,
+        inspection_type,
+        event_type_grouping                                 as framework,
+
+        -- OEIF grades (1-4 scale)
+        cast(overall_effectiveness as integer)              as overall_effectiveness,
+        cast(quality_of_education as integer)               as quality_of_education,
+        cast(behaviour_and_attitudes as integer)            as behaviour_attitudes,
+        cast(personal_development as integer)               as personal_development,
+        cast(effectiveness_of_leadership_and_management as integer) as leadership_management,
+        cast(early_years_provision as integer)              as early_years_provision,
+        cast(sixth_form_provision as integer)               as sixth_form_provision,
+
+        -- Report Card fields (populated for post-Nov 2025 inspections)
+        rc_safeguarding_met,
+        rc_inclusion,
+        rc_curriculum_teaching,
+        rc_achievement,
+        rc_attendance_behaviour,
+        rc_personal_development,
+        rc_leadership_governance,
+        rc_early_years,
+        rc_sixth_form,
+
+        report_url
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,24 @@
+-- Staging model: Ofsted Parent View survey responses
+
+with source as (
+    select * from {{ source('raw', 'parent_view') }}
+),
+
+renamed as (
+    select
+        cast(urn as integer)                    as urn,
+        cast(survey_date as date)               as survey_date,
+        cast(total_responses as integer)        as total_responses,
+        cast(q_happy_pct as numeric)            as q_happy_pct,
+        cast(q_safe_pct as numeric)             as q_safe_pct,
+        cast(q_progress_pct as numeric)         as q_progress_pct,
+        cast(q_well_taught_pct as numeric)      as q_well_taught_pct,
+        cast(q_well_led_pct as numeric)         as q_well_led_pct,
+        cast(q_behaviour_pct as numeric)        as q_behaviour_pct,
+        cast(q_bullying_pct as numeric)         as q_bullying_pct,
+        cast(q_recommend_pct as numeric)        as q_recommend_pct
+    from source
+    where urn is not null
+)
+
+select * from renamed
@@ -0,0 +1,22 @@
+school_compare:
+  target: dev
+  outputs:
+    dev:
+      type: postgres
+      host: "{{ env_var('PG_HOST', 'localhost') }}"
+      port: "{{ env_var('PG_PORT', '5432') | int }}"
+      user: "{{ env_var('PG_USER', 'postgres') }}"
+      password: "{{ env_var('PG_PASSWORD', 'postgres') }}"
+      dbname: "{{ env_var('PG_DATABASE', 'school_compare') }}"
+      schema: public
+      threads: 4
+
+    production:
+      type: postgres
+      host: "{{ env_var('PG_HOST') }}"
+      port: "{{ env_var('PG_PORT') | int }}"
+      user: "{{ env_var('PG_USER') }}"
+      password: "{{ env_var('PG_PASSWORD') }}"
+      dbname: "{{ env_var('PG_DATABASE') }}"
+      schema: public
+      threads: 4
@@ -0,0 +1,66 @@
+la_code,la_name
+201,City of London
+202,Camden
+203,Greenwich
+204,Hackney
+205,Hammersmith and Fulham
+206,Islington
+207,Kensington and Chelsea
+208,Lambeth
+209,Lewisham
+210,Merton
+211,Newham
+212,Tower Hamlets
+213,Wandsworth
+214,Westminster
+301,Barking and Dagenham
+302,Barnet
+303,Bexley
+304,Brent
+305,Bromley
+306,Croydon
+307,Ealing
+308,Enfield
+309,Haringey
+310,Harrow
+311,Havering
+312,Hillingdon
+313,Hounslow
+314,Kingston upon Thames
+315,Redbridge
+316,Richmond upon Thames
+317,Sutton
+318,Waltham Forest
+319,City of London
+320,City of London
+330,Birmingham
+331,Coventry
+332,Dudley
+333,Sandwell
+334,Solihull
+335,Walsall
+336,Wolverhampton
+340,Knowsley
+341,Liverpool
+342,St Helens
+343,Sefton
+344,Wirral
+350,Bolton
+351,Bury
+352,Manchester
+353,Oldham
+354,Rochdale
+355,Salford
+356,Stockport
+357,Tameside
+358,Trafford
+359,Wigan
+370,Barnsley
+371,Doncaster
+372,Rotherham
+373,Sheffield
+380,Bradford
+381,Calderdale
+382,Kirklees
+383,Leeds
+384,Wakefield
@@ -0,0 +1,30 @@
+type_code,type_name,type_group
+1,Community school,Maintained
+2,Voluntary aided school,Maintained
+3,Voluntary controlled school,Maintained
+5,Foundation school,Maintained
+6,City technology college,Independent
+7,Community special school,Special
+8,Non-maintained special school,Special
+10,Other independent school,Independent
+11,Other independent special school,Independent
+12,Foundation special school,Special
+14,Pupil referral unit,PRU
+24,Secure unit,Other
+25,Offshore school,Other
+26,Service children's education,Other
+28,Academy sponsor led,Academy
+33,Academy special sponsor led,Academy
+34,Academy converter,Academy
+35,Free schools,Academy
+36,Free schools special,Academy
+37,British schools overseas,Other
+38,Free schools - alternative provision,Academy
+39,Free schools - 16-19,Academy
+40,University technical college,Academy
+41,Studio school,Academy
+42,Academy alternative provision converter,Academy
+43,Academy alternative provision sponsor led,Academy
+44,Academy special converter,Academy
+46,Academy 16-19 converter,Academy
+47,Academy 16-19 sponsor led,Academy
@@ -0,0 +1,7 @@
+-- Custom test: All fact table URNs should exist in dim_school
+
+select f.urn
+from {{ ref('fact_ks2_performance') }} f
+left join {{ ref('dim_school') }} d on f.urn = d.urn
+where d.urn is null
+limit 10
@@ -0,0 +1,13 @@
+-- Custom test: All geocoded schools should have coordinates within the UK
+
+select
+    urn,
+    easting,
+    northing
+from {{ ref('dim_location') }}
+where easting is not null
+  and northing is not null
+  and (
+    easting < 0 or easting > 700000
+    or northing < 0 or northing > 1300000
+  )
				`@@ -0,0 +1 @@`
				`"""tap-uk-ees: Singer tap for Explore Education Statistics API."""`
				`@@ -0,0 +1 @@`
				`"""tap-uk-fbit: Singer tap for Financial Benchmarking and Insights Tool API."""`
				`@@ -0,0 +1 @@`
				`"""tap-uk-gias: Singer tap for GIAS bulk establishment data."""`
				`@@ -0,0 +1 @@`
				`"""tap-uk-idaci: Singer tap for IDACI (Income Deprivation Affecting Children Index)."""`
				`@@ -0,0 +1 @@`
				`"""tap-uk-ofsted: Singer tap for Ofsted Management Information."""`
				`@@ -0,0 +1 @@`
				`"""tap-uk-parent-view: Singer tap for Ofsted Parent View survey data."""`