fix(admissions): switch to EES content API + correct publication slug and columns

The EES statistics API only exposes ~13 publications; admissions data is not among them. Switch to the EES content API (content.explore-education-statistics.service.gov.uk), which covers all publications.

- ees.py: add get_content_release_id() and download_release_zip_csv() that fetch the release ZIP and extract a named CSV member from it
- admissions.py: use corrected slug (primary-and-secondary-school-applications-and-offers), correct column names from actual CSV (school_urn, total_number_places_offered, times_put_as_1st_preference, etc.), derive first_preference_offers_pct from offer/application ratio, filter to primary schools only, keep most recent year per URN

Also includes SchoolDetailView UX redesign: parent-first section ordering, plain-English labels, national average benchmarks, progress score colour coding, expanded header, quick summary strip, and CSS consolidation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-25 10:06:36 +00:00
parent 00dca39fbd
commit b68063c9b9
5 changed files with 951 additions and 652 deletions
--- a/integrator/scripts/sources/ees.py
+++ b/integrator/scripts/sources/ees.py
@@ -1,21 +1,27 @@
 """
 Shared EES (Explore Education Statistics) API client.

-Base URL: https://api.education.gov.uk/statistics/v1
+Two APIs are available:
+  - Statistics API: https://api.education.gov.uk/statistics/v1  (only ~13 publications)
+  - Content API:    https://content.explore-education-statistics.service.gov.uk/api
+    Covers all publications; use this for admissions and other data not in the stats API.
+    Download all files for a release as a ZIP from /api/releases/{id}/files.
 """
-import sys
+import io
+import zipfile
 from pathlib import Path
 from typing import Optional

 import requests

-API_BASE = "https://api.education.gov.uk/statistics/v1"
+STATS_API_BASE = "https://api.education.gov.uk/statistics/v1"
+CONTENT_API_BASE = "https://content.explore-education-statistics.service.gov.uk/api"
 TIMEOUT = 60


 def get_publication_files(publication_slug: str) -> list[dict]:
-    """Return list of data-set file descriptors for a publication."""
-    url = f"{API_BASE}/publications/{publication_slug}/data-set-files"
+    """Return list of data-set file descriptors for a publication (statistics API)."""
+    url = f"{STATS_API_BASE}/publications/{publication_slug}/data-set-files"
    resp = requests.get(url, timeout=TIMEOUT)
    resp.raise_for_status()
    return resp.json().get("results", [])
@@ -23,7 +29,7 @@ def get_publication_files(publication_slug: str) -> list[dict]:

 def get_latest_csv_url(publication_slug: str, keyword: str = "") -> Optional[str]:
    """
-    Find the most recent CSV download URL for a publication.
+    Find the most recent CSV download URL for a publication (statistics API).
    Optionally filter by a keyword in the file name.
    """
    files = get_publication_files(publication_slug)
@@ -37,6 +43,58 @@ def get_latest_csv_url(publication_slug: str, keyword: str = "") -> Optional[str
    return None


+def get_content_release_id(publication_slug: str) -> str:
+    """Return the latest release ID for a publication via the content API."""
+    url = f"{CONTENT_API_BASE}/publications/{publication_slug}/releases/latest"
+    resp = requests.get(url, timeout=TIMEOUT)
+    resp.raise_for_status()
+    return resp.json()["id"]
+
+
+def download_release_zip_csv(
+    publication_slug: str,
+    dest_path: Path,
+    zip_member_keyword: str = "",
+) -> Path:
+    """
+    Download the full-release ZIP from the EES content API and extract one CSV.
+
+    If zip_member_keyword is given, the first member whose path contains that
+    keyword (case-insensitive) is extracted; otherwise the first .csv found is used.
+    Returns dest_path (the extracted CSV file).
+    """
+    if dest_path.exists():
+        print(f"    EES: {dest_path.name} already exists, skipping.")
+        return dest_path
+
+    release_id = get_content_release_id(publication_slug)
+    zip_url = f"{CONTENT_API_BASE}/releases/{release_id}/files"
+    print(f"    EES: downloading release ZIP for '{publication_slug}' ...")
+    resp = requests.get(zip_url, timeout=300, stream=True)
+    resp.raise_for_status()
+
+    data = b"".join(resp.iter_content(chunk_size=65536))
+    with zipfile.ZipFile(io.BytesIO(data)) as z:
+        members = z.namelist()
+        target = None
+        kw = zip_member_keyword.lower()
+        for m in members:
+            if m.endswith(".csv") and (not kw or kw in m.lower()):
+                target = m
+                break
+        if not target:
+            raise ValueError(
+                f"No CSV matching '{zip_member_keyword}' in ZIP. Members: {members}"
+            )
+        print(f"    EES: extracting '{target}' ...")
+        dest_path.parent.mkdir(parents=True, exist_ok=True)
+        with z.open(target) as src, open(dest_path, "wb") as dst:
+            dst.write(src.read())
+
+    print(f"    EES: saved {dest_path} ({dest_path.stat().st_size // 1024} KB)")
+    return dest_path
+
+
 def download_csv(url: str, dest_path: Path) -> Path:
    """Download a CSV from EES to dest_path."""
    if dest_path.exists():