54 lines
1.7 KiB
Python
54 lines
1.7 KiB
Python
|
|
"""
|
||
|
|
Shared EES (Explore Education Statistics) API client.
|
||
|
|
|
||
|
|
Base URL: https://api.education.gov.uk/statistics/v1
|
||
|
|
"""
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional
|
||
|
|
|
||
|
|
import requests
|
||
|
|
|
||
|
|
# Root URL of the EES statistics API (v1); endpoint paths are appended to it.
API_BASE = "https://api.education.gov.uk/statistics/v1"
|
||
|
|
# Timeout, in seconds, passed to requests.get for API metadata calls.
TIMEOUT = 60
|
||
|
|
|
||
|
|
|
||
|
|
def get_publication_files(publication_slug: str) -> list[dict]:
    """Fetch the data-set file descriptors listed for a publication.

    Sends a GET request to the publication's ``data-set-files`` endpoint
    under ``API_BASE`` and returns the ``results`` entry of the JSON
    payload, or an empty list when the payload has no ``results`` key.

    Raises whatever ``raise_for_status`` raises for an HTTP error status.
    """
    endpoint = f"{API_BASE}/publications/{publication_slug}/data-set-files"
    response = requests.get(endpoint, timeout=TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    return payload.get("results", [])
|
||
|
|
|
||
|
|
|
||
|
|
def get_latest_csv_url(publication_slug: str, keyword: str = "") -> Optional[str]:
    """Return the first usable CSV download URL for a publication.

    Entries are scanned in the order ``get_publication_files`` returns them.
    The first entry whose name contains ``keyword`` (case-insensitive; an
    empty keyword matches everything) and that carries a download URL wins.
    The URL is taken from ``csvDownloadUrl`` or, failing that, from
    ``file.url``.

    NOTE(review): "latest" relies on the API listing the newest file first;
    no sorting is done here -- confirm against the API contract.

    Returns ``None`` when no entry matches or no matching entry has a URL.
    """
    # Lower-case the keyword once rather than on every iteration.
    needle = keyword.lower() if keyword else ""

    for entry in get_publication_files(publication_slug):
        # ``dict.get(key, default)`` only applies the default when the key is
        # absent; a JSON ``null`` comes back as None, so fall back with ``or``.
        name = (entry.get("name") or "").lower()
        if needle and needle not in name:
            continue

        file_info = entry.get("file") or {}
        csv_url = entry.get("csvDownloadUrl") or file_info.get("url")
        if csv_url:
            return csv_url

    return None
|
||
|
|
|
||
|
|
|
||
|
|
def download_csv(url: str, dest_path: Path) -> Path:
    """Download a CSV from EES to ``dest_path`` and return that path.

    If ``dest_path`` already exists the download is skipped and the path is
    returned unchanged. Otherwise the response body is streamed in 64 KiB
    chunks into a sibling ``<name>.part`` file, which is renamed onto
    ``dest_path`` only after the whole body has been written. An interrupted
    download therefore never leaves a truncated file that a later call
    would mistake for a finished one.

    Raises whatever ``requests`` raises for connection problems or an HTTP
    error status, and ``OSError`` for filesystem failures.
    """
    if dest_path.exists():
        print(f" EES: {dest_path.name} already exists, skipping.")
        return dest_path

    print(f" EES: downloading {url} ...")
    partial_path = dest_path.with_name(dest_path.name + ".part")
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the status check or the write fails.
        with requests.get(url, timeout=300, stream=True) as resp:
            resp.raise_for_status()
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with open(partial_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=65536):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        # Publish the file only once it is complete.
        partial_path.replace(dest_path)
    except BaseException:
        # Clean up the partial file (also on Ctrl-C), then re-raise unchanged.
        partial_path.unlink(missing_ok=True)
        raise

    print(f" EES: saved {dest_path} ({dest_path.stat().st_size // 1024} KB)")
    return dest_path
|