diff --git a/backend/app.py b/backend/app.py index f4da070..fa772d5 100644 --- a/backend/app.py +++ b/backend/app.py @@ -4,6 +4,7 @@ Serves primary and secondary school performance data for comparing schools. Uses real data from UK Government Compare School Performance downloads. """ +import hashlib import re from contextlib import asynccontextmanager from typing import Optional @@ -12,6 +13,7 @@ import numpy as np import pandas as pd from fastapi import FastAPI, HTTPException, Query, Request, Depends, Header from fastapi.middleware.cors import CORSMiddleware +from fastapi.middleware.gzip import GZipMiddleware from fastapi.responses import FileResponse, Response from fastapi.staticfiles import StaticFiles from slowapi import Limiter, _rate_limit_exceeded_handler @@ -165,6 +167,69 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): return response +# Per-path Cache-Control rules. Keys are matched as path prefixes (longest wins). +# Values: (max_age, s_maxage, stale_while_revalidate) +CACHE_RULES: list[tuple[str, tuple[int, int, int]]] = [ + ("/api/filters", (300, 86400, 604800)), + ("/api/metrics", (300, 86400, 604800)), + ("/api/national-averages", (300, 86400, 604800)), + ("/api/la-averages", (300, 86400, 604800)), + ("/api/data-info", (300, 86400, 604800)), + ("/api/schools/", (300, 3600, 86400)), # /api/schools/{urn} + ("/api/rankings", (60, 600, 3600)), + ("/api/compare", (60, 600, 3600)), + ("/api/schools", (30, 300, 1800)), # search list +] + + +def _cache_control_for_path(path: str) -> Optional[str]: + # Longest-prefix match + best: Optional[tuple[int, tuple[int, int, int]]] = None + for prefix, vals in CACHE_RULES: + if path.startswith(prefix) and (best is None or len(prefix) > best[0]): + best = (len(prefix), vals) + if best is None: + return None + max_age, s_maxage, swr = best[1] + return f"public, max-age={max_age}, s-maxage={s_maxage}, stale-while-revalidate={swr}" + + +class CacheAndETagMiddleware(BaseHTTPMiddleware): + """Set Cache-Control on 
class CacheAndETagMiddleware(BaseHTTPMiddleware):
    """Set Cache-Control on cacheable API responses and serve 304s via ETag.

    Only successful (200) GET responses whose path has a rule in
    ``_cache_control_for_path`` are touched; everything else is passed through
    unchanged. For matching responses the body is buffered, hashed into an
    ETag, and a 304 is returned when the client's ``If-None-Match`` already
    names that ETag.
    """

    @staticmethod
    def _if_none_match_satisfied(header_value: str, etag: str) -> bool:
        """Return True when *header_value* (If-None-Match) matches *etag*.

        Handles ``*``, comma-separated lists, and weak validators. The
        comparison is weak (a ``W/`` prefix is ignored on both sides), as
        RFC 9110 specifies for If-None-Match, so an ETag that an intermediary
        weakened while compressing still revalidates.
        """
        if header_value.strip() == "*":
            return True
        target = etag[2:] if etag.startswith("W/") else etag
        for candidate in header_value.split(","):
            candidate = candidate.strip()
            if candidate.startswith("W/"):
                candidate = candidate[2:]
            if candidate == target:
                return True
        return False

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)

        # Only cache GETs that succeeded.
        if request.method != "GET" or response.status_code != 200:
            return response

        cache_header = _cache_control_for_path(request.url.path)
        if cache_header is None:
            return response

        # Drain body so we can hash it for ETag.
        body = b"".join([chunk async for chunk in response.body_iterator])

        # MD5 is used purely as a content fingerprint; flagging it as
        # non-security keeps this working on FIPS-restricted Python builds.
        etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"'

        # Header names are case-insensitive. Normalise to lowercase so the
        # lookups and pops below hit existing entries however upstream spelled
        # them, and so we never emit the same header twice in different casing.
        headers = {name.lower(): value for name, value in response.headers.items()}
        headers["cache-control"] = cache_header
        headers["etag"] = etag

        # Merge Accept-Encoding into any existing Vary without duplicating it.
        vary = [token.strip() for token in headers.get("vary", "").split(",") if token.strip()]
        if "*" not in vary and not any(token.lower() == "accept-encoding" for token in vary):
            vary.append("Accept-Encoding")
        headers["vary"] = ", ".join(vary)

        inm = request.headers.get("if-none-match")
        if inm and self._if_none_match_satisfied(inm, etag):
            # Strip content headers on 304: there is no body to describe.
            for name in ("content-length", "content-type"):
                headers.pop(name, None)
            return Response(status_code=304, headers=headers)

        return Response(content=body, status_code=200, headers=headers, media_type=response.media_type)
+app.add_middleware(CacheAndETagMiddleware) app.add_middleware(SecurityHeadersMiddleware) app.add_middleware(RequestSizeLimitMiddleware) +app.add_middleware(GZipMiddleware, minimum_size=512) # CORS middleware - restricted for production app.add_middleware( diff --git a/nextjs-app/app/compare/page.tsx b/nextjs-app/app/compare/page.tsx index 4cdf2ff..1a1ff38 100644 --- a/nextjs-app/app/compare/page.tsx +++ b/nextjs-app/app/compare/page.tsx @@ -20,8 +20,8 @@ export const metadata: Metadata = { keywords: 'school comparison, compare schools, KS2 comparison, primary school performance', }; -// Force dynamic rendering -export const dynamic = 'force-dynamic'; +// Dynamic via searchParams; remove force-dynamic so internal data fetches +// can still use Next.js's per-call revalidate cache. export default async function ComparePage({ searchParams }: ComparePageProps) { const { urns: urnsParam, metric: metricParam } = await searchParams; diff --git a/nextjs-app/app/layout.tsx b/nextjs-app/app/layout.tsx index d7229fd..0ee5b9b 100644 --- a/nextjs-app/app/layout.tsx +++ b/nextjs-app/app/layout.tsx @@ -74,8 +74,9 @@ export default function RootLayout({ return ( + +