feat: migrate backend to marts schema, update EES tap for verified datasets
Pipeline:
- EES tap: split KS4 into performance + info streams; fix admissions filename (SchoolLevel keyword match); fix census filename (yearly suffix); remove phonics (no school-level data on EES); change endswith → in for matching.
- stg_ees_ks4: rewrite to filter long-format data and extract Attainment 8, Progress 8, EBacc, and English/Maths metrics; join KS4 info for context.
- stg_ees_admissions: map real CSV columns (total_number_places_offered, etc.).
- stg_ees_census: update source reference; stub with TODO for data columns.
- Remove stg_ees_phonics and fact_phonics (no school-level EES data).
- Add ees_ks4_performance + ees_ks4_info sources; remove ees_ks4 + ees_phonics.
- Update int_ks4_with_lineage + fact_ks4_performance with new KS4 columns.
- Annual EES DAG: remove stg_ees_phonics+ from selector.

Backend:
- models.py: replace all models to point at marts.* tables with schema='marts' (DimSchool, DimLocation, KS2Performance, FactOfstedInspection, etc.).
- data_loader.py: rewrite load_school_data_as_dataframe() using raw SQL joining dim_school + dim_location + fact_ks2_performance; update get_supplementary_data().
- database.py: remove migration machinery, keep only connection setup.
- app.py: remove check_and_migrate_if_needed; remove /api/admin/reimport-ks2 endpoints (pipeline handles all imports).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,8 +28,6 @@ from .data_loader import (
|
||||
get_supplementary_data,
|
||||
)
|
||||
from .data_loader import get_data_info as get_db_info
|
||||
from .database import check_and_migrate_if_needed
|
||||
from .migration import run_full_migration
|
||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||
from .utils import clean_for_json
|
||||
|
||||
@@ -138,20 +136,15 @@ def validate_postcode(postcode: Optional[str]) -> Optional[str]:
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan - startup and shutdown events."""
|
||||
# Startup: check schema version and migrate if needed
|
||||
print("Starting up: Checking database schema...")
|
||||
check_and_migrate_if_needed()
|
||||
|
||||
print("Loading school data from database...")
|
||||
print("Loading school data from marts...")
|
||||
df = load_school_data()
|
||||
if df.empty:
|
||||
print("Warning: No data in database. Check CSV files in data/ folder.")
|
||||
print("Warning: No data in marts. Run the annual EES pipeline to populate KS2 data.")
|
||||
else:
|
||||
print(f"Data loaded successfully: {len(df)} records.")
|
||||
|
||||
yield # Application runs here
|
||||
yield
|
||||
|
||||
# Shutdown: cleanup if needed
|
||||
print("Shutting down...")
|
||||
|
||||
|
||||
@@ -585,7 +578,7 @@ async def get_data_info(request: Request):
|
||||
if db_info["total_schools"] == 0:
|
||||
return {
|
||||
"status": "no_data",
|
||||
"message": "No data in database. Run the migration script: python scripts/migrate_csv_to_db.py",
|
||||
"message": "No data in marts. Run the annual EES pipeline to load KS2 data.",
|
||||
"data_source": "PostgreSQL",
|
||||
}
|
||||
|
||||
@@ -635,56 +628,6 @@ async def reload_data(
|
||||
return {"status": "reloaded"}
|
||||
|
||||
|
||||
_reimport_status: dict = {"running": False, "done": False, "error": None}
|
||||
|
||||
|
||||
@app.post("/api/admin/reimport-ks2")
|
||||
@limiter.limit("2/minute")
|
||||
async def reimport_ks2(
|
||||
request: Request,
|
||||
geocode: bool = True,
|
||||
_: bool = Depends(verify_admin_api_key)
|
||||
):
|
||||
"""
|
||||
Start a full KS2 CSV migration in the background and return immediately.
|
||||
Poll GET /api/admin/reimport-ks2/status to check progress.
|
||||
Pass ?geocode=false to skip postcode → lat/lng resolution.
|
||||
Requires X-API-Key header with valid admin API key.
|
||||
"""
|
||||
global _reimport_status
|
||||
if _reimport_status["running"]:
|
||||
return {"status": "already_running"}
|
||||
|
||||
_reimport_status = {"running": True, "done": False, "error": None}
|
||||
|
||||
def _run():
|
||||
global _reimport_status
|
||||
try:
|
||||
success = run_full_migration(geocode=geocode)
|
||||
if not success:
|
||||
_reimport_status = {"running": False, "done": False, "error": "No CSV data found"}
|
||||
return
|
||||
clear_cache()
|
||||
load_school_data()
|
||||
_reimport_status = {"running": False, "done": True, "error": None}
|
||||
except Exception as exc:
|
||||
_reimport_status = {"running": False, "done": False, "error": str(exc)}
|
||||
|
||||
import threading
|
||||
threading.Thread(target=_run, daemon=True).start()
|
||||
return {"status": "started"}
|
||||
|
||||
|
||||
@app.get("/api/admin/reimport-ks2/status")
|
||||
async def reimport_ks2_status(
|
||||
request: Request,
|
||||
_: bool = Depends(verify_admin_api_key)
|
||||
):
|
||||
"""Poll this endpoint to check reimport progress."""
|
||||
s = _reimport_status
|
||||
if s["error"]:
|
||||
raise HTTPException(status_code=500, detail=s["error"])
|
||||
return {"running": s["running"], "done": s["done"]}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user