feat: migrate backend to marts schema, update EES tap for verified datasets
Pipeline:
- EES tap: split KS4 into performance + info streams, fix admissions filename (SchoolLevel keyword match), fix census filename (yearly suffix), remove phonics (no school-level data on EES), change `endswith` → `in` for matching
- stg_ees_ks4: rewrite to filter long-format data and extract Attainment 8, Progress 8, EBacc, English/Maths metrics; join KS4 info for context
- stg_ees_admissions: map real CSV columns (total_number_places_offered, etc.)
- stg_ees_census: update source reference, stub with TODO for data columns
- Remove stg_ees_phonics, fact_phonics (no school-level EES data)
- Add ees_ks4_performance + ees_ks4_info sources, remove ees_ks4 + ees_phonics
- Update int_ks4_with_lineage + fact_ks4_performance with new KS4 columns
- Annual EES DAG: remove stg_ees_phonics+ from selector

Backend:
- models.py: replace all models to point at marts.* tables with schema='marts' (DimSchool, DimLocation, KS2Performance, FactOfstedInspection, etc.)
- data_loader.py: rewrite load_school_data_as_dataframe() using raw SQL joining dim_school + dim_location + fact_ks2_performance; update get_supplementary_data()
- database.py: remove migration machinery, keep only connection setup
- app.py: remove check_and_migrate_if_needed, remove /api/admin/reimport-ks2 endpoints (pipeline handles all imports)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,36 +1,30 @@
|
||||
"""
|
||||
Database connection setup using SQLAlchemy.
|
||||
The schema is managed by dbt — the backend only reads from marts.* tables.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import create_engine, inspect
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
from contextlib import contextmanager
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
|
||||
from .config import settings
|
||||
|
||||
# Create engine
# Each keyword is passed exactly once: repeating `pool_pre_ping` / `echo`
# in the same call is a SyntaxError (keyword argument repeated).
engine = create_engine(
    settings.database_url,
    pool_size=10,
    max_overflow=20,
    pool_pre_ping=True,  # Verify connections before use
    echo=False,  # Set to True for SQL debugging
)

# Session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Base class for models
Base = declarative_base()
|
||||
|
||||
|
||||
def get_db():
|
||||
"""
|
||||
Dependency for FastAPI routes to get a database session.
|
||||
"""
|
||||
"""Dependency for FastAPI routes."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
@@ -40,10 +34,7 @@ def get_db():
|
||||
|
||||
@contextmanager
|
||||
def get_db_session():
|
||||
"""
|
||||
Context manager for database sessions.
|
||||
Use in non-FastAPI contexts (scripts, etc).
|
||||
"""
|
||||
"""Context manager for non-FastAPI contexts."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
@@ -53,95 +44,3 @@ def get_db_session():
|
||||
raise
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def init_db():
    """Create all tables registered on ``Base.metadata``, bound to the module engine."""
    schema = Base.metadata
    schema.create_all(bind=engine)
|
||||
|
||||
|
||||
def drop_db():
    """Drop all tables registered on ``Base.metadata`` - destructive, use with caution!"""
    schema = Base.metadata
    schema.drop_all(bind=engine)
|
||||
|
||||
|
||||
def get_db_schema_version() -> Optional[int]:
    """
    Read the schema version currently recorded in the database.

    Returns:
        The stored version, or None when the ``schema_version`` table is
        missing, holds no row, or the lookup raises.
    """
    from .models import SchemaVersion  # Deferred import to avoid circular imports

    # Without the table there is nothing to query.
    existing_tables = inspect(engine).get_table_names()
    if "schema_version" not in existing_tables:
        return None

    try:
        with get_db_session() as session:
            record = session.query(SchemaVersion).first()
            if not record:
                return None
            return record.version
    except Exception:
        # Best-effort lookup: any failure is reported as "no version".
        return None
|
||||
|
||||
|
||||
def set_db_schema_version(version: int):
    """
    Record ``version`` as the schema version stored in the database.

    Updates the first ``SchemaVersion`` row when one exists; otherwise adds a
    new row with ``id=1``. ``migrated_at`` is stamped with
    ``datetime.utcnow()`` in both cases.

    Args:
        version: Schema version number to store.
    """
    from .models import SchemaVersion

    with get_db_session() as session:
        existing = session.query(SchemaVersion).first()
        if not existing:
            # No row yet: create one.
            session.add(
                SchemaVersion(id=1, version=version, migrated_at=datetime.utcnow())
            )
        else:
            existing.version = version
            existing.migrated_at = datetime.utcnow()
|
||||
|
||||
|
||||
def check_and_migrate_if_needed():
    """
    Compare the stored schema version with the code's and migrate on mismatch.

    When the versions match, only ``init_db()`` is called. Otherwise the new
    version is recorded and ``run_full_migration(geocode=False)`` is run.

    Raises:
        Exception: Any error raised during table creation, version recording
            or migration is reported with ``print`` and re-raised.
    """
    from .version import SCHEMA_VERSION
    from .migration import run_full_migration

    db_version = get_db_schema_version()

    # Fast path: versions agree.
    if db_version == SCHEMA_VERSION:
        print(f"Schema version {SCHEMA_VERSION} matches. Fast startup.")
        # Still ensure tables exist (they should if version matches)
        init_db()
        return

    # NOTE(review): indentation was lost in the reviewed text; the
    # "Running full migration..." line is assumed to belong to the
    # mismatch branch only - confirm against the original file.
    if db_version is not None:
        print(f"Schema mismatch: DB has v{db_version}, code expects v{SCHEMA_VERSION}")
        print("Running full migration...")
    else:
        print(f"No schema version found. Running initial migration (v{SCHEMA_VERSION})...")

    try:
        # Set schema version BEFORE migration so a crash mid-migration
        # doesn't cause an infinite re-migration loop on every restart.
        init_db()
        set_db_schema_version(SCHEMA_VERSION)

        if not run_full_migration(geocode=False):
            print("Warning: Migration completed but no data was imported.")
        else:
            print(f"Migration complete. Schema version {SCHEMA_VERSION}.")

    except Exception as e:
        print(f"FATAL: Migration failed: {e}")
        print("Application cannot start. Please check database and CSV files.")
        raise
|
||||
|
||||
|
||||
Reference in New Issue
Block a user