Files
school_compare/scripts/migrate_csv_to_db.py

69 lines
2.0 KiB
Python
Raw Normal View History

2026-01-06 17:22:26 +00:00
#!/usr/bin/env python3
"""
CLI script for manual database migration.
2026-01-06 17:22:26 +00:00
Usage:
python scripts/migrate_csv_to_db.py [--drop] [--geocode]
Options:
--drop Drop existing tables before migration (full reimport)
2026-01-06 17:22:26 +00:00
--geocode Geocode postcodes (requires network access)
"""
import sys
2026-01-06 17:22:26 +00:00
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
import argparse
from backend.config import settings
from backend.database import Base, engine, init_db, set_db_schema_version
from backend.migration import load_csv_data, migrate_data, run_full_migration
from backend.version import SCHEMA_VERSION
2026-01-06 17:22:26 +00:00
def main():
    """Run the CSV -> PostgreSQL migration from the command line.

    Parses ``--drop`` and ``--geocode`` from ``sys.argv``, prints a short
    summary of the target database, data directory and schema version, and
    then either:

    * with ``--drop``: delegates to ``run_full_migration``; or
    * without it: calls ``init_db``, loads the CSV data from
      ``settings.data_dir`` and passes it to ``migrate_data``.

    When the migration is considered successful, the schema version is
    recorded via ``set_db_schema_version``.

    Returns:
        int: ``0`` on success; ``1`` when the loaded CSV data is empty or
        ``run_full_migration`` returns a falsy result.
    """
    cli = argparse.ArgumentParser(
        description="Migrate CSV data to PostgreSQL database"
    )
    cli.add_argument(
        "--drop", action="store_true", help="Drop existing tables before migration"
    )
    cli.add_argument("--geocode", action="store_true", help="Geocode postcodes")
    options = cli.parse_args()

    banner = "=" * 60
    print(banner)
    print("School Data Migration: CSV -> PostgreSQL")
    print(banner)

    # Only the part after the last '@' is shown, so any credentials embedded
    # before it in the URL are not echoed to the terminal.
    db_location = settings.database_url.split("@")[-1]
    print(f"\nDatabase: {db_location}")
    print(f"Data directory: {settings.data_dir}")
    print(f"Target schema version: {SCHEMA_VERSION}")

    if options.drop:
        print("\nRunning full migration (drop + reimport)...")
        succeeded = run_full_migration(geocode=options.geocode)
    else:
        print("\nCreating tables (preserving existing data)...")
        init_db()

        print("\nLoading CSV data...")
        frame = load_csv_data(settings.data_dir)
        if frame.empty:
            print("No data found to migrate!")
            return 1

        migrate_data(frame, geocode=options.geocode)
        succeeded = True

    if not succeeded:
        return 1

    # Ensure schema_version table exists before recording the version.
    init_db()
    set_db_schema_version(SCHEMA_VERSION)
    print(f"\nSchema version set to {SCHEMA_VERSION}")
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())