Fix migration script: clean URN values and skip duplicate URNs
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 55s

This commit is contained in:
Tudor Sitaru
2026-01-06 21:12:47 +00:00
parent 822feaf494
commit 35e661d732

View File

@@ -177,8 +177,14 @@ def load_csv_data(data_dir: Path) -> pd.DataFrame:
def migrate_data(df: pd.DataFrame, geocode: bool = False):
"""Migrate DataFrame data to database."""
# Group by URN to get unique schools
school_data = df.groupby('urn').first().reset_index()
# Clean URN column - convert to integer, drop invalid values
df = df.copy()
df['urn'] = pd.to_numeric(df['urn'], errors='coerce')
df = df.dropna(subset=['urn'])
df['urn'] = df['urn'].astype(int)
# Group by URN to get unique schools (use latest year's data)
school_data = df.sort_values('year', ascending=False).groupby('urn').first().reset_index()
print(f"\nMigrating {len(school_data)} unique schools...")
# Geocode if requested
@@ -208,6 +214,10 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
if not urn:
continue
# Skip if we've already added this URN (handles duplicates in source data)
if urn in urn_to_school_id:
continue
# Get geocoding data
postcode = row.get('postcode')
lat, lon = None, None