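Fix migration script: coerce URN to integer, drop invalid URNs, and de-duplicate schools by URN using the latest year's data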
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 55s
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 55s
This commit is contained in:
@@ -177,8 +177,14 @@ def load_csv_data(data_dir: Path) -> pd.DataFrame:
|
||||
def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
||||
"""Migrate DataFrame data to database."""
|
||||
|
||||
# Group by URN to get unique schools
|
||||
school_data = df.groupby('urn').first().reset_index()
|
||||
# Clean URN column - convert to integer, drop invalid values
|
||||
df = df.copy()
|
||||
df['urn'] = pd.to_numeric(df['urn'], errors='coerce')
|
||||
df = df.dropna(subset=['urn'])
|
||||
df['urn'] = df['urn'].astype(int)
|
||||
|
||||
# Group by URN to get unique schools (use latest year's data)
|
||||
school_data = df.sort_values('year', ascending=False).groupby('urn').first().reset_index()
|
||||
print(f"\nMigrating {len(school_data)} unique schools...")
|
||||
|
||||
# Geocode if requested
|
||||
@@ -208,6 +214,10 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
||||
if not urn:
|
||||
continue
|
||||
|
||||
# Skip if we've already added this URN (handles duplicates in source data)
|
||||
if urn in urn_to_school_id:
|
||||
continue
|
||||
|
||||
# Get geocoding data
|
||||
postcode = row.get('postcode')
|
||||
lat, lon = None, None
|
||||
|
||||
Reference in New Issue
Block a user