diff --git a/scripts/migrate_csv_to_db.py b/scripts/migrate_csv_to_db.py index 7891bd3..bd1f3b1 100644 --- a/scripts/migrate_csv_to_db.py +++ b/scripts/migrate_csv_to_db.py @@ -195,7 +195,16 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False): schools_created = 0 for _, row in school_data.iterrows(): - urn = int(row['urn']) if pd.notna(row.get('urn')) else None + # Safely parse URN - handle None, NaN, whitespace, and invalid values + urn_val = row.get('urn') + urn = None + if pd.notna(urn_val): + try: + urn_str = str(urn_val).strip() + if urn_str: + urn = int(float(urn_str)) # Handle "12345.0" format + except (ValueError, TypeError): + pass if not urn: continue @@ -207,11 +216,22 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False): if coords: lat, lon = coords + # Safely parse local_authority_code + la_code = None + la_code_val = row.get('local_authority_code') + if pd.notna(la_code_val): + try: + la_code_str = str(la_code_val).strip() + if la_code_str: + la_code = int(float(la_code_str)) + except (ValueError, TypeError): + pass + school = School( urn=urn, school_name=row.get('school_name') if pd.notna(row.get('school_name')) else 'Unknown', local_authority=row.get('local_authority') if pd.notna(row.get('local_authority')) else None, - local_authority_code=int(row.get('local_authority_code')) if pd.notna(row.get('local_authority_code')) else None, + local_authority_code=la_code, school_type=row.get('school_type') if pd.notna(row.get('school_type')) else None, school_type_code=row.get('school_type_code') if pd.notna(row.get('school_type_code')) else None, religious_denomination=row.get('religious_denomination') if pd.notna(row.get('religious_denomination')) else None, @@ -238,12 +258,29 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False): results_created = 0 for _, row in df.iterrows(): - urn = int(row['urn']) if pd.notna(row.get('urn')) else None + # Safely parse URN + urn_val = row.get('urn') + urn = None + if pd.notna(urn_val): + try: + urn_str = str(urn_val).strip() + if urn_str: + urn = int(float(urn_str)) + except (ValueError, TypeError): + pass if not urn or urn not in urn_to_school_id: continue school_id = urn_to_school_id[urn] - year = int(row['year']) if pd.notna(row.get('year')) else None + + # Safely parse year + year_val = row.get('year') + year = None + if pd.notna(year_val): + try: + year = int(float(str(year_val).strip())) + except (ValueError, TypeError): + pass if not year: continue