Fix migration script to handle percentage signs in CSV data
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 55s
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 55s
- Updated parse_numeric() to strip percentage signs before parsing.
- This fixes the issue where all percentage metrics were showing as NULL/empty.
- School cards will now display actual performance data after the migration is re-run.
This commit is contained in:
@@ -10,23 +10,28 @@ Options:
|
|||||||
--geocode Geocode postcodes (requires network access)
|
--geocode Geocode postcodes (requires network access)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Add parent directory to path for imports
|
# Add parent directory to path for imports
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import re
|
import re
|
||||||
from typing import Optional, Dict
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from backend.config import settings
|
from backend.config import settings
|
||||||
from backend.database import engine, Base, get_db_session
|
from backend.models import (
|
||||||
from backend.models import School, SchoolResult, SCHOOL_FIELD_MAPPING, RESULT_FIELD_MAPPING
|
School,
|
||||||
|
SchoolResult,
|
||||||
|
SCHOOL_FIELD_MAPPING,
|
||||||
|
RESULT_FIELD_MAPPING,
|
||||||
|
)
|
||||||
from backend.schemas import (
|
from backend.schemas import (
|
||||||
COLUMN_MAPPINGS,
|
COLUMN_MAPPINGS,
|
||||||
NUMERIC_COLUMNS,
|
NUMERIC_COLUMNS,
|
||||||
@@ -43,8 +48,10 @@ def parse_numeric(value) -> Optional[float]:
|
|||||||
if isinstance(value, (int, float)):
|
if isinstance(value, (int, float)):
|
||||||
return float(value) if not np.isnan(value) else None
|
return float(value) if not np.isnan(value) else None
|
||||||
str_val = str(value).strip().upper()
|
str_val = str(value).strip().upper()
|
||||||
if str_val in NULL_VALUES or str_val == '':
|
if str_val in NULL_VALUES or str_val == "":
|
||||||
return None
|
return None
|
||||||
|
# Remove percentage signs if present
|
||||||
|
str_val = str_val.replace("%", "")
|
||||||
try:
|
try:
|
||||||
return float(str_val)
|
return float(str_val)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -53,10 +60,10 @@ def parse_numeric(value) -> Optional[float]:
|
|||||||
|
|
||||||
def extract_year_from_folder(folder_name: str) -> Optional[int]:
|
def extract_year_from_folder(folder_name: str) -> Optional[int]:
|
||||||
"""Extract year from folder name like '2023-2024'."""
|
"""Extract year from folder name like '2023-2024'."""
|
||||||
match = re.search(r'(\d{4})-(\d{4})', folder_name)
|
match = re.search(r"(\d{4})-(\d{4})", folder_name)
|
||||||
if match:
|
if match:
|
||||||
return int(match.group(2))
|
return int(match.group(2))
|
||||||
match = re.search(r'(\d{4})', folder_name)
|
match = re.search(r"(\d{4})", folder_name)
|
||||||
if match:
|
if match:
|
||||||
return int(match.group(1))
|
return int(match.group(1))
|
||||||
return None
|
return None
|
||||||
@@ -68,7 +75,11 @@ def geocode_postcodes_bulk(postcodes: list) -> Dict[str, tuple]:
|
|||||||
Returns dict of postcode -> (latitude, longitude).
|
Returns dict of postcode -> (latitude, longitude).
|
||||||
"""
|
"""
|
||||||
results = {}
|
results = {}
|
||||||
valid_postcodes = [p.strip().upper() for p in postcodes if p and isinstance(p, str) and len(p.strip()) >= 5]
|
valid_postcodes = [
|
||||||
|
p.strip().upper()
|
||||||
|
for p in postcodes
|
||||||
|
if p and isinstance(p, str) and len(p.strip()) >= 5
|
||||||
|
]
|
||||||
valid_postcodes = list(set(valid_postcodes))
|
valid_postcodes = list(set(valid_postcodes))
|
||||||
|
|
||||||
if not valid_postcodes:
|
if not valid_postcodes:
|
||||||
@@ -79,21 +90,23 @@ def geocode_postcodes_bulk(postcodes: list) -> Dict[str, tuple]:
|
|||||||
|
|
||||||
for i, batch_start in enumerate(range(0, len(valid_postcodes), batch_size)):
|
for i, batch_start in enumerate(range(0, len(valid_postcodes), batch_size)):
|
||||||
batch = valid_postcodes[batch_start : batch_start + batch_size]
|
batch = valid_postcodes[batch_start : batch_start + batch_size]
|
||||||
print(f" Geocoding batch {i+1}/{total_batches} ({len(batch)} postcodes)...")
|
print(
|
||||||
|
f" Geocoding batch {i + 1}/{total_batches} ({len(batch)} postcodes)..."
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
'https://api.postcodes.io/postcodes',
|
"https://api.postcodes.io/postcodes",
|
||||||
json={'postcodes': batch},
|
json={"postcodes": batch},
|
||||||
timeout=30
|
timeout=30,
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
data = response.json()
|
||||||
for item in data.get('result', []):
|
for item in data.get("result", []):
|
||||||
if item and item.get('result'):
|
if item and item.get("result"):
|
||||||
pc = item['query'].upper()
|
pc = item["query"].upper()
|
||||||
lat = item['result'].get('latitude')
|
lat = item["result"].get("latitude")
|
||||||
lon = item['result'].get('longitude')
|
lon = item["result"].get("longitude")
|
||||||
if lat and lon:
|
if lat and lon:
|
||||||
results[pc] = (lat, lon)
|
results[pc] = (lat, lon)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -123,42 +136,59 @@ def load_csv_data(data_dir: Path) -> pd.DataFrame:
|
|||||||
print(f" Loading {csv_file.name} (year {year})...")
|
print(f" Loading {csv_file.name} (year {year})...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
df = pd.read_csv(csv_file, encoding='latin-1', low_memory=False)
|
df = pd.read_csv(csv_file, encoding="latin-1", low_memory=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" Error loading {csv_file}: {e}")
|
print(f" Error loading {csv_file}: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Rename columns
|
# Rename columns
|
||||||
df.rename(columns=COLUMN_MAPPINGS, inplace=True)
|
df.rename(columns=COLUMN_MAPPINGS, inplace=True)
|
||||||
df['year'] = year
|
df["year"] = year
|
||||||
|
|
||||||
# Handle local authority name
|
# Handle local authority name
|
||||||
la_name_cols = ['LANAME', 'LA (name)', 'LA_NAME', 'LA NAME']
|
la_name_cols = ["LANAME", "LA (name)", "LA_NAME", "LA NAME"]
|
||||||
la_name_col = next((c for c in la_name_cols if c in df.columns), None)
|
la_name_col = next((c for c in la_name_cols if c in df.columns), None)
|
||||||
|
|
||||||
if la_name_col and la_name_col != 'local_authority':
|
if la_name_col and la_name_col != "local_authority":
|
||||||
df['local_authority'] = df[la_name_col]
|
df["local_authority"] = df[la_name_col]
|
||||||
elif 'LEA' in df.columns:
|
elif "LEA" in df.columns:
|
||||||
df['local_authority_code'] = pd.to_numeric(df['LEA'], errors='coerce')
|
df["local_authority_code"] = pd.to_numeric(df["LEA"], errors="coerce")
|
||||||
df['local_authority'] = df['local_authority_code'].map(LA_CODE_TO_NAME).fillna(df['LEA'].astype(str))
|
df["local_authority"] = (
|
||||||
|
df["local_authority_code"]
|
||||||
|
.map(LA_CODE_TO_NAME)
|
||||||
|
.fillna(df["LEA"].astype(str))
|
||||||
|
)
|
||||||
|
|
||||||
# Store LEA code
|
# Store LEA code
|
||||||
if 'LEA' in df.columns:
|
if "LEA" in df.columns:
|
||||||
df['local_authority_code'] = pd.to_numeric(df['LEA'], errors='coerce')
|
df["local_authority_code"] = pd.to_numeric(df["LEA"], errors="coerce")
|
||||||
|
|
||||||
# Map school type
|
# Map school type
|
||||||
if 'school_type_code' in df.columns:
|
if "school_type_code" in df.columns:
|
||||||
df['school_type'] = df['school_type_code'].map(SCHOOL_TYPE_MAP).fillna(df['school_type_code'])
|
df["school_type"] = (
|
||||||
|
df["school_type_code"]
|
||||||
|
.map(SCHOOL_TYPE_MAP)
|
||||||
|
.fillna(df["school_type_code"])
|
||||||
|
)
|
||||||
|
|
||||||
# Create combined address
|
# Create combined address
|
||||||
addr_parts = ['address1', 'address2', 'town', 'postcode']
|
addr_parts = ["address1", "address2", "town", "postcode"]
|
||||||
for col in addr_parts:
|
for col in addr_parts:
|
||||||
if col not in df.columns:
|
if col not in df.columns:
|
||||||
df[col] = None
|
df[col] = None
|
||||||
|
|
||||||
df['address'] = df.apply(
|
df["address"] = df.apply(
|
||||||
lambda r: ', '.join(str(v) for v in [r.get('address1'), r.get('address2'), r.get('town'), r.get('postcode')] if pd.notna(v) and str(v).strip()),
|
lambda r: ", ".join(
|
||||||
axis=1
|
str(v)
|
||||||
|
for v in [
|
||||||
|
r.get("address1"),
|
||||||
|
r.get("address2"),
|
||||||
|
r.get("town"),
|
||||||
|
r.get("postcode"),
|
||||||
|
]
|
||||||
|
if pd.notna(v) and str(v).strip()
|
||||||
|
),
|
||||||
|
axis=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
all_data.append(df)
|
all_data.append(df)
|
||||||
@@ -179,19 +209,21 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
# Clean URN column - convert to integer, drop invalid values
|
# Clean URN column - convert to integer, drop invalid values
|
||||||
df = df.copy()
|
df = df.copy()
|
||||||
df['urn'] = pd.to_numeric(df['urn'], errors='coerce')
|
df["urn"] = pd.to_numeric(df["urn"], errors="coerce")
|
||||||
df = df.dropna(subset=['urn'])
|
df = df.dropna(subset=["urn"])
|
||||||
df['urn'] = df['urn'].astype(int)
|
df["urn"] = df["urn"].astype(int)
|
||||||
|
|
||||||
# Group by URN to get unique schools (use latest year's data)
|
# Group by URN to get unique schools (use latest year's data)
|
||||||
school_data = df.sort_values('year', ascending=False).groupby('urn').first().reset_index()
|
school_data = (
|
||||||
|
df.sort_values("year", ascending=False).groupby("urn").first().reset_index()
|
||||||
|
)
|
||||||
print(f"\nMigrating {len(school_data)} unique schools...")
|
print(f"\nMigrating {len(school_data)} unique schools...")
|
||||||
|
|
||||||
# Geocode if requested
|
# Geocode if requested
|
||||||
geocoded = {}
|
geocoded = {}
|
||||||
if geocode and 'postcode' in df.columns:
|
if geocode and "postcode" in df.columns:
|
||||||
print("\nGeocoding postcodes...")
|
print("\nGeocoding postcodes...")
|
||||||
postcodes = df['postcode'].dropna().unique().tolist()
|
postcodes = df["postcode"].dropna().unique().tolist()
|
||||||
geocoded = geocode_postcodes_bulk(postcodes)
|
geocoded = geocode_postcodes_bulk(postcodes)
|
||||||
print(f" Successfully geocoded {len(geocoded)} postcodes")
|
print(f" Successfully geocoded {len(geocoded)} postcodes")
|
||||||
|
|
||||||
@@ -202,7 +234,7 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
for _, row in school_data.iterrows():
|
for _, row in school_data.iterrows():
|
||||||
# Safely parse URN - handle None, NaN, whitespace, and invalid values
|
# Safely parse URN - handle None, NaN, whitespace, and invalid values
|
||||||
urn_val = row.get('urn')
|
urn_val = row.get("urn")
|
||||||
urn = None
|
urn = None
|
||||||
if pd.notna(urn_val):
|
if pd.notna(urn_val):
|
||||||
try:
|
try:
|
||||||
@@ -219,7 +251,7 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Get geocoding data
|
# Get geocoding data
|
||||||
postcode = row.get('postcode')
|
postcode = row.get("postcode")
|
||||||
lat, lon = None, None
|
lat, lon = None, None
|
||||||
if postcode and pd.notna(postcode):
|
if postcode and pd.notna(postcode):
|
||||||
coords = geocoded.get(str(postcode).strip().upper())
|
coords = geocoded.get(str(postcode).strip().upper())
|
||||||
@@ -228,7 +260,7 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
# Safely parse local_authority_code
|
# Safely parse local_authority_code
|
||||||
la_code = None
|
la_code = None
|
||||||
la_code_val = row.get('local_authority_code')
|
la_code_val = row.get("local_authority_code")
|
||||||
if pd.notna(la_code_val):
|
if pd.notna(la_code_val):
|
||||||
try:
|
try:
|
||||||
la_code_str = str(la_code_val).strip()
|
la_code_str = str(la_code_val).strip()
|
||||||
@@ -239,17 +271,29 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
school = School(
|
school = School(
|
||||||
urn=urn,
|
urn=urn,
|
||||||
school_name=row.get('school_name') if pd.notna(row.get('school_name')) else 'Unknown',
|
school_name=row.get("school_name")
|
||||||
local_authority=row.get('local_authority') if pd.notna(row.get('local_authority')) else None,
|
if pd.notna(row.get("school_name"))
|
||||||
|
else "Unknown",
|
||||||
|
local_authority=row.get("local_authority")
|
||||||
|
if pd.notna(row.get("local_authority"))
|
||||||
|
else None,
|
||||||
local_authority_code=la_code,
|
local_authority_code=la_code,
|
||||||
school_type=row.get('school_type') if pd.notna(row.get('school_type')) else None,
|
school_type=row.get("school_type")
|
||||||
school_type_code=row.get('school_type_code') if pd.notna(row.get('school_type_code')) else None,
|
if pd.notna(row.get("school_type"))
|
||||||
religious_denomination=row.get('religious_denomination') if pd.notna(row.get('religious_denomination')) else None,
|
else None,
|
||||||
age_range=row.get('age_range') if pd.notna(row.get('age_range')) else None,
|
school_type_code=row.get("school_type_code")
|
||||||
address1=row.get('address1') if pd.notna(row.get('address1')) else None,
|
if pd.notna(row.get("school_type_code"))
|
||||||
address2=row.get('address2') if pd.notna(row.get('address2')) else None,
|
else None,
|
||||||
town=row.get('town') if pd.notna(row.get('town')) else None,
|
religious_denomination=row.get("religious_denomination")
|
||||||
postcode=row.get('postcode') if pd.notna(row.get('postcode')) else None,
|
if pd.notna(row.get("religious_denomination"))
|
||||||
|
else None,
|
||||||
|
age_range=row.get("age_range")
|
||||||
|
if pd.notna(row.get("age_range"))
|
||||||
|
else None,
|
||||||
|
address1=row.get("address1") if pd.notna(row.get("address1")) else None,
|
||||||
|
address2=row.get("address2") if pd.notna(row.get("address2")) else None,
|
||||||
|
town=row.get("town") if pd.notna(row.get("town")) else None,
|
||||||
|
postcode=row.get("postcode") if pd.notna(row.get("postcode")) else None,
|
||||||
latitude=lat,
|
latitude=lat,
|
||||||
longitude=lon,
|
longitude=lon,
|
||||||
)
|
)
|
||||||
@@ -269,7 +313,7 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
# Safely parse URN
|
# Safely parse URN
|
||||||
urn_val = row.get('urn')
|
urn_val = row.get("urn")
|
||||||
urn = None
|
urn = None
|
||||||
if pd.notna(urn_val):
|
if pd.notna(urn_val):
|
||||||
try:
|
try:
|
||||||
@@ -284,7 +328,7 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
school_id = urn_to_school_id[urn]
|
school_id = urn_to_school_id[urn]
|
||||||
|
|
||||||
# Safely parse year
|
# Safely parse year
|
||||||
year_val = row.get('year')
|
year_val = row.get("year")
|
||||||
year = None
|
year = None
|
||||||
if pd.notna(year_val):
|
if pd.notna(year_val):
|
||||||
try:
|
try:
|
||||||
@@ -297,48 +341,52 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
result = SchoolResult(
|
result = SchoolResult(
|
||||||
school_id=school_id,
|
school_id=school_id,
|
||||||
year=year,
|
year=year,
|
||||||
total_pupils=parse_numeric(row.get('total_pupils')),
|
total_pupils=parse_numeric(row.get("total_pupils")),
|
||||||
eligible_pupils=parse_numeric(row.get('eligible_pupils')),
|
eligible_pupils=parse_numeric(row.get("eligible_pupils")),
|
||||||
# Expected Standard
|
# Expected Standard
|
||||||
rwm_expected_pct=parse_numeric(row.get('rwm_expected_pct')),
|
rwm_expected_pct=parse_numeric(row.get("rwm_expected_pct")),
|
||||||
reading_expected_pct=parse_numeric(row.get('reading_expected_pct')),
|
reading_expected_pct=parse_numeric(row.get("reading_expected_pct")),
|
||||||
writing_expected_pct=parse_numeric(row.get('writing_expected_pct')),
|
writing_expected_pct=parse_numeric(row.get("writing_expected_pct")),
|
||||||
maths_expected_pct=parse_numeric(row.get('maths_expected_pct')),
|
maths_expected_pct=parse_numeric(row.get("maths_expected_pct")),
|
||||||
gps_expected_pct=parse_numeric(row.get('gps_expected_pct')),
|
gps_expected_pct=parse_numeric(row.get("gps_expected_pct")),
|
||||||
science_expected_pct=parse_numeric(row.get('science_expected_pct')),
|
science_expected_pct=parse_numeric(row.get("science_expected_pct")),
|
||||||
# Higher Standard
|
# Higher Standard
|
||||||
rwm_high_pct=parse_numeric(row.get('rwm_high_pct')),
|
rwm_high_pct=parse_numeric(row.get("rwm_high_pct")),
|
||||||
reading_high_pct=parse_numeric(row.get('reading_high_pct')),
|
reading_high_pct=parse_numeric(row.get("reading_high_pct")),
|
||||||
writing_high_pct=parse_numeric(row.get('writing_high_pct')),
|
writing_high_pct=parse_numeric(row.get("writing_high_pct")),
|
||||||
maths_high_pct=parse_numeric(row.get('maths_high_pct')),
|
maths_high_pct=parse_numeric(row.get("maths_high_pct")),
|
||||||
gps_high_pct=parse_numeric(row.get('gps_high_pct')),
|
gps_high_pct=parse_numeric(row.get("gps_high_pct")),
|
||||||
# Progress
|
# Progress
|
||||||
reading_progress=parse_numeric(row.get('reading_progress')),
|
reading_progress=parse_numeric(row.get("reading_progress")),
|
||||||
writing_progress=parse_numeric(row.get('writing_progress')),
|
writing_progress=parse_numeric(row.get("writing_progress")),
|
||||||
maths_progress=parse_numeric(row.get('maths_progress')),
|
maths_progress=parse_numeric(row.get("maths_progress")),
|
||||||
# Averages
|
# Averages
|
||||||
reading_avg_score=parse_numeric(row.get('reading_avg_score')),
|
reading_avg_score=parse_numeric(row.get("reading_avg_score")),
|
||||||
maths_avg_score=parse_numeric(row.get('maths_avg_score')),
|
maths_avg_score=parse_numeric(row.get("maths_avg_score")),
|
||||||
gps_avg_score=parse_numeric(row.get('gps_avg_score')),
|
gps_avg_score=parse_numeric(row.get("gps_avg_score")),
|
||||||
# Context
|
# Context
|
||||||
disadvantaged_pct=parse_numeric(row.get('disadvantaged_pct')),
|
disadvantaged_pct=parse_numeric(row.get("disadvantaged_pct")),
|
||||||
eal_pct=parse_numeric(row.get('eal_pct')),
|
eal_pct=parse_numeric(row.get("eal_pct")),
|
||||||
sen_support_pct=parse_numeric(row.get('sen_support_pct')),
|
sen_support_pct=parse_numeric(row.get("sen_support_pct")),
|
||||||
sen_ehcp_pct=parse_numeric(row.get('sen_ehcp_pct')),
|
sen_ehcp_pct=parse_numeric(row.get("sen_ehcp_pct")),
|
||||||
stability_pct=parse_numeric(row.get('stability_pct')),
|
stability_pct=parse_numeric(row.get("stability_pct")),
|
||||||
# Gender
|
# Gender
|
||||||
rwm_expected_boys_pct=parse_numeric(row.get('rwm_expected_boys_pct')),
|
rwm_expected_boys_pct=parse_numeric(row.get("rwm_expected_boys_pct")),
|
||||||
rwm_expected_girls_pct=parse_numeric(row.get('rwm_expected_girls_pct')),
|
rwm_expected_girls_pct=parse_numeric(row.get("rwm_expected_girls_pct")),
|
||||||
rwm_high_boys_pct=parse_numeric(row.get('rwm_high_boys_pct')),
|
rwm_high_boys_pct=parse_numeric(row.get("rwm_high_boys_pct")),
|
||||||
rwm_high_girls_pct=parse_numeric(row.get('rwm_high_girls_pct')),
|
rwm_high_girls_pct=parse_numeric(row.get("rwm_high_girls_pct")),
|
||||||
# Disadvantaged
|
# Disadvantaged
|
||||||
rwm_expected_disadvantaged_pct=parse_numeric(row.get('rwm_expected_disadvantaged_pct')),
|
rwm_expected_disadvantaged_pct=parse_numeric(
|
||||||
rwm_expected_non_disadvantaged_pct=parse_numeric(row.get('rwm_expected_non_disadvantaged_pct')),
|
row.get("rwm_expected_disadvantaged_pct")
|
||||||
disadvantaged_gap=parse_numeric(row.get('disadvantaged_gap')),
|
),
|
||||||
|
rwm_expected_non_disadvantaged_pct=parse_numeric(
|
||||||
|
row.get("rwm_expected_non_disadvantaged_pct")
|
||||||
|
),
|
||||||
|
disadvantaged_gap=parse_numeric(row.get("disadvantaged_gap")),
|
||||||
# 3-Year
|
# 3-Year
|
||||||
rwm_expected_3yr_pct=parse_numeric(row.get('rwm_expected_3yr_pct')),
|
rwm_expected_3yr_pct=parse_numeric(row.get("rwm_expected_3yr_pct")),
|
||||||
reading_avg_3yr=parse_numeric(row.get('reading_avg_3yr')),
|
reading_avg_3yr=parse_numeric(row.get("reading_avg_3yr")),
|
||||||
maths_avg_3yr=parse_numeric(row.get('maths_avg_3yr')),
|
maths_avg_3yr=parse_numeric(row.get("maths_avg_3yr")),
|
||||||
)
|
)
|
||||||
db.add(result)
|
db.add(result)
|
||||||
results_created += 1
|
results_created += 1
|
||||||
@@ -355,9 +403,13 @@ def migrate_data(df: pd.DataFrame, geocode: bool = False):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description='Migrate CSV data to PostgreSQL database')
|
parser = argparse.ArgumentParser(
|
||||||
parser.add_argument('--drop', action='store_true', help='Drop existing tables before migration')
|
description="Migrate CSV data to PostgreSQL database"
|
||||||
parser.add_argument('--geocode', action='store_true', help='Geocode postcodes')
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--drop", action="store_true", help="Drop existing tables before migration"
|
||||||
|
)
|
||||||
|
parser.add_argument("--geocode", action="store_true", help="Geocode postcodes")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
@@ -385,6 +437,11 @@ def main():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
migrate_data(df, geocode=args.geocode)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user