diff --git a/backend/database.py b/backend/database.py
new file mode 100644
index 0000000..40d3e7e
--- /dev/null
+++ b/backend/database.py
@@ -0,0 +1,67 @@
+"""
+Database connection setup using SQLAlchemy.
+"""
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+from contextlib import contextmanager
+
+from .config import settings
+
+# Engine shared by this module; the connection URL is read from ``settings``.
+engine = create_engine(
+    settings.database_url,
+    echo=False,  # flip to True to log emitted SQL while debugging
+    pool_pre_ping=True,  # verify a pooled connection before handing it out
+    pool_size=10,  # NOTE(review): pool args assume a QueuePool-backed URL -- confirm
+    max_overflow=20,  # extra connections allowed on top of pool_size
+)
+
+# Factory for sessions bound to ``engine``; autoflush and autocommit are off.
+SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
+
+# Declarative base class that the ORM models subclass.
+Base = declarative_base()
+
+
+def get_db():
+    """
+    Yield a new ``SessionLocal`` session, closing it once the consumer is done.
+
+    Meant as a FastAPI dependency. Nothing is committed here; the session is
+    closed even when the consumer raises.
+    """
+    with SessionLocal() as session:
+        yield session
+
+
+@contextmanager
+def get_db_session():
+    """
+    Transactional session scope for scripts and other non-FastAPI code.
+
+    Commits when the ``with`` body finishes normally; on any ``Exception``
+    rolls back and re-raises. The session is closed in every case.
+    """
+    with SessionLocal() as session:
+        try:
+            yield session
+            session.commit()
+        except Exception:
+            session.rollback()
+            raise
+
+
+def init_db():
+    """Create all tables registered on ``Base.metadata`` that do not exist yet."""
+    # Import for the side effect: without it no model is registered and nothing is created.
+    from . import models  # noqa: F401
+    Base.metadata.create_all(bind=engine)
+
+
+def drop_db():
+    """Drop all tables registered on ``Base.metadata`` -- destructive, use with caution!"""
+    # Import for the side effect: tables of unimported models would otherwise be skipped.
+    from . import models  # noqa: F401
+    Base.metadata.drop_all(bind=engine)
+
diff --git a/backend/models.py b/backend/models.py
new file mode 100644
index 0000000..c57f8a0
--- /dev/null
+++ b/backend/models.py
@@ -0,0 +1,190 @@
+"""
+SQLAlchemy database models for school data.
+Normalized schema with separate tables for schools and yearly results.
+"""
+
+from sqlalchemy import (
+    Column, Integer, String, Float, ForeignKey, Index, UniqueConstraint,
+    Text, Boolean
+)
+from sqlalchemy.orm import relationship
+from .database import Base
+
+
+class School(Base):
+    """
+    School identity, address and coordinates; yearly figures hang off ``results``.
+    """
+    __tablename__ = "schools"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
+    urn = Column(Integer, unique=True, nullable=False, index=True)  # required, unique
+    school_name = Column(String(255), nullable=False)
+    local_authority = Column(String(100))
+    local_authority_code = Column(Integer)
+    school_type = Column(String(100))
+    school_type_code = Column(String(10))
+    religious_denomination = Column(String(100))
+    age_range = Column(String(20))
+
+    # Address (every part is optional; see the ``address`` property)
+    address1 = Column(String(255))
+    address2 = Column(String(255))
+    town = Column(String(100))
+    postcode = Column(String(20), index=True)
+
+    # Geocoding (cached)
+    latitude = Column(Float)
+    longitude = Column(Float)
+
+    # Relationships: results are deleted with the school or when detached from it
+    results = relationship("SchoolResult", back_populates="school", cascade="all, delete-orphan")
+
+    def __repr__(self):  # !r quotes/escapes the name, so apostrophes stay unambiguous
+        return f"<School(urn={self.urn}, name={self.school_name!r})>"
+
+    @property
+    def address(self):
+        """Return the non-empty address parts joined with ``", "``."""
+        parts = (self.address1, self.address2, self.town, self.postcode)
+        return ", ".join(filter(None, parts))
+
+
+class SchoolResult(Base):
+    """
+    One year of KS2 results for one school (unique per ``school_id`` + ``year``).
+    Metric columns are nullable ``Float`` values; pupil counts are ``Integer``.
+    """
+    __tablename__ = "school_results"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    school_id = Column(Integer, ForeignKey("schools.id", ondelete="CASCADE"), nullable=False)
+    year = Column(Integer, nullable=False, index=True)
+
+    # Pupil numbers
+    total_pupils = Column(Integer)
+    eligible_pupils = Column(Integer)
+
+    # Core KS2 metrics - Expected Standard
+    rwm_expected_pct = Column(Float)
+    reading_expected_pct = Column(Float)
+    writing_expected_pct = Column(Float)
+    maths_expected_pct = Column(Float)
+    gps_expected_pct = Column(Float)
+    science_expected_pct = Column(Float)
+
+    # Higher Standard
+    rwm_high_pct = Column(Float)
+    reading_high_pct = Column(Float)
+    writing_high_pct = Column(Float)
+    maths_high_pct = Column(Float)
+    gps_high_pct = Column(Float)
+
+    # Progress Scores
+    reading_progress = Column(Float)
+    writing_progress = Column(Float)
+    maths_progress = Column(Float)
+
+    # Average Scores
+    reading_avg_score = Column(Float)
+    maths_avg_score = Column(Float)
+    gps_avg_score = Column(Float)
+
+    # School Context
+    disadvantaged_pct = Column(Float)
+    eal_pct = Column(Float)
+    sen_support_pct = Column(Float)
+    sen_ehcp_pct = Column(Float)
+    stability_pct = Column(Float)
+
+    # Gender Breakdown
+    rwm_expected_boys_pct = Column(Float)
+    rwm_expected_girls_pct = Column(Float)
+    rwm_high_boys_pct = Column(Float)
+    rwm_high_girls_pct = Column(Float)
+
+    # Disadvantaged Performance
+    rwm_expected_disadvantaged_pct = Column(Float)
+    rwm_expected_non_disadvantaged_pct = Column(Float)
+    disadvantaged_gap = Column(Float)
+
+    # 3-Year Averages
+    rwm_expected_3yr_pct = Column(Float)
+    reading_avg_3yr = Column(Float)
+    maths_avg_3yr = Column(Float)
+
+    # Relationship: the owning School (other side of ``School.results``)
+    school = relationship("School", back_populates="results")
+
+    # Constraints: at most one result row per school per year
+    __table_args__ = (
+        # Unique constraints are index-backed on common backends; no extra Index needed.
+        UniqueConstraint('school_id', 'year', name='uq_school_year'),
+    )
+
+    def __repr__(self):
+        return f"<SchoolResult(school_id={self.school_id}, year={self.year})>"
+
+
+# CSV column name -> School attribute name; every column maps to itself.
+SCHOOL_FIELD_MAPPING = {name: name for name in (
+    'urn',
+    'school_name',
+    'local_authority',
+    'local_authority_code',
+    'school_type',
+    'school_type_code',
+    'religious_denomination',
+    'age_range',
+    'address1',
+    'address2',
+    'town',
+    'postcode',
+)}
+
+RESULT_FIELD_MAPPING = {name: name for name in (  # CSV column -> SchoolResult attribute
+    'year',
+    'total_pupils',
+    'eligible_pupils',
+    # Expected Standard
+    'rwm_expected_pct',
+    'reading_expected_pct',
+    'writing_expected_pct',
+    'maths_expected_pct',
+    'gps_expected_pct',
+    'science_expected_pct',
+    # Higher Standard
+    'rwm_high_pct',
+    'reading_high_pct',
+    'writing_high_pct',
+    'maths_high_pct',
+    'gps_high_pct',
+    # Progress
+    'reading_progress',
+    'writing_progress',
+    'maths_progress',
+    # Averages
+    'reading_avg_score',
+    'maths_avg_score',
+    'gps_avg_score',
+    # Context
+    'disadvantaged_pct',
+    'eal_pct',
+    'sen_support_pct',
+    'sen_ehcp_pct',
+    'stability_pct',
+    # Gender
+    'rwm_expected_boys_pct',
+    'rwm_expected_girls_pct',
+    'rwm_high_boys_pct',
+    'rwm_high_girls_pct',
+    # Disadvantaged
+    'rwm_expected_disadvantaged_pct',
+    'rwm_expected_non_disadvantaged_pct',
+    'disadvantaged_gap',
+    # 3-Year
+    'rwm_expected_3yr_pct',
+    'reading_avg_3yr',
+    'maths_avg_3yr',
+)}
+