feat: wire Typesense search into backend, fix sync performance data bug
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 1m1s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 1m1s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
sync_typesense.py:
- Fix query string replacement: it was matching 'ST_X(l.geom) as lng', but QUERY_BASE uses 'l.longitude as lng', so the KS2/KS4 lateral joins were silently dropped on every sync run.
backend:
- Add typesense_url/typesense_api_key settings to config.py.
- Add search_schools_typesense() to data_loader.py: it queries the Typesense 'schools' alias and returns URNs in relevance order with typo tolerance; it falls back to an empty list if Typesense is unavailable.
- /api/schools: replace pandas str.contains with Typesense search; results are filtered from the DataFrame and returned in relevance order, with a graceful fallback to substring match if Typesense is down.
requirements.txt:
- Add typesense==0.21.0, numpy==1.26.4.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,7 @@ from .data_loader import (
|
||||
load_school_data,
|
||||
geocode_single_postcode,
|
||||
get_supplementary_data,
|
||||
search_schools_typesense,
|
||||
)
|
||||
from .data_loader import get_data_info as get_db_info
|
||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||
@@ -314,15 +315,19 @@ async def get_schools(
|
||||
|
||||
# Apply filters
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
mask = (
|
||||
schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
||||
)
|
||||
if "address" in schools_df.columns:
|
||||
mask = mask | schools_df["address"].str.lower().str.contains(
|
||||
search_lower, na=False
|
||||
)
|
||||
schools_df = schools_df[mask]
|
||||
ts_urns = search_schools_typesense(search)
|
||||
if ts_urns:
|
||||
urn_order = {urn: i for i, urn in enumerate(ts_urns)}
|
||||
schools_df = schools_df[schools_df["urn"].isin(set(ts_urns))].copy()
|
||||
schools_df["_ts_rank"] = schools_df["urn"].map(urn_order)
|
||||
schools_df = schools_df.sort_values("_ts_rank").drop(columns=["_ts_rank"])
|
||||
else:
|
||||
# Fallback: Typesense unavailable, use substring match
|
||||
search_lower = search.lower()
|
||||
mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
||||
if "address" in schools_df.columns:
|
||||
mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
|
||||
schools_df = schools_df[mask]
|
||||
|
||||
if local_authority:
|
||||
schools_df = schools_df[
|
||||
|
||||
@@ -38,6 +38,10 @@ class Settings(BaseSettings):
|
||||
rate_limit_burst: int = 10 # Allow burst of requests
|
||||
max_request_size: int = 1024 * 1024 # 1MB max request size
|
||||
|
||||
# Typesense
|
||||
typesense_url: str = "http://localhost:8108"
|
||||
typesense_api_key: str = ""
|
||||
|
||||
# Analytics
|
||||
ga_measurement_id: Optional[str] = "G-J0PCVT14NY" # Google Analytics 4 Measurement ID
|
||||
|
||||
|
||||
@@ -20,6 +20,47 @@ from .models import (
|
||||
from .schemas import SCHOOL_TYPE_MAP
|
||||
|
||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||
# Module-level cache for the Typesense client; stays None until
# _get_typesense_client() successfully builds one.
_typesense_client = None
|
||||
|
||||
|
||||
def _get_typesense_client():
    """Return the shared Typesense client, creating it on first use.

    The client is built from ``settings.typesense_url`` and
    ``settings.typesense_api_key`` and stored in the module-level
    ``_typesense_client`` so later calls reuse it.

    The URL's scheme is honoured (``http`` or ``https``). When the URL has no
    explicit port, 443 is used for ``https`` and 8108 otherwise. A bare
    ``host:port`` value without a scheme is treated as ``http``.

    Returns:
        The cached ``typesense.Client``, or ``None`` when the URL or API key
        is empty, the URL cannot be parsed, or the client cannot be created
        (including the ``typesense`` package failing to import). ``None`` is
        never cached, so a later call tries again.
    """
    global _typesense_client
    if _typesense_client is not None:
        return _typesense_client

    url = settings.typesense_url
    key = settings.typesense_api_key
    if not url or not key:
        return None

    try:
        # Imported inside the try so an ImportError degrades to "no client"
        # instead of propagating to the caller.
        import typesense
        from urllib.parse import urlsplit

        # urlsplit only recognises the host when the "//" authority marker is
        # present; bare "host:port" values were accepted before, so keep
        # accepting them.
        parts = urlsplit(url if "//" in url else "//" + url)

        protocol = parts.scheme or "http"
        if protocol not in ("http", "https"):
            raise ValueError("unsupported Typesense URL scheme: %r" % protocol)

        host = parts.hostname
        if not host:
            raise ValueError("Typesense URL has no host: %r" % url)

        # .port raises ValueError for a non-numeric or out-of-range port.
        port = parts.port
        if port is None:
            port = 443 if protocol == "https" else 8108

        _typesense_client = typesense.Client({
            "nodes": [{"host": host, "port": str(port), "protocol": protocol}],
            "api_key": key,
            "connection_timeout_seconds": 2,
        })
        return _typesense_client
    except Exception:
        # Best-effort: search must never take the API down, but a broken
        # configuration should at least be visible in the logs.
        import logging

        logging.getLogger(__name__).warning(
            "Typesense client could not be created; search will be unavailable",
            exc_info=True,
        )
        return None
|
||||
|
||||
|
||||
def search_schools_typesense(query: str, limit: int = 250) -> List[int]:
    """Search the Typesense ``schools`` collection and return matching URNs.

    Args:
        query: Free-text search string, matched against ``school_name``,
            ``local_authority`` and ``postcode``.
        limit: Maximum number of hits to request. Values above 250 are
            clamped to 250.

    Returns:
        URNs as integers, in the order Typesense returned the hits. An empty
        list is returned when the query is blank, ``limit`` is not positive,
        no client is available, the request fails, or nothing matched;
        callers cannot tell these cases apart.
    """
    # Nothing to search for: skip the network round trip entirely.
    if not query or not query.strip() or limit <= 0:
        return []

    client = _get_typesense_client()
    if client is None:
        return []

    try:
        result = client.collections["schools"].documents.search({
            "q": query,
            "query_by": "school_name,local_authority,postcode",
            "per_page": min(limit, 250),
            "typo_tokens_threshold": 1,
        })
        return [int(hit["document"]["urn"]) for hit in result.get("hits", [])]
    except Exception:
        # Best-effort: the caller falls back to another search strategy on an
        # empty list, but the failure should still be visible in the logs.
        import logging

        logging.getLogger(__name__).warning(
            "Typesense search failed; returning no results", exc_info=True
        )
        return []
|
||||
|
||||
|
||||
def normalize_school_type(school_type: Optional[str]) -> Optional[str]:
|
||||
|
||||
@@ -158,7 +158,7 @@ def sync(typesense_url: str, api_key: str):
|
||||
query = QUERY_BASE
|
||||
if select_extra:
|
||||
# Insert extra select columns before FROM
|
||||
query = query.replace("ST_X(l.geom) as lng", "ST_X(l.geom) as lng,\n " + ",\n ".join(select_extra))
|
||||
query = query.replace("l.longitude as lng", "l.longitude as lng,\n " + ",\n ".join(select_extra))
|
||||
query += joins
|
||||
|
||||
cur.execute(query)
|
||||
|
||||
@@ -10,4 +10,6 @@ psycopg2-binary==2.9.9
|
||||
alembic==1.13.1
|
||||
slowapi==0.1.9
|
||||
secure==0.3.0
|
||||
typesense==0.21.0
|
||||
numpy==1.26.4
|
||||
|
||||
|
||||
Reference in New Issue
Block a user