feat: wire Typesense search into backend, fix sync performance data bug
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 1m1s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
All checks were successful
Build and Push Docker Images / Build Backend (FastAPI) (push) Successful in 1m1s
Build and Push Docker Images / Build Frontend (Next.js) (push) Successful in 1m7s
Build and Push Docker Images / Build Integrator (push) Successful in 55s
Build and Push Docker Images / Build Kestra Init (push) Successful in 31s
Build and Push Docker Images / Build Pipeline (Meltano + dbt + Airflow) (push) Successful in 1m25s
Build and Push Docker Images / Trigger Portainer Update (push) Successful in 1s
sync_typesense.py: - Fix query string replacement: was matching 'ST_X(l.geom) as lng' but QUERY_BASE uses 'l.longitude as lng' — KS2/KS4 lateral joins were silently dropped on every sync run backend: - Add typesense_url/typesense_api_key settings to config.py - Add search_schools_typesense() to data_loader.py — queries Typesense 'schools' alias, returns URNs in relevance order with typo tolerance; falls back to empty list if Typesense is unavailable - /api/schools: replace pandas str.contains with Typesense search; results are filtered from the DataFrame and returned in relevance order; graceful fallback to substring match if Typesense is down requirements.txt: add typesense==0.21.0, numpy==1.26.4 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,7 @@ from .data_loader import (
|
|||||||
load_school_data,
|
load_school_data,
|
||||||
geocode_single_postcode,
|
geocode_single_postcode,
|
||||||
get_supplementary_data,
|
get_supplementary_data,
|
||||||
|
search_schools_typesense,
|
||||||
)
|
)
|
||||||
from .data_loader import get_data_info as get_db_info
|
from .data_loader import get_data_info as get_db_info
|
||||||
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
from .schemas import METRIC_DEFINITIONS, RANKING_COLUMNS, SCHOOL_COLUMNS
|
||||||
@@ -314,14 +315,18 @@ async def get_schools(
|
|||||||
|
|
||||||
# Apply filters
|
# Apply filters
|
||||||
if search:
|
if search:
|
||||||
|
ts_urns = search_schools_typesense(search)
|
||||||
|
if ts_urns:
|
||||||
|
urn_order = {urn: i for i, urn in enumerate(ts_urns)}
|
||||||
|
schools_df = schools_df[schools_df["urn"].isin(set(ts_urns))].copy()
|
||||||
|
schools_df["_ts_rank"] = schools_df["urn"].map(urn_order)
|
||||||
|
schools_df = schools_df.sort_values("_ts_rank").drop(columns=["_ts_rank"])
|
||||||
|
else:
|
||||||
|
# Fallback: Typesense unavailable, use substring match
|
||||||
search_lower = search.lower()
|
search_lower = search.lower()
|
||||||
mask = (
|
mask = schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
||||||
schools_df["school_name"].str.lower().str.contains(search_lower, na=False)
|
|
||||||
)
|
|
||||||
if "address" in schools_df.columns:
|
if "address" in schools_df.columns:
|
||||||
mask = mask | schools_df["address"].str.lower().str.contains(
|
mask = mask | schools_df["address"].str.lower().str.contains(search_lower, na=False)
|
||||||
search_lower, na=False
|
|
||||||
)
|
|
||||||
schools_df = schools_df[mask]
|
schools_df = schools_df[mask]
|
||||||
|
|
||||||
if local_authority:
|
if local_authority:
|
||||||
|
|||||||
@@ -38,6 +38,10 @@ class Settings(BaseSettings):
|
|||||||
rate_limit_burst: int = 10 # Allow burst of requests
|
rate_limit_burst: int = 10 # Allow burst of requests
|
||||||
max_request_size: int = 1024 * 1024 # 1MB max request size
|
max_request_size: int = 1024 * 1024 # 1MB max request size
|
||||||
|
|
||||||
|
# Typesense
|
||||||
|
typesense_url: str = "http://localhost:8108"
|
||||||
|
typesense_api_key: str = ""
|
||||||
|
|
||||||
# Analytics
|
# Analytics
|
||||||
ga_measurement_id: Optional[str] = "G-J0PCVT14NY" # Google Analytics 4 Measurement ID
|
ga_measurement_id: Optional[str] = "G-J0PCVT14NY" # Google Analytics 4 Measurement ID
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,47 @@ from .models import (
|
|||||||
from .schemas import SCHOOL_TYPE_MAP
|
from .schemas import SCHOOL_TYPE_MAP
|
||||||
|
|
||||||
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
_postcode_cache: Dict[str, Tuple[float, float]] = {}
|
||||||
|
_typesense_client = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_typesense_client():
    """Return a module-cached Typesense client, or None if unavailable.

    The client is built lazily from ``settings.typesense_url`` and
    ``settings.typesense_api_key`` and memoized in ``_typesense_client``.
    Returns None (never raises) when Typesense is not configured or the
    client cannot be constructed, so callers can degrade gracefully.
    """
    global _typesense_client
    if _typesense_client is not None:
        return _typesense_client
    url = settings.typesense_url
    key = settings.typesense_api_key
    if not url or not key:
        return None
    try:
        import typesense
        from urllib.parse import urlparse

        # Parse scheme, host and port properly. The previous
        # split("//")/partition(":") approach hard-coded protocol "http",
        # which silently broke any https:// Typesense endpoint.
        parsed = urlparse(url if "//" in url else f"http://{url}")
        protocol = parsed.scheme or "http"
        port = parsed.port or (443 if protocol == "https" else 8108)
        _typesense_client = typesense.Client({
            "nodes": [{"host": parsed.hostname, "port": str(port), "protocol": protocol}],
            "api_key": key,
            "connection_timeout_seconds": 2,
        })
        return _typesense_client
    except Exception:
        # Best-effort: any failure (missing package, bad URL) means the
        # caller falls back to non-Typesense search.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def search_schools_typesense(query: str, limit: int = 250) -> List[int]:
    """Search Typesense. Returns URNs in relevance order, or [] if unavailable."""
    client = _get_typesense_client()
    if client is None:
        return []

    # Typesense caps per_page at 250; clamp the caller's limit to that.
    search_params = {
        "q": query,
        "query_by": "school_name,local_authority,postcode",
        "per_page": min(limit, 250),
        "typo_tokens_threshold": 1,
    }
    try:
        response = client.collections["schools"].documents.search(search_params)
        hits = response.get("hits", [])
        return [int(hit["document"]["urn"]) for hit in hits]
    except Exception:
        # Treat any Typesense/network error as "no results" so the caller
        # can fall back to its substring-match path.
        return []
|
||||||
|
|
||||||
|
|
||||||
def normalize_school_type(school_type: Optional[str]) -> Optional[str]:
|
def normalize_school_type(school_type: Optional[str]) -> Optional[str]:
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ def sync(typesense_url: str, api_key: str):
|
|||||||
query = QUERY_BASE
|
query = QUERY_BASE
|
||||||
if select_extra:
|
if select_extra:
|
||||||
# Insert extra select columns before FROM
|
# Insert extra select columns before FROM
|
||||||
query = query.replace("ST_X(l.geom) as lng", "ST_X(l.geom) as lng,\n " + ",\n ".join(select_extra))
|
query = query.replace("l.longitude as lng", "l.longitude as lng,\n " + ",\n ".join(select_extra))
|
||||||
query += joins
|
query += joins
|
||||||
|
|
||||||
cur.execute(query)
|
cur.execute(query)
|
||||||
|
|||||||
@@ -10,4 +10,6 @@ psycopg2-binary==2.9.9
|
|||||||
alembic==1.13.1
|
alembic==1.13.1
|
||||||
slowapi==0.1.9
|
slowapi==0.1.9
|
||||||
secure==0.3.0
|
secure==0.3.0
|
||||||
|
typesense==0.21.0
|
||||||
|
numpy==1.26.4
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user