""" Sitemap Generation DAG Rebuilds the sitemap.xml on the backend after data updates so search engines get fresh school URLs. Runs weekly (Sunday 05:00) and can be triggered manually. """ from __future__ import annotations from datetime import datetime, timedelta from airflow import DAG try: from airflow.providers.standard.operators.bash import BashOperator except ImportError: from airflow.operators.bash import BashOperator default_args = { "owner": "school-compare", "depends_on_past": False, "email_on_failure": False, "retries": 2, "retry_delay": timedelta(minutes=5), } with DAG( dag_id="sitemap_generate", default_args=default_args, description="Rebuild sitemap.xml on the backend after school data updates", schedule="0 5 * * 0", # Sundays 05:00 — after the weekly data window start_date=datetime(2025, 1, 1), catchup=False, tags=["school-compare", "seo"], ) as dag: regenerate_sitemap = BashOperator( task_id="call_regenerate_sitemap", bash_command=""" set -e BACKEND_URL="${BACKEND_URL:-http://backend:80}" ADMIN_KEY="${ADMIN_API_KEY:-changeme}" echo "Calling $BACKEND_URL/api/admin/regenerate-sitemap ..." response=$(curl -s -o /tmp/sitemap_response.json -w "%{http_code}" \\ -X POST "$BACKEND_URL/api/admin/regenerate-sitemap" \\ -H "X-API-Key: $ADMIN_KEY" \\ -H "Content-Type: application/json") echo "HTTP status: $response" cat /tmp/sitemap_response.json if [ "$response" != "200" ]; then echo "ERROR: sitemap regeneration failed (HTTP $response)" exit 1 fi urls=$(python3 -c "import json,sys; d=json.load(open('/tmp/sitemap_response.json')); print(d.get('urls', '?'))") echo "Sitemap regenerated: $urls URLs." """, )