Files

63 lines
1.7 KiB
Python
Raw Permalink Normal View History

"""
Sitemap Generation DAG
Rebuilds the sitemap.xml on the backend after data updates so search engines
get fresh school URLs. Runs weekly (Sunday 05:00) and can be triggered manually.
"""
from __future__ import annotations
from datetime import datetime, timedelta
from airflow import DAG
try:
from airflow.providers.standard.operators.bash import BashOperator
except ImportError:
from airflow.operators.bash import BashOperator
# Task-level defaults handed to the DAG below via ``default_args``.
default_args = {
    # Owner label attached to each task.
    "owner": "school-compare",
    # A run does not wait on the outcome of the previous scheduled run.
    "depends_on_past": False,
    # No e-mail is sent when a task fails.
    "email_on_failure": False,
    # A failed task is retried twice, five minutes apart.
    "retries": 2,
    "retry_delay": timedelta(minutes=5),
}
# Weekly DAG with a single task that asks the backend to rebuild sitemap.xml.
with DAG(
    dag_id="sitemap_generate",
    default_args=default_args,
    description="Rebuild sitemap.xml on the backend after school data updates",
    schedule="0 5 * * 0",  # Sundays 05:00 — after the weekly data window
    start_date=datetime(2025, 1, 1),
    catchup=False,
    tags=["school-compare", "seo"],
) as dag:
    # POSTs to the backend's admin endpoint and fails the task unless the
    # backend answers HTTP 200.
    #
    # Environment variables read by the script:
    #   BACKEND_URL             base URL of the backend (default http://backend:80)
    #   ADMIN_API_KEY           value sent in the X-API-Key header
    #   SITEMAP_REQUEST_TIMEOUT max seconds for the whole request (default 600)
    #
    # NOTE: bash_command is a Jinja template, so the script must not contain
    # Jinja delimiters (double curly braces, curly-brace-percent, etc.).
    regenerate_sitemap = BashOperator(
        task_id="call_regenerate_sitemap",
        bash_command="""
        set -euo pipefail

        BACKEND_URL="${BACKEND_URL:-http://backend:80}"
        # The placeholder fallback is kept for backward compatibility, but it
        # is almost certainly a misconfiguration, so make it visible in the log.
        if [ -z "${ADMIN_API_KEY:-}" ]; then
            echo "WARNING: ADMIN_API_KEY is not set; falling back to the placeholder key."
        fi
        ADMIN_KEY="${ADMIN_API_KEY:-changeme}"
        MAX_TIME="${SITEMAP_REQUEST_TIMEOUT:-600}"

        # Per-run temp file: a fixed /tmp path would be shared by overlapping
        # runs on the same worker and would never be cleaned up.
        response_file="$(mktemp)"
        trap 'rm -f "$response_file"' EXIT

        echo "Calling $BACKEND_URL/api/admin/regenerate-sitemap ..."
        # -sS keeps the progress meter off but still prints transport errors.
        # The timeouts stop a hung backend from blocking the worker forever.
        curl_status=0
        response=$(curl -sS -o "$response_file" -w "%{http_code}" \\
            --connect-timeout 10 --max-time "$MAX_TIME" \\
            -X POST "$BACKEND_URL/api/admin/regenerate-sitemap" \\
            -H "X-API-Key: $ADMIN_KEY" \\
            -H "Content-Type: application/json") || curl_status=$?
        if [ "$curl_status" -ne 0 ]; then
            echo "ERROR: request to backend failed (curl exit code $curl_status)"
            exit 1
        fi

        echo "HTTP status: $response"
        cat "$response_file"
        # Terminate the body line so the next log message starts on its own line.
        echo
        if [ "$response" != "200" ]; then
            echo "ERROR: sitemap regeneration failed (HTTP $response)"
            exit 1
        fi

        # Best-effort summary: the regeneration already succeeded, so a body
        # that is not a JSON object must not fail (and retry) the task.
        urls=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('urls', '?'))" "$response_file" 2>/dev/null) || urls="?"
        echo "Sitemap regenerated: $urls URLs."
        """,
    )