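"""Tests for SnapshotDownloader.

Covers the snapshot cache and last-run-date persistence helpers, the fixed
HTML output filename, and the incremental behaviour of download_snapshots().
"""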

import asyncio
import json
import pytest
from unittest.mock import patch, AsyncMock
from src.snapshot_downloader import SnapshotDownloader
def _downloader(tmp_path):
    """Build a SnapshotDownloader whose output directory is *tmp_path*.

    The directory is passed as a string and a placeholder API key is used.
    """
    output_dir = str(tmp_path)
    return SnapshotDownloader(output_dir=output_dir, api_key="test-key")
# --- load_snapshot_cache ---
def test_load_snapshot_cache_missing(tmp_path):
    """With no cache file written, load_snapshot_cache() yields an empty list."""
    downloader = _downloader(tmp_path)
    loaded = downloader.load_snapshot_cache()
    assert loaded == []
def test_load_snapshot_cache_returns_data(tmp_path):
    """A JSON list stored in snapshots_cache.json is loaded back unchanged."""
    downloader = _downloader(tmp_path)
    expected = [{"id": "1", "notes": "hello"}]
    cache_file = tmp_path / "snapshots_cache.json"
    cache_file.write_text(json.dumps(expected))
    assert downloader.load_snapshot_cache() == expected
def test_load_snapshot_cache_malformed_returns_empty(tmp_path):
    """A cache file that is not valid JSON is treated as an empty cache."""
    downloader = _downloader(tmp_path)
    cache_file = tmp_path / "snapshots_cache.json"
    cache_file.write_text("not json{{{")
    loaded = downloader.load_snapshot_cache()
    assert loaded == []
def test_load_snapshot_cache_non_list_returns_empty(tmp_path):
    """Valid JSON whose top-level value is an object (not a list) loads as []."""
    downloader = _downloader(tmp_path)
    cache_file = tmp_path / "snapshots_cache.json"
    cache_file.write_text('{"key": "val"}')
    loaded = downloader.load_snapshot_cache()
    assert loaded == []
# --- save_snapshot_cache ---
def test_save_snapshot_cache_writes_json(tmp_path):
    """save_snapshot_cache() writes the snapshots as JSON to snapshots_cache.json."""
    downloader = _downloader(tmp_path)
    expected = [{"id": "1"}, {"id": "2"}]
    downloader.save_snapshot_cache(expected)
    raw = (tmp_path / "snapshots_cache.json").read_text()
    assert json.loads(raw) == expected
# --- load_last_run_date ---
def test_load_last_run_date_missing(tmp_path):
    """With no state file written, load_last_run_date() returns None."""
    downloader = _downloader(tmp_path)
    last_run = downloader.load_last_run_date()
    assert last_run is None
def test_load_last_run_date_returns_date(tmp_path):
    """The "last_date_to" value stored in last_run.json is returned as-is."""
    downloader = _downloader(tmp_path)
    state_file = tmp_path / "last_run.json"
    state_file.write_text('{"last_date_to": "2025-01-01"}')
    last_run = downloader.load_last_run_date()
    assert last_run == "2025-01-01"
def test_load_last_run_date_malformed_returns_none(tmp_path):
    """A state file that is not valid JSON makes load_last_run_date() return None."""
    downloader = _downloader(tmp_path)
    state_file = tmp_path / "last_run.json"
    state_file.write_text("not json")
    last_run = downloader.load_last_run_date()
    assert last_run is None
def test_load_last_run_date_missing_key_returns_none(tmp_path):
    """Valid JSON lacking the "last_date_to" key is reported as None."""
    downloader = _downloader(tmp_path)
    state_file = tmp_path / "last_run.json"
    # The file uses "date" rather than the expected "last_date_to" key.
    state_file.write_text('{"date": "2025-01-01"}')
    last_run = downloader.load_last_run_date()
    assert last_run is None
# --- save_last_run_date ---
def test_save_last_run_date_writes_json(tmp_path):
    """save_last_run_date() stores the date under "last_date_to" in last_run.json."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-06-01")
    raw = (tmp_path / "last_run.json").read_text()
    assert json.loads(raw) == {"last_date_to": "2025-06-01"}
# --- generate_html_file fixed filename ---
def test_generate_html_file_uses_fixed_filename(tmp_path):
    """generate_html_file() returns a path named snapshots.html that exists on disk.

    Template rendering is replaced with an AsyncMock so only the output
    location is exercised.
    """
    downloader = _downloader(tmp_path)
    stub_template = patch.object(
        downloader,
        "generate_html_template",
        new_callable=AsyncMock,
        return_value="<html></html>",
    )
    with stub_template:
        html_path = asyncio.run(
            downloader.generate_html_file([], "2024-01-01", "2025-01-01")
        )
    assert html_path.name == "snapshots.html"
    assert (tmp_path / "snapshots.html").exists()
# --- incremental download_snapshots ---
def _run_download(d, **kwargs):
    """Run d.download_snapshots(**kwargs) with authentication, fetching and HTML
    generation replaced by mocks.

    The optional ``new_snapshots`` keyword (default ``[]``) is removed from
    *kwargs* and used as the return value of the mocked fetch_all_snapshots.
    Returns that fetch mock so callers can inspect how it was called.
    """
    fetched = kwargs.pop("new_snapshots", [])
    mock_fetch = AsyncMock(return_value=fetched)
    html_path = d.output_dir / "snapshots.html"
    # One flat `with` enters/exits the patches in the same order as nesting would.
    with patch.object(d, "authenticate", new_callable=AsyncMock), \
            patch.object(d, "fetch_all_snapshots", mock_fetch), \
            patch.object(d, "generate_html_file", new_callable=AsyncMock,
                         return_value=html_path):
        asyncio.run(d.download_snapshots(**kwargs))
    return mock_fetch
def test_first_run_saves_cache_and_state(tmp_path):
    """With no prior state, a run stores the fetched snapshots and a last-run date."""
    downloader = _downloader(tmp_path)
    fetched = [{"id": "abc", "startTime": "2025-01-15T10:00:00Z"}]
    _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)
    assert downloader.load_snapshot_cache() == fetched
    assert downloader.load_last_run_date() is not None
def test_subsequent_run_uses_last_run_date_as_fetch_from(tmp_path):
    """A stored last-run date is used as the fetch start instead of date_from,
    and newly fetched snapshots are added to the existing cache.
    """
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-03-01")
    downloader.save_snapshot_cache([{"id": "old", "startTime": "2025-02-01T00:00:00Z"}])
    fetched = [{"id": "new", "startTime": "2025-03-15T00:00:00Z"}]
    mock_fetch = _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)
    # Third positional arg to fetch_all_snapshots is date_from (after session, type_ids)
    fetch_args = mock_fetch.call_args.args
    assert fetch_args[2] == "2025-03-01"
    cached_ids = {snap["id"] for snap in downloader.load_snapshot_cache()}
    assert cached_ids == {"old", "new"}
def test_deduplication_by_id(tmp_path):
    """A snapshot returned again by the API is kept only once in the cache."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache([{"id": "dup", "startTime": "2025-01-01T00:00:00Z"}])
    # API returns the boundary snapshot again plus one new one
    boundary = {"id": "dup", "startTime": "2025-01-01T00:00:00Z"}
    fresh = {"id": "fresh", "startTime": "2025-01-02T00:00:00Z"}
    _run_download(downloader, date_from="2024-01-01", new_snapshots=[boundary, fresh])
    cached_ids = [snap["id"] for snap in downloader.load_snapshot_cache()]
    assert cached_ids.count("dup") == 1
    assert "fresh" in cached_ids
def test_fetch_failure_does_not_update_state(tmp_path):
    """If fetching raises, the error propagates and neither the last-run date
    nor the cached snapshots change.
    """
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache([{"id": "existing"}])
    stub_auth = patch.object(downloader, "authenticate", new_callable=AsyncMock)
    failing_fetch = patch.object(
        downloader,
        "fetch_all_snapshots",
        new_callable=AsyncMock,
        side_effect=Exception("network error"),
    )
    with stub_auth, failing_fetch:
        with pytest.raises(Exception, match="network error"):
            asyncio.run(downloader.download_snapshots(date_from="2024-01-01"))
    assert downloader.load_last_run_date() == "2025-01-01"
    assert downloader.load_snapshot_cache() == [{"id": "existing"}]
def test_html_generation_failure_does_not_update_state_file(tmp_path):
    """If HTML generation raises, the error propagates and the last-run date is
    not advanced, while the newly fetched snapshots are still in the cache.

    authenticate, fetch_all_snapshots and generate_html_file are all mocked;
    generate_html_file is made to raise OSError("disk full").
    """
    d = _downloader(tmp_path)
    d.save_last_run_date("2025-01-01")
    d.save_snapshot_cache([{"id": "existing"}])
    new_snapshots = [{"id": "new", "startTime": "2025-02-01T00:00:00Z"}]
    stub_auth = patch.object(d, "authenticate", new_callable=AsyncMock)
    stub_fetch = patch.object(
        d, "fetch_all_snapshots", new_callable=AsyncMock, return_value=new_snapshots
    )
    failing_html = patch.object(
        d, "generate_html_file", new_callable=AsyncMock, side_effect=OSError("disk full")
    )
    with stub_auth, stub_fetch, failing_html:
        # Match the message so an unrelated OSError cannot satisfy the test.
        with pytest.raises(OSError, match="disk full"):
            asyncio.run(d.download_snapshots(date_from="2024-01-01"))
    # Cache was updated with new data, but state file was NOT advanced
    assert d.load_last_run_date() == "2025-01-01"
    # Verify the first half of the claim above: the fetched snapshot was cached
    # before HTML generation failed.
    cached_ids = {snap["id"] for snap in d.load_snapshot_cache()}
    assert "new" in cached_ids