"""Tests for SnapshotDownloader cache/state persistence and incremental runs."""

import asyncio
import json
from unittest.mock import AsyncMock, patch

import pytest

from src.snapshot_downloader import SnapshotDownloader


def _downloader(tmp_path):
    """Build a SnapshotDownloader writing into *tmp_path* with a dummy API key."""
    return SnapshotDownloader(output_dir=str(tmp_path), api_key="test-key")


# --- load_snapshot_cache ---


def test_load_snapshot_cache_missing(tmp_path):
    """With no cache file present, loading yields an empty list."""
    downloader = _downloader(tmp_path)
    assert downloader.load_snapshot_cache() == []


def test_load_snapshot_cache_returns_data(tmp_path):
    """A valid JSON list in the cache file is returned unchanged."""
    downloader = _downloader(tmp_path)
    expected = [{"id": "1", "notes": "hello"}]
    cache_path = tmp_path / "snapshots_cache.json"
    cache_path.write_text(json.dumps(expected))
    assert downloader.load_snapshot_cache() == expected


def test_load_snapshot_cache_malformed_returns_empty(tmp_path):
    """Unparseable cache content is treated as an empty cache."""
    downloader = _downloader(tmp_path)
    cache_path = tmp_path / "snapshots_cache.json"
    cache_path.write_text("not json{{{")
    assert downloader.load_snapshot_cache() == []


def test_load_snapshot_cache_non_list_returns_empty(tmp_path):
    """Valid JSON that is not a list is treated as an empty cache."""
    downloader = _downloader(tmp_path)
    cache_path = tmp_path / "snapshots_cache.json"
    cache_path.write_text('{"key": "val"}')
    assert downloader.load_snapshot_cache() == []


# --- save_snapshot_cache ---


def test_save_snapshot_cache_writes_json(tmp_path):
    """Saving the cache writes the snapshots as JSON to the cache file."""
    downloader = _downloader(tmp_path)
    payload = [{"id": "1"}, {"id": "2"}]
    downloader.save_snapshot_cache(payload)
    written = (tmp_path / "snapshots_cache.json").read_text()
    assert json.loads(written) == payload


# --- load_last_run_date ---


def test_load_last_run_date_missing(tmp_path):
    """With no state file present, the last run date is None."""
    downloader = _downloader(tmp_path)
    assert downloader.load_last_run_date() is None


def test_load_last_run_date_returns_date(tmp_path):
    """The "last_date_to" value stored in the state file is returned."""
    downloader = _downloader(tmp_path)
    state_path = tmp_path / "last_run.json"
    state_path.write_text('{"last_date_to": "2025-01-01"}')
    assert downloader.load_last_run_date() == "2025-01-01"


def test_load_last_run_date_malformed_returns_none(tmp_path):
    """Unparseable state content results in None."""
    downloader = _downloader(tmp_path)
    state_path = tmp_path / "last_run.json"
    state_path.write_text("not json")
    assert downloader.load_last_run_date() is None


def test_load_last_run_date_missing_key_returns_none(tmp_path):
    """A state file lacking the "last_date_to" key results in None."""
    downloader = _downloader(tmp_path)
    state_path = tmp_path / "last_run.json"
    state_path.write_text('{"date": "2025-01-01"}')
    assert downloader.load_last_run_date() is None


# --- save_last_run_date ---


def test_save_last_run_date_writes_json(tmp_path):
    """Saving the last run date writes it under "last_date_to" in the state file."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-06-01")
    written = (tmp_path / "last_run.json").read_text()
    assert json.loads(written) == {"last_date_to": "2025-06-01"}


# --- generate_html_file fixed filename ---


def test_generate_html_file_uses_fixed_filename(tmp_path):
    """The generated HTML file is always named snapshots.html in the output dir."""
    downloader = _downloader(tmp_path)
    template_patch = patch.object(
        downloader,
        "generate_html_template",
        new_callable=AsyncMock,
        return_value="",
    )
    with template_patch:
        html_path = asyncio.run(
            downloader.generate_html_file([], "2024-01-01", "2025-01-01")
        )
    assert html_path.name == "snapshots.html"
    assert (tmp_path / "snapshots.html").exists()


# --- incremental download_snapshots ---


def _run_download(d, **kwargs):
    """Run download_snapshots with mocked API calls.

    The optional ``new_snapshots`` keyword is removed from *kwargs* and used
    as the return value of the mocked ``fetch_all_snapshots``; the remaining
    keywords are forwarded to ``download_snapshots``. Returns the fetch mock
    so callers can inspect how it was invoked.
    """
    fetched = kwargs.pop("new_snapshots", [])
    mock_fetch = AsyncMock(return_value=fetched)
    auth_patch = patch.object(d, "authenticate", new_callable=AsyncMock)
    fetch_patch = patch.object(d, "fetch_all_snapshots", mock_fetch)
    html_patch = patch.object(
        d,
        "generate_html_file",
        new_callable=AsyncMock,
        return_value=d.output_dir / "snapshots.html",
    )
    # Patchers are entered left to right, matching the original nesting order.
    with auth_patch, fetch_patch, html_patch:
        asyncio.run(d.download_snapshots(**kwargs))
    return mock_fetch


def test_first_run_saves_cache_and_state(tmp_path):
    """A first run persists the fetched snapshots and records a last run date."""
    downloader = _downloader(tmp_path)
    fetched = [{"id": "abc", "startTime": "2025-01-15T10:00:00Z"}]
    _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)
    assert downloader.load_snapshot_cache() == fetched
    assert downloader.load_last_run_date() is not None


def test_subsequent_run_uses_last_run_date_as_fetch_from(tmp_path):
    """A later run fetches from the saved last run date and merges with the cache."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-03-01")
    downloader.save_snapshot_cache(
        [{"id": "old", "startTime": "2025-02-01T00:00:00Z"}]
    )
    fetched = [{"id": "new", "startTime": "2025-03-15T00:00:00Z"}]
    mock_fetch = _run_download(
        downloader, date_from="2024-01-01", new_snapshots=fetched
    )
    # Third positional arg to fetch_all_snapshots is date_from
    # (after session, type_ids).
    assert mock_fetch.call_args.args[2] == "2025-03-01"
    cached_ids = {snapshot["id"] for snapshot in downloader.load_snapshot_cache()}
    assert cached_ids == {"old", "new"}


def test_deduplication_by_id(tmp_path):
    """A snapshot returned again by the API is stored only once in the cache."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache(
        [{"id": "dup", "startTime": "2025-01-01T00:00:00Z"}]
    )
    # API returns the boundary snapshot again plus one new one
    fetched = [
        {"id": "dup", "startTime": "2025-01-01T00:00:00Z"},
        {"id": "fresh", "startTime": "2025-01-02T00:00:00Z"},
    ]
    _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)
    cached_ids = [snapshot["id"] for snapshot in downloader.load_snapshot_cache()]
    assert cached_ids.count("dup") == 1
    assert "fresh" in cached_ids


def test_fetch_failure_does_not_update_state(tmp_path):
    """If fetching raises, both the state file and the cache stay untouched."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache([{"id": "existing"}])
    auth_patch = patch.object(downloader, "authenticate", new_callable=AsyncMock)
    fetch_patch = patch.object(
        downloader,
        "fetch_all_snapshots",
        new_callable=AsyncMock,
        side_effect=Exception("network error"),
    )
    with auth_patch, fetch_patch:
        with pytest.raises(Exception, match="network error"):
            asyncio.run(downloader.download_snapshots(date_from="2024-01-01"))
    assert downloader.load_last_run_date() == "2025-01-01"
    assert downloader.load_snapshot_cache() == [{"id": "existing"}]


def test_html_generation_failure_does_not_update_state_file(tmp_path):
    """If HTML generation raises, the saved last run date is not advanced."""
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache([{"id": "existing"}])
    fetched = [{"id": "new", "startTime": "2025-02-01T00:00:00Z"}]
    auth_patch = patch.object(downloader, "authenticate", new_callable=AsyncMock)
    fetch_patch = patch.object(
        downloader,
        "fetch_all_snapshots",
        new_callable=AsyncMock,
        return_value=fetched,
    )
    html_patch = patch.object(
        downloader,
        "generate_html_file",
        new_callable=AsyncMock,
        side_effect=OSError("disk full"),
    )
    with auth_patch, fetch_patch, html_patch:
        with pytest.raises(OSError):
            asyncio.run(downloader.download_snapshots(date_from="2024-01-01"))
    # Cache was updated with new data, but state file was NOT advanced
    assert downloader.load_last_run_date() == "2025-01-01"