feat: incremental snapshot fetch with JSON cache and state file
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -86,3 +86,74 @@ def test_generate_html_file_uses_fixed_filename(tmp_path):
|
||||
result = asyncio.run(d.generate_html_file([], "2024-01-01", "2025-01-01"))
|
||||
assert result.name == "snapshots.html"
|
||||
assert (tmp_path / "snapshots.html").exists()
|
||||
|
||||
|
||||
# --- incremental download_snapshots ---
|
||||
|
||||
def _run_download(d, **kwargs):
    """Drive ``d.download_snapshots`` with its collaborators patched out.

    ``authenticate`` and ``generate_html_file`` are replaced by ``AsyncMock``
    objects, and ``fetch_all_snapshots`` by a mock that returns the list given
    as the ``new_snapshots`` keyword (an empty list when omitted). Every other
    keyword argument is forwarded unchanged to ``download_snapshots``.

    Returns the mock standing in for ``fetch_all_snapshots`` so the caller can
    inspect how it was called.
    """
    fetched = kwargs.pop("new_snapshots", [])
    mock_fetch = AsyncMock(return_value=fetched)

    # A single ``with`` enters the patches left to right and unwinds them in
    # reverse, which is exactly what nesting three ``with`` blocks does.
    with patch.object(d, "authenticate", new_callable=AsyncMock), \
            patch.object(d, "fetch_all_snapshots", mock_fetch), \
            patch.object(
                d,
                "generate_html_file",
                new_callable=AsyncMock,
                return_value=d.output_dir / "snapshots.html",
            ):
        asyncio.run(d.download_snapshots(**kwargs))

    return mock_fetch
|
||||
|
||||
|
||||
def test_first_run_saves_cache_and_state(tmp_path):
    """A run with nothing saved beforehand stores the snapshots and a run date.

    The test does not pre-populate the cache or the last-run date; after the
    mocked download the cache must equal the fetched list and a last-run date
    must be readable.
    """
    downloader = _downloader(tmp_path)
    fetched = [{"id": "abc", "startTime": "2025-01-15T10:00:00Z"}]

    _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)

    cached = downloader.load_snapshot_cache()
    assert cached == fetched
    last_run = downloader.load_last_run_date()
    assert last_run is not None
|
||||
|
||||
|
||||
def test_subsequent_run_uses_last_run_date_as_fetch_from(tmp_path):
    """With a saved last-run date, the fetch starts from that date.

    The caller still passes ``date_from="2024-01-01"``, but the fetch mock is
    expected to receive the saved ``"2025-03-01"``; afterwards the cache must
    hold both the previously cached snapshot and the newly fetched one.
    """
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-03-01")
    downloader.save_snapshot_cache(
        [{"id": "old", "startTime": "2025-02-01T00:00:00Z"}]
    )

    fetched = [{"id": "new", "startTime": "2025-03-15T00:00:00Z"}]
    fetch_mock = _run_download(
        downloader, date_from="2024-01-01", new_snapshots=fetched
    )

    # date_from is the third positional argument of fetch_all_snapshots
    # (it follows session and type_ids).
    positional_args = fetch_mock.call_args.args
    assert positional_args[2] == "2025-03-01"

    cached_ids = set()
    for snapshot in downloader.load_snapshot_cache():
        cached_ids.add(snapshot["id"])
    assert cached_ids == {"old", "new"}
|
||||
|
||||
|
||||
def test_deduplication_by_id(tmp_path):
    """A snapshot whose id is already cached is not stored a second time.

    The cache starts with ``"dup"``; the mocked fetch returns ``"dup"`` again
    together with ``"fresh"``. Afterwards ``"dup"`` must appear exactly once
    and ``"fresh"`` must be present.
    """
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache(
        [{"id": "dup", "startTime": "2025-01-01T00:00:00Z"}]
    )

    # The API hands back the boundary snapshot once more, plus one new entry.
    fetched = [
        {"id": "dup", "startTime": "2025-01-01T00:00:00Z"},
        {"id": "fresh", "startTime": "2025-01-02T00:00:00Z"},
    ]
    _run_download(downloader, date_from="2024-01-01", new_snapshots=fetched)

    cached_ids = []
    for snapshot in downloader.load_snapshot_cache():
        cached_ids.append(snapshot["id"])

    dup_occurrences = cached_ids.count("dup")
    assert dup_occurrences == 1
    assert "fresh" in cached_ids
|
||||
|
||||
|
||||
def test_fetch_failure_does_not_update_state(tmp_path):
    """A failing fetch propagates and leaves the saved date and cache as-is.

    ``fetch_all_snapshots`` is patched to raise ``Exception("network error")``;
    the error must surface from ``download_snapshots``, and both the last-run
    date and the snapshot cache must still hold their pre-run values.
    """
    downloader = _downloader(tmp_path)
    downloader.save_last_run_date("2025-01-01")
    downloader.save_snapshot_cache([{"id": "existing"}])

    # pytest.raises is listed last so it is the innermost context, as it would
    # be when nested inside the two patches.
    with patch.object(downloader, "authenticate", new_callable=AsyncMock), \
            patch.object(
                downloader,
                "fetch_all_snapshots",
                new_callable=AsyncMock,
                side_effect=Exception("network error"),
            ), \
            pytest.raises(Exception, match="network error"):
        asyncio.run(downloader.download_snapshots(date_from="2024-01-01"))

    saved_date = downloader.load_last_run_date()
    assert saved_date == "2025-01-01"
    saved_cache = downloader.load_snapshot_cache()
    assert saved_cache == [{"id": "existing"}]
|
||||
|
||||
Reference in New Issue
Block a user