From d77226413d1e4119e0f14bcb941c833fe21535ca Mon Sep 17 00:00:00 2001
From: Tudor Sitaru
Date: Fri, 15 May 2026 16:09:50 +0100
Subject: [PATCH] feat: add snapshot cache and state file I/O methods

Co-Authored-By: Claude Sonnet 4.6
---
 src/snapshot_downloader.py         | 75 ++++++++++++++++++++++++++++++
 tests/test_incremental_snapshot.py | 72 ++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 tests/test_incremental_snapshot.py

diff --git a/src/snapshot_downloader.py b/src/snapshot_downloader.py
index e4dfe07..889f08f 100644
--- a/src/snapshot_downloader.py
+++ b/src/snapshot_downloader.py
@@ -539,6 +539,81 @@ class SnapshotDownloader:
 
         return None
 
+    def load_snapshot_cache(self) -> List[Dict[str, Any]]:
+        """Load the cached snapshot list from ``snapshots_cache.json``.
+
+        Returns:
+            The cached list, or ``[]`` when the file is missing, cannot be
+            read, is not valid UTF-8 JSON, or does not hold a JSON array.
+        """
+        cache_file = self.output_dir / "snapshots_cache.json"
+        try:
+            with open(cache_file, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except FileNotFoundError:
+            # No cache yet (e.g. first run): nothing worth warning about.
+            return []
+        except (ValueError, OSError):
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            self.logger.warning("Could not read snapshot cache; starting fresh")
+            return []
+        return data if isinstance(data, list) else []
+
+    def save_snapshot_cache(self, snapshots: List[Dict[str, Any]]) -> None:
+        """Write *snapshots* to ``snapshots_cache.json`` as indented JSON.
+
+        Values the JSON encoder cannot handle natively are stored as
+        ``str(value)`` (``default=str``).
+        """
+        payload = json.dumps(snapshots, indent=2, default=str)
+        self._atomic_write_text(self.output_dir / "snapshots_cache.json", payload)
+
+    def load_last_run_date(self) -> Optional[str]:
+        """Return the ``last_date_to`` string stored in ``last_run.json``.
+
+        Returns:
+            The stored value, or ``None`` when the file is missing, cannot be
+            read, is not valid UTF-8 JSON, is not a JSON object, or holds a
+            non-string ``last_date_to``.
+        """
+        state_file = self.output_dir / "last_run.json"
+        try:
+            with open(state_file, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except FileNotFoundError:
+            return None
+        except (ValueError, OSError):
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            self.logger.warning("Could not read last run state; ignoring it")
+            return None
+        # Well-formed JSON of the wrong shape (e.g. a list) must not raise.
+        last_date = data.get("last_date_to") if isinstance(data, dict) else None
+        return last_date if isinstance(last_date, str) else None
+
+    def save_last_run_date(self, date: str) -> None:
+        """Record *date* under ``last_date_to`` in ``last_run.json``."""
+        payload = json.dumps({"last_date_to": date})
+        self._atomic_write_text(self.output_dir / "last_run.json", payload)
+
+    @staticmethod
+    def _atomic_write_text(target: Path, text: str) -> None:
+        """Write *text* to *target* through a sibling temp file and a rename.
+
+        Writing in place would leave a truncated file behind if the process
+        died mid-write; the loaders would then silently discard the state.
+        """
+        tmp_file = target.with_name(target.name + ".tmp")
+        try:
+            tmp_file.write_text(text, encoding="utf-8")
+            tmp_file.replace(target)
+        except BaseException:
+            # Best-effort cleanup so a failed write leaves no stray temp file.
+            try:
+                tmp_file.unlink()
+            except OSError:
+                pass
+            raise
+
     async def generate_html_file(
         self, snapshots: List[Dict[str, Any]], date_from: str, date_to: str
     ) -> Path:
diff --git a/tests/test_incremental_snapshot.py
b/tests/test_incremental_snapshot.py
new file mode 100644
index 0000000..52f0a87
--- /dev/null
+++ b/tests/test_incremental_snapshot.py
@@ -0,0 +1,72 @@
+import asyncio
+import json
+import pytest
+from unittest.mock import AsyncMock, patch
+
+from src.snapshot_downloader import SnapshotDownloader
+
+CACHE_FILE = "snapshots_cache.json"
+STATE_FILE = "last_run.json"
+
+
+def _downloader(tmp_path):
+    """Return a downloader that writes into pytest's per-test directory."""
+    return SnapshotDownloader(api_key="test-key", output_dir=str(tmp_path))
+
+
+# Reading the snapshot cache.
+def test_load_snapshot_cache_missing(tmp_path):
+    assert _downloader(tmp_path).load_snapshot_cache() == []
+
+
+def test_load_snapshot_cache_returns_data(tmp_path):
+    downloader = _downloader(tmp_path)
+    expected = [{"id": "1", "notes": "hello"}]
+    (tmp_path / CACHE_FILE).write_text(json.dumps(expected))
+    assert downloader.load_snapshot_cache() == expected
+
+
+def test_load_snapshot_cache_malformed_returns_empty(tmp_path):
+    downloader = _downloader(tmp_path)
+    (tmp_path / CACHE_FILE).write_text("not json{{{")
+    assert downloader.load_snapshot_cache() == []
+
+
+def test_load_snapshot_cache_non_list_returns_empty(tmp_path):
+    downloader = _downloader(tmp_path)
+    (tmp_path / CACHE_FILE).write_text('{"key": "val"}')
+    assert downloader.load_snapshot_cache() == []
+
+
+# Writing the snapshot cache.
+def test_save_snapshot_cache_writes_json(tmp_path):
+    downloader = _downloader(tmp_path)
+    expected = [{"id": "1"}, {"id": "2"}]
+    downloader.save_snapshot_cache(expected)
+    written = json.loads((tmp_path / CACHE_FILE).read_text())
+    assert written == expected
+
+
+# Reading the last-run state.
+def test_load_last_run_date_missing(tmp_path):
+    assert _downloader(tmp_path).load_last_run_date() is None
+
+
+def test_load_last_run_date_returns_date(tmp_path):
+    downloader = _downloader(tmp_path)
+    (tmp_path / STATE_FILE).write_text('{"last_date_to": "2025-01-01"}')
+    assert downloader.load_last_run_date() == "2025-01-01"
+
+
+def test_load_last_run_date_malformed_returns_none(tmp_path):
+    downloader = _downloader(tmp_path)
+    (tmp_path / STATE_FILE).write_text("not json")
+    assert downloader.load_last_run_date() is None
+
+
+# Writing the last-run state.
+def test_save_last_run_date_writes_json(tmp_path):
+    downloader = _downloader(tmp_path)
+    downloader.save_last_run_date("2025-06-01")
+    written = json.loads((tmp_path / STATE_FILE).read_text())
+    assert written == {"last_date_to": "2025-06-01"}