Bug fixes and performance improvements
All checks were successful
Build Docker Image / build (push) Successful in 45s

This commit is contained in:
Tudor Sitaru
2025-11-11 11:28:01 +00:00
parent acfb22cbea
commit 4f73b3036e
5 changed files with 82 additions and 84 deletions

View File

@@ -8,16 +8,16 @@ and generates a comprehensive markup file containing all the snapshot informatio
import argparse
import asyncio
import aiohttp
import html
import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Dict, Any, Optional
from urllib.parse import urlencode, urljoin
import html
from typing import Any, Dict, List, Optional
from urllib.parse import urlencode
import aiofiles
import aiohttp
# Import the auth manager
try:
@@ -132,12 +132,11 @@ class SnapshotDownloader:
async def fetch_snapshots_page(
self,
session: aiohttp.ClientSession,
type_ids: List[int] = [15],
type_ids: list[int] = [15],
date_from: str = "2021-10-18",
date_to: str = None,
date_to: str = "",
cursor: str = None,
per_page: int = 100,
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Fetch a single page of snapshots from the API using cursor-based pagination.
@@ -147,12 +146,11 @@ class SnapshotDownloader:
date_from: Start date in YYYY-MM-DD format
date_to: End date in YYYY-MM-DD format
cursor: Cursor for pagination (None for first page)
per_page: Number of items per page
Returns:
Dictionary containing the API response
"""
if date_to is None:
if date_to == "":
date_to = datetime.now().strftime("%Y-%m-%d")
# Build query parameters
@@ -167,7 +165,7 @@ class SnapshotDownloader:
# Add type IDs - API expects typeIDs[]=15 format
for type_id in type_ids:
params[f"typeIDs[]"] = type_id
params["typeIDs[]"] = type_id
# Build URL with parameters
query_string = urlencode(params, doseq=True)
@@ -198,13 +196,12 @@ class SnapshotDownloader:
if (
len(data.get("posts", [])) <= 3
): # Only print full data if few posts
print(f"Full Response Data:")
print("Full Response Data:")
print(json.dumps(data, indent=2, default=str))
print("=" * 50)
# The API returns snapshots in 'posts' field
snapshots = data.get("posts", [])
cursor_value = data.get("cursor")
page_info = f"cursor: {cursor[:20]}..." if cursor else "first page"
self.logger.info(f"Retrieved {len(snapshots)} snapshots ({page_info})")
@@ -394,11 +391,11 @@ class SnapshotDownloader:
image_name = html.escape(image.get("fileName", "Image"))
if local_path:
media_html += f'<div class="image-item">\n'
media_html += '<div class="image-item">\n'
media_html += f' <img src="{local_path}" alt="{image_name}" loading="lazy">\n'
media_html += f' <p class="image-caption">{image_name}</p>\n'
media_html += f' <p class="image-meta">Updated: {self.format_date(image.get("updated", ""))}</p>\n'
media_html += f"</div>\n"
media_html += "</div>\n"
else:
# Fallback to API URL if download failed
image_url = (
@@ -407,13 +404,13 @@ class SnapshotDownloader:
else ""
)
if image_url:
media_html += f'<div class="image-item">\n'
media_html += '<div class="image-item">\n'
media_html += f' <img src="{image_url}" alt="{image_name}" loading="lazy">\n'
media_html += (
f' <p class="image-caption">{image_name} (online)</p>\n'
)
media_html += f' <p class="image-meta">Updated: {self.format_date(image.get("updated", ""))}</p>\n'
media_html += f"</div>\n"
media_html += "</div>\n"
media_html += "</div>\n</div>\n"
@@ -469,11 +466,11 @@ class SnapshotDownloader:
if value:
if isinstance(value, list):
value = ", ".join(str(v) for v in value)
metadata_html += f'<div class="metadata-item">\n'
metadata_html += '<div class="metadata-item">\n'
metadata_html += (
f" <strong>{label}:</strong> {html.escape(str(value))}\n"
)
metadata_html += f"</div>\n"
metadata_html += "</div>\n"
# Raw JSON data (collapsed by default)
metadata_html += '<details class="raw-data">\n'
@@ -1230,7 +1227,7 @@ Examples:
if html_file:
print(f"\n✅ Success! Snapshots downloaded and saved to: {html_file}")
print(f"📁 Open the file in your browser to view the snapshots")
print("📁 Open the file in your browser to view the snapshots")
else:
print("⚠️ No snapshots were found for the specified period")