diff --git a/docs/archived/SNAPSHOT_DOWNLOADER_SUMMARY.md b/docs/archived/SNAPSHOT_DOWNLOADER_SUMMARY.md index 884c53b..537d980 100644 --- a/docs/archived/SNAPSHOT_DOWNLOADER_SUMMARY.md +++ b/docs/archived/SNAPSHOT_DOWNLOADER_SUMMARY.md @@ -45,7 +45,7 @@ snapshots = await downloader.fetch_all_snapshots( downloader = SnapshotDownloader( # Option 1: Direct API key api_key="your-api-key-here", - + # Option 2: Email/password (gets API key automatically) email="user@example.com", password="password" @@ -75,7 +75,6 @@ curl 'https://api.parentzone.me/v1/posts?typeIDs[]=15&dateFrom=2021-10-18&dateTo - ✅ **Endpoint**: `/v1/posts` - ✅ **Type ID filtering**: `typeIDs[]=15` (configurable) - ✅ **Date range filtering**: `dateFrom` and `dateTo` parameters -- ✅ **Pagination**: `page` and `per_page` parameters - ✅ **All required headers** from curl command - ✅ **Authentication**: `x-api-key` header support @@ -271,7 +270,7 @@ parentzone_downloader/
Total: 150 snapshots
- +

Snapshot Title

@@ -288,7 +287,7 @@ parentzone_downloader/
- + @@ -350,4 +349,4 @@ The system successfully addresses the original requirements: 4. ✅ Includes interactive features for browsing and searching 5. ✅ Supports flexible date ranges and filtering options -**Ready to use immediately for downloading and viewing ParentZone snapshots!** \ No newline at end of file +**Ready to use immediately for downloading and viewing ParentZone snapshots!** diff --git a/src/auth_manager.py b/src/auth_manager.py index 1a9e74d..e486559 100644 --- a/src/auth_manager.py +++ b/src/auth_manager.py @@ -8,7 +8,6 @@ and manages session tokens for API requests. import asyncio import aiohttp -import json import logging from typing import Optional, Dict, Any from urllib.parse import urljoin @@ -22,7 +21,7 @@ class AuthManager: Args: api_url: Base URL of the API """ - self.api_url = api_url.rstrip('/') + self.api_url = api_url.rstrip("/") self.login_url = urljoin(self.api_url, "/v1/auth/login") self.create_session_url = urljoin(self.api_url, "/v1/auth/create-session") self.session_token: Optional[str] = None @@ -34,18 +33,18 @@ class AuthManager: # Standard headers for login requests self.headers = { - 'accept': 'application/json, text/plain, */*', - 'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,ro;q=0.7', - 'content-type': 'application/json;charset=UTF-8', - 'origin': 'https://www.parentzone.me', - 'priority': 'u=1, i', - 'sec-ch-ua': '"Not;A=Brand";v="99", "Google Chrome";v="139", "Chromium";v="139"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"macOS"', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-site', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36' + "accept": "application/json, text/plain, */*", + "accept-language": "en-GB,en-US;q=0.9,en;q=0.8,ro;q=0.7", + "content-type": "application/json;charset=UTF-8", + "origin": "https://www.parentzone.me", + "priority": "u=1, i", + "sec-ch-ua": '"Not;A=Brand";v="99", "Google Chrome";v="139", "Chromium";v="139"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-site", + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36", } async def login(self, email: str, password: str) -> bool: @@ -64,18 +63,13 @@ class AuthManager: self.logger.info(f"Attempting login for {email}") # Step 1: Login to get user accounts - login_data = { - "email": email, - "password": password - } + login_data = {"email": email, "password": password} timeout = aiohttp.ClientTimeout(total=30) async with aiohttp.ClientSession(timeout=timeout) as session: try: async with session.post( - self.login_url, - headers=self.headers, - json=login_data + self.login_url, headers=self.headers, json=login_data ) as response: self.logger.info(f"Login response status: {response.status}") @@ -89,20 +83,26 @@ class AuthManager: if isinstance(data, list) and len(data) > 0: # Use the first account first_account = data[0] - self.user_id = first_account.get('id') - self.user_name = first_account.get('name') - self.provider_name = first_account.get('providerName') + self.user_id = first_account.get("id") + self.user_name = first_account.get("name") + self.provider_name = first_account.get("providerName") - self.logger.info(f"Selected account: {self.user_name} at {self.provider_name} (ID: {self.user_id})") + self.logger.info( + f"Selected account: {self.user_name} at {self.provider_name} (ID: {self.user_id})" + ) # Step 2: Create session with the account ID return await self._create_session(password) else: - self.logger.error(f"Unexpected login response format: {data}") + self.logger.error( + f"Unexpected login response format: {data}" + ) return False else: error_text = await response.text() - self.logger.error(f"Login failed with status {response.status}: {error_text}") + self.logger.error( + f"Login failed with status {response.status}: {error_text}" + ) return False except Exception as e: @@ -125,24 +125,21 @@ class AuthManager: self.logger.info(f"Creating session for user ID: {self.user_id}") - session_data = { - "id": self.user_id, - "password": password - } + session_data = {"id": self.user_id, "password": password} # Add x-api-product header for session creation session_headers = self.headers.copy() - session_headers['x-api-product'] = 'iConnect' + session_headers["x-api-product"] = "iConnect" timeout = aiohttp.ClientTimeout(total=30) async with aiohttp.ClientSession(timeout=timeout) as session: try: async with session.post( - self.create_session_url, - headers=session_headers, - json=session_data + self.create_session_url, headers=session_headers, json=session_data ) as response: - self.logger.info(f"Create session response status: {response.status}") + self.logger.info( + f"Create session response status: {response.status}" + ) if response.status == 200: data = await response.json() @@ -150,16 +147,20 @@ class AuthManager: self.logger.debug(f"Session response data: {data}") # Extract API key from response - if isinstance(data, dict) and 'key' in data: - self.api_key = data['key'] + if isinstance(data, dict) and "key" in data: + self.api_key = data["key"] self.logger.info("API key obtained successfully") return True else: - self.logger.error(f"No 'key' field in session response: {data}") + self.logger.error( + f"No 'key' field in session response: {data}" + ) return False else: error_text = await response.text() - self.logger.error(f"Session creation failed with status {response.status}: {error_text}") + self.logger.error( + f"Session creation failed with status {response.status}: {error_text}" + ) return False except Exception as e: @@ -177,8 +178,8 @@ class AuthManager: if self.api_key: # Use x-api-key header for authenticated requests - headers['x-api-key'] = self.api_key - headers['x-api-product'] = 'iConnect' + headers["x-api-key"] = self.api_key + headers["x-api-product"] = "iConnect" return headers @@ -216,7 +217,11 @@ async def test_login(): print("✅ Login successful!") print(f"User: {auth_manager.user_name} at {auth_manager.provider_name}") print(f"User ID: {auth_manager.user_id}") - print(f"API Key: {auth_manager.api_key[:20]}..." if auth_manager.api_key else "No API key found") + print( + f"API Key: {auth_manager.api_key[:20]}..." + if auth_manager.api_key + else "No API key found" + ) # Test getting auth headers headers = auth_manager.get_auth_headers() diff --git a/src/image_downloader.py b/src/image_downloader.py index c978285..9aa7014 100644 --- a/src/image_downloader.py +++ b/src/image_downloader.py @@ -12,17 +12,16 @@ Usage: import argparse import asyncio -import aiohttp -import aiofiles -import os -import json import logging -from pathlib import Path -from urllib.parse import urljoin, urlparse -from typing import List, Dict, Any, Optional +import os import time +from pathlib import Path +from typing import Any, Dict, List +from urllib.parse import urljoin, urlparse + +import aiofiles +import aiohttp from tqdm import tqdm -import hashlib # Import the auth manager and asset tracker try: @@ -341,8 +340,8 @@ class ImageDownloader: # Set file modification time to match the updated timestamp if "updated" in asset: try: - from datetime import datetime import os + from datetime import datetime # Parse the ISO timestamp updated_time = datetime.fromisoformat( diff --git a/src/snapshot_downloader.py b/src/snapshot_downloader.py index 5c8e82c..967f9a7 100644 --- a/src/snapshot_downloader.py +++ b/src/snapshot_downloader.py @@ -8,16 +8,16 @@ and generates a comprehensive markup file containing all the snapshot informatio import argparse import asyncio -import aiohttp +import html import json import logging -import os from datetime import datetime, timedelta from pathlib import Path -from typing import List, Dict, Any, Optional -from urllib.parse import urlencode, urljoin -import html +from typing import Any, Dict, List, Optional +from urllib.parse import urlencode + import aiofiles +import aiohttp # Import the auth manager try: @@ -132,12 +132,11 @@ class SnapshotDownloader: async def fetch_snapshots_page( self, session: aiohttp.ClientSession, - type_ids: List[int] = [15], + type_ids: list[int] = [15], date_from: str = "2021-10-18", - date_to: str = None, + date_to: str = "", cursor: str = None, - per_page: int = 100, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Fetch a single page of snapshots from the API using cursor-based pagination. @@ -147,12 +146,11 @@ class SnapshotDownloader: date_from: Start date in YYYY-MM-DD format date_to: End date in YYYY-MM-DD format cursor: Cursor for pagination (None for first page) - per_page: Number of items per page Returns: Dictionary containing the API response """ - if date_to is None: + if date_to == "": date_to = datetime.now().strftime("%Y-%m-%d") # Build query parameters @@ -167,7 +165,7 @@ class SnapshotDownloader: # Add type IDs - API expects typeIDs[]=15 format for type_id in type_ids: - params[f"typeIDs[]"] = type_id + params["typeIDs[]"] = type_id # Build URL with parameters query_string = urlencode(params, doseq=True) @@ -198,13 +196,12 @@ class SnapshotDownloader: if ( len(data.get("posts", [])) <= 3 ): # Only print full data if few posts - print(f"Full Response Data:") + print("Full Response Data:") print(json.dumps(data, indent=2, default=str)) print("=" * 50) # The API returns snapshots in 'posts' field snapshots = data.get("posts", []) - cursor_value = data.get("cursor") page_info = f"cursor: {cursor[:20]}..." if cursor else "first page" self.logger.info(f"Retrieved {len(snapshots)} snapshots ({page_info})") @@ -394,11 +391,11 @@ class SnapshotDownloader: image_name = html.escape(image.get("fileName", "Image")) if local_path: - media_html += f'
\n' + media_html += '
\n' media_html += f' {image_name}\n' media_html += f'

{image_name}

\n' media_html += f'

Updated: {self.format_date(image.get("updated", ""))}

\n' - media_html += f"
\n" + media_html += "
\n" else: # Fallback to API URL if download failed image_url = ( @@ -407,13 +404,13 @@ class SnapshotDownloader: else "" ) if image_url: - media_html += f'
\n' + media_html += '
\n' media_html += f' {image_name}\n' media_html += ( f'

{image_name} (online)

\n' ) media_html += f'

Updated: {self.format_date(image.get("updated", ""))}

\n' - media_html += f"
\n" + media_html += "
\n" media_html += "\n\n" @@ -469,11 +466,11 @@ class SnapshotDownloader: if value: if isinstance(value, list): value = ", ".join(str(v) for v in value) - metadata_html += f'
\n' + metadata_html += '
\n' metadata_html += ( f" {label}: {html.escape(str(value))}\n" ) - metadata_html += f"
\n" + metadata_html += "
\n" # Raw JSON data (collapsed by default) metadata_html += '
\n' @@ -1230,7 +1227,7 @@ Examples: if html_file: print(f"\n✅ Success! Snapshots downloaded and saved to: {html_file}") - print(f"📁 Open the file in your browser to view the snapshots") + print("📁 Open the file in your browser to view the snapshots") else: print("⚠️ No snapshots were found for the specified period") diff --git a/tests/test_snapshot_downloader.py b/tests/test_snapshot_downloader.py index e004443..44fd8fc 100644 --- a/tests/test_snapshot_downloader.py +++ b/tests/test_snapshot_downloader.py @@ -222,7 +222,6 @@ class SnapshotDownloaderTester: date_from="2024-01-01", date_to="2024-01-31", page=1, - per_page=100, ) except Exception as e: # Expected - we just want to capture the URL @@ -339,7 +338,7 @@ class SnapshotDownloaderTester: print(" ✅ HTML file created") # Check file content - with open(html_file, 'r', encoding='utf-8') as f: + with open(html_file, "r", encoding="utf-8") as f: content = f.read() if "" in content: @@ -514,9 +513,7 @@ class SnapshotDownloaderTester: # Override the fetch_snapshots_page method to use our mock original_method = downloader.fetch_snapshots_page - async def mock_fetch_page( - session, type_ids, date_from, date_to, page, per_page - ): + async def mock_fetch_page(session, type_ids, date_from, date_to, page): response_data = mock_session.pages[page - 1] mock_session.call_count += 1 downloader.stats["pages_fetched"] += 1 @@ -601,6 +598,7 @@ class SnapshotDownloaderTester: except Exception as e: print(f"\n❌ TEST SUITE FAILED: {e}") import traceback + traceback.print_exc() return False @@ -660,7 +658,7 @@ def main(): # Setup logging logging.basicConfig( level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) tester = SnapshotDownloaderTester()