17
Dockerfile
@@ -13,19 +13,18 @@ COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application files
|
||||
COPY *.py ./
|
||||
COPY *config.json ./
|
||||
COPY src/ ./src/
|
||||
COPY config/ ./config/
|
||||
|
||||
# Create output directories
|
||||
RUN mkdir -p /app/snapshots /app/logs
|
||||
RUN mkdir -p /app/data/snapshots /app/data/logs
|
||||
|
||||
# Copy scripts
|
||||
COPY scheduler.sh ./
|
||||
COPY startup.sh ./
|
||||
RUN chmod +x scheduler.sh startup.sh
|
||||
COPY scripts/ ./scripts/
|
||||
RUN chmod +x scripts/*.sh
|
||||
|
||||
# Copy cron configuration
|
||||
COPY crontab /etc/cron.d/parentzone-downloader
|
||||
COPY scripts/crontab /etc/cron.d/parentzone-downloader
|
||||
RUN chmod 0644 /etc/cron.d/parentzone-downloader
|
||||
RUN crontab /etc/cron.d/parentzone-downloader
|
||||
|
||||
@@ -40,7 +39,7 @@ ENV PYTHONPATH=/app
|
||||
EXPOSE 8080
|
||||
|
||||
# Expose volume for persistent data
|
||||
VOLUME ["/app/snapshots", "/app/logs", "/app/parentzone_images"]
|
||||
VOLUME ["/app/data/snapshots", "/app/data/logs", "/app/data/parentzone_images"]
|
||||
|
||||
# Start all services using startup script
|
||||
CMD ["./startup.sh"]
|
||||
CMD ["./scripts/startup.sh"]
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"api_url": "https://api.parentzone.me",
|
||||
"output_dir": "snapshots",
|
||||
"api_key": "YOUR_API_KEY_HERE",
|
||||
"email": "your-email@example.com",
|
||||
"password": "your-password",
|
||||
"date_from": "2021-01-01",
|
||||
"date_to": null,
|
||||
"type_ids": [15],
|
||||
"max_pages": null,
|
||||
"debug_mode": false
|
||||
}
|
||||
@@ -13,17 +13,16 @@ services:
|
||||
# Timezone for cron scheduling
|
||||
- TZ=${TZ:-UTC}
|
||||
volumes:
|
||||
# Persistent storage for snapshots and logs
|
||||
- ./snapshots:/app/snapshots
|
||||
- ./logs:/app/logs
|
||||
# Mount your config file
|
||||
- ./config.json:/app/config.json:ro
|
||||
# Persistent storage for data
|
||||
- ./data/snapshots:/app/data/snapshots
|
||||
- ./data/logs:/app/data/logs
|
||||
- ./data/parentzone_images:/app/data/parentzone_images
|
||||
# Mount configuration directory
|
||||
- ./config:/app/config:ro
|
||||
ports:
|
||||
- "8080:8080"
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
snapshots:
|
||||
driver: local
|
||||
logs:
|
||||
data:
|
||||
driver: local
|
||||
|
||||
242
docs/README.md
Normal file
@@ -0,0 +1,242 @@
|
||||
# Image Downloader Script
|
||||
|
||||
A Python script to download images from a REST API that provides endpoints for listing assets and downloading them in full resolution.
|
||||
|
||||
## Features
|
||||
|
||||
- **Concurrent Downloads**: Download multiple images simultaneously for better performance
|
||||
- **Error Handling**: Robust error handling with detailed logging
|
||||
- **Progress Tracking**: Real-time progress bar with download statistics
|
||||
- **Resume Support**: Skip already downloaded files
|
||||
- **Flexible API Integration**: Supports various API response formats
|
||||
- **Filename Sanitization**: Automatically handles invalid characters in filenames
|
||||
- **File Timestamps**: Preserves original file modification dates from API
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone or download this repository
|
||||
2. Install the required dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
python image_downloader.py \
|
||||
--api-url "https://api.example.com" \
|
||||
--list-endpoint "/assets" \
|
||||
--download-endpoint "/download" \
|
||||
--output-dir "./images" \
|
||||
--api-key "your_api_key_here"
|
||||
```
|
||||
|
||||
### Advanced Usage
|
||||
|
||||
```bash
|
||||
python image_downloader.py \
|
||||
--api-url "https://api.example.com" \
|
||||
--list-endpoint "/assets" \
|
||||
--download-endpoint "/download" \
|
||||
--output-dir "./images" \
|
||||
--max-concurrent 10 \
|
||||
--timeout 60 \
|
||||
--api-key "your_api_key_here"
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
- `--api-url`: Base URL of the API (required)
|
||||
- `--list-endpoint`: Endpoint to get the list of assets (required)
|
||||
- `--download-endpoint`: Endpoint to download individual assets (required)
|
||||
- `--output-dir`: Directory to save downloaded images (required)
|
||||
- `--max-concurrent`: Maximum number of concurrent downloads (default: 5)
|
||||
- `--timeout`: Request timeout in seconds (default: 30)
|
||||
- `--api-key`: API key for authentication (x-api-key header)
|
||||
- `--email`: Email for login authentication
|
||||
- `--password`: Password for login authentication
|
||||
|
||||
## Authentication
|
||||
|
||||
The script supports two authentication methods:
|
||||
|
||||
### API Key Authentication
|
||||
- Uses `x-api-key` header for list endpoint
|
||||
- Uses `key` parameter for download endpoint
|
||||
- Configure with `--api-key` parameter or `api_key` in config file
|
||||
|
||||
### Login Authentication
|
||||
- Performs login to `/v1/auth/login` endpoint
|
||||
- Uses session token for list endpoint
|
||||
- Uses `key` parameter for download endpoint
|
||||
- Configure with `--email` and `--password` parameters or in config file
|
||||
|
||||
**Note**: Use only one authentication method at a time. If both an API key and login credentials are supplied, the API key takes precedence.
|
||||
|
||||
## API Integration
|
||||
|
||||
The script is designed to work with REST APIs that follow these patterns:
|
||||
|
||||
### List Endpoint
|
||||
The list endpoint should return a JSON response with asset information. The script supports these common formats:
|
||||
|
||||
```json
|
||||
// Array of assets
|
||||
[
|
||||
{"id": "1", "filename": "image1.jpg", "url": "..."},
|
||||
{"id": "2", "filename": "image2.png", "url": "..."}
|
||||
]
|
||||
|
||||
// Object with data array
|
||||
{
|
||||
"data": [
|
||||
{"id": "1", "filename": "image1.jpg"},
|
||||
{"id": "2", "filename": "image2.png"}
|
||||
]
|
||||
}
|
||||
|
||||
// Object with results array
|
||||
{
|
||||
"results": [
|
||||
{"id": "1", "filename": "image1.jpg"},
|
||||
{"id": "2", "filename": "image2.png"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Download Endpoint
|
||||
The download endpoint should accept an asset ID and return the image file. Common patterns:
|
||||
|
||||
- `GET /download/{asset_id}`
|
||||
- `GET /assets/{asset_id}/download`
|
||||
- `GET /images/{asset_id}`
|
||||
|
||||
**ParentZone API Format:**
|
||||
- `GET /v1/media/{asset_id}/full?key={api_key}&u={updated_timestamp}`
|
||||
|
||||
### Asset Object Fields
|
||||
|
||||
The script looks for these fields in asset objects:
|
||||
|
||||
**Required for identification:**
|
||||
- `id`, `asset_id`, `image_id`, `file_id`, `uuid`, or `key`
|
||||
|
||||
**Optional for better filenames:**
|
||||
- `fileName`: Preferred filename (ParentZone API)
|
||||
- `filename`: Alternative filename field
|
||||
- `name`: Alternative name
|
||||
- `title`: Display title
|
||||
- `mimeType`: MIME type for proper file extension (ParentZone API)
|
||||
- `content_type`: Alternative MIME type field
|
||||
|
||||
**Required for ParentZone API downloads:**
|
||||
- `updated`: Timestamp used in download URL parameter and file modification time
|
||||
|
||||
## Examples
|
||||
|
||||
### Example 1: ParentZone API with API Key
|
||||
```bash
|
||||
python image_downloader.py \
|
||||
--api-url "https://api.parentzone.me" \
|
||||
--list-endpoint "/v1/gallery" \
|
||||
--download-endpoint "/v1/media" \
|
||||
--output-dir "./parentzone_images" \
|
||||
--api-key "your_api_key_here"
|
||||
```
|
||||
|
||||
### Example 2: ParentZone API with Login
|
||||
```bash
|
||||
python image_downloader.py \
|
||||
--api-url "https://api.parentzone.me" \
|
||||
--list-endpoint "/v1/gallery" \
|
||||
--download-endpoint "/v1/media" \
|
||||
--output-dir "./parentzone_images" \
|
||||
--email "your_email@example.com" \
|
||||
--password "your_password_here"
|
||||
```
|
||||
|
||||
### Example 3: API with Custom Authentication Headers
|
||||
The script supports API key authentication via the `--api-key` parameter. For other authentication methods, you can modify the script to include custom headers:
|
||||
|
||||
```python
|
||||
# In the get_asset_list method, add headers:
|
||||
headers = {
|
||||
'Authorization': 'Bearer your_token_here',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
async with session.get(url, headers=headers, timeout=self.timeout) as response:
|
||||
```
|
||||
|
||||
### Example 4: Custom Response Format
|
||||
If your API returns a different format, you can modify the `get_asset_list` method:
|
||||
|
||||
```python
|
||||
# For API that returns: {"images": [...]}
|
||||
if 'images' in data:
|
||||
assets = data['images']
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
The script creates:
|
||||
|
||||
1. **Downloaded Images**: All images are saved to the specified output directory with original modification timestamps
|
||||
2. **Log File**: `download.log` in the output directory with detailed information
|
||||
3. **Progress Display**: Real-time progress bar showing:
|
||||
- Total assets
|
||||
- Successfully downloaded
|
||||
- Failed downloads
|
||||
- Skipped files (already exist)
|
||||
|
||||
### File Timestamps
|
||||
|
||||
The downloader automatically sets the file modification time to match the `updated` timestamp from the API response. This preserves the original file dates and helps with:
|
||||
|
||||
- **File Organization**: Files can be sorted by their original creation/update dates
|
||||
- **Backup Systems**: Backup tools can properly identify changed files
|
||||
- **Media Libraries**: Media management software can display correct dates
|
||||
- **Data Integrity**: Maintains the temporal relationship between files
|
||||
|
||||
## Error Handling
|
||||
|
||||
The script handles various error scenarios:
|
||||
|
||||
- **Network Errors**: Retries and continues with other downloads
|
||||
- **Invalid Responses**: Logs errors and continues
|
||||
- **File System Errors**: Creates directories and handles permission issues
|
||||
- **API Errors**: Logs HTTP errors and continues
|
||||
|
||||
## Performance
|
||||
|
||||
- **Concurrent Downloads**: Configurable concurrency (default: 5)
|
||||
- **Connection Pooling**: Efficient HTTP connection reuse
|
||||
- **Chunked Downloads**: Memory-efficient large file handling
|
||||
- **Progress Tracking**: Real-time feedback on download progress
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **"No assets found"**: Check your list endpoint URL and response format
|
||||
2. **"Failed to fetch asset list"**: Verify API URL and network connectivity
|
||||
3. **"Content type is not an image"**: API might be returning JSON instead of image data
|
||||
4. **Permission errors**: Check write permissions for the output directory
|
||||
|
||||
### Debug Mode
|
||||
|
||||
For detailed debugging, you can modify the logging level:
|
||||
|
||||
```python
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This script is provided as-is for educational and personal use.
|
||||
|
||||
## Contributing
|
||||
|
||||
Feel free to submit issues and enhancement requests!
|
||||
@@ -12,12 +12,12 @@ import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Add the current directory to the path so we can import modules
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
# Add the parent directory to the path so we can import modules
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from auth_manager import AuthManager
|
||||
from asset_tracker import AssetTracker
|
||||
from image_downloader import ImageDownloader
|
||||
from src.auth_manager import AuthManager
|
||||
from src.asset_tracker import AssetTracker
|
||||
from src.image_downloader import ImageDownloader
|
||||
|
||||
|
||||
async def demo_asset_tracking():
|
||||
@@ -32,8 +32,7 @@ async def demo_asset_tracking():
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -78,7 +77,7 @@ async def demo_asset_tracking():
|
||||
email=email,
|
||||
password=password,
|
||||
track_assets=True,
|
||||
max_concurrent=3
|
||||
max_concurrent=3,
|
||||
)
|
||||
|
||||
# First download run
|
||||
@@ -104,12 +103,7 @@ async def demo_asset_tracking():
|
||||
print("Running again - should detect no new assets...")
|
||||
|
||||
# Reset stats for second run
|
||||
downloader.stats = {
|
||||
'total': 0,
|
||||
'successful': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0
|
||||
}
|
||||
downloader.stats = {"total": 0, "successful": 0, "failed": 0, "skipped": 0}
|
||||
|
||||
# Second download run
|
||||
await downloader.download_all_assets()
|
||||
@@ -118,7 +112,7 @@ async def demo_asset_tracking():
|
||||
print(f" Assets to download: {downloader.stats['total']}")
|
||||
print(f" New downloads: {downloader.stats['successful']}")
|
||||
|
||||
if downloader.stats['total'] == 0:
|
||||
if downloader.stats["total"] == 0:
|
||||
print(" ✅ Perfect! No new assets found - all are up to date!")
|
||||
else:
|
||||
print(f" Downloaded: {downloader.stats['successful']}")
|
||||
@@ -149,6 +143,7 @@ async def demo_asset_tracking():
|
||||
except Exception as e:
|
||||
print(f"❌ Demo failed with error: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
@@ -177,7 +172,7 @@ def show_usage():
|
||||
|
||||
async def main():
|
||||
"""Main function."""
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ['--help', '-h']:
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ["--help", "-h"]:
|
||||
show_usage()
|
||||
return 0
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# ParentZone Downloaders Cron Schedule
|
||||
# Run both downloaders daily at 2:00 AM
|
||||
0 2 * * * /app/scheduler.sh >> /var/log/cron.log 2>&1
|
||||
0 2 * * * /app/scripts/scheduler.sh >> /var/log/cron.log 2>&1
|
||||
|
||||
# Keep cron log file from growing too large (weekly cleanup)
|
||||
0 3 * * 0 find /var/log -name "cron.log" -size +100M -exec truncate -s 50M {} \; 2>/dev/null || true
|
||||
|
||||
# Cleanup old snapshot files (keep last 90 days)
|
||||
30 3 * * 0 find /app/snapshots -name "*.html" -mtime +90 -delete 2>/dev/null || true
|
||||
30 3 * * 0 find /app/data/snapshots -name "*.html" -mtime +90 -delete 2>/dev/null || true
|
||||
@@ -3,10 +3,10 @@
|
||||
# ParentZone Downloaders Daily Scheduler
|
||||
# This script runs both the config downloader and snapshot downloader
|
||||
|
||||
LOG_DIR="/app/logs"
|
||||
LOG_DIR="/app/data/logs"
|
||||
LOG_FILE="$LOG_DIR/scheduler_$(date +%Y%m%d).log"
|
||||
SNAPSHOT_CONFIG_FILE="/app/snapshot_config.json"
|
||||
ASSET_CONFIG_FILE="/app/parentzone_config.json"
|
||||
SNAPSHOT_CONFIG_FILE="/app/config/snapshot_config.json"
|
||||
ASSET_CONFIG_FILE="/app/config/parentzone_config.json"
|
||||
|
||||
# Create log directory if it doesn't exist
|
||||
mkdir -p "$LOG_DIR"
|
||||
@@ -54,7 +54,7 @@ cd /app
|
||||
|
||||
# Run config-based asset downloader
|
||||
if [ "$SKIP_ASSET_DOWNLOADER" = false ]; then
|
||||
run_with_logging "python3 config_downloader.py --config $ASSET_CONFIG_FILE" "Config Asset Downloader"
|
||||
run_with_logging "python3 src/config_downloader.py --config $ASSET_CONFIG_FILE" "Config Asset Downloader"
|
||||
asset_result=$?
|
||||
else
|
||||
log_message "SKIPPED: Config Asset Downloader (configuration file not found)"
|
||||
@@ -62,15 +62,15 @@ else
|
||||
fi
|
||||
|
||||
# Run config-based snapshot downloader
|
||||
run_with_logging "python3 config_snapshot_downloader.py --config $SNAPSHOT_CONFIG_FILE" "Config Snapshot Downloader"
|
||||
run_with_logging "python3 src/config_snapshot_downloader.py --config $SNAPSHOT_CONFIG_FILE" "Config Snapshot Downloader"
|
||||
config_result=$?
|
||||
|
||||
# Run regular snapshot downloader with environment variables
|
||||
if [ -n "$API_KEY" ]; then
|
||||
run_with_logging "python3 snapshot_downloader.py --api-key $API_KEY --output-dir snapshots" "Snapshot Downloader (API Key)"
|
||||
run_with_logging "python3 src/snapshot_downloader.py --api-key $API_KEY --output-dir data/snapshots" "Snapshot Downloader (API Key)"
|
||||
snapshot_result=$?
|
||||
elif [ -n "$EMAIL" ] && [ -n "$PASSWORD" ]; then
|
||||
run_with_logging "python3 snapshot_downloader.py --email $EMAIL --password $PASSWORD --output-dir snapshots" "Snapshot Downloader (Email/Password)"
|
||||
run_with_logging "python3 src/snapshot_downloader.py --email $EMAIL --password $PASSWORD --output-dir data/snapshots" "Snapshot Downloader (Email/Password)"
|
||||
snapshot_result=$?
|
||||
else
|
||||
log_message "WARNING: No authentication method provided via environment variables, skipping direct snapshot downloader"
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
set -e
|
||||
|
||||
LOG_DIR="/app/logs"
|
||||
SNAPSHOTS_DIR="/app/snapshots"
|
||||
LOG_DIR="/app/data/logs"
|
||||
SNAPSHOTS_DIR="/app/data/snapshots"
|
||||
|
||||
# Create directories if they don't exist
|
||||
mkdir -p "$LOG_DIR"
|
||||
@@ -30,7 +30,7 @@ log_message "Cron daemon started"
|
||||
|
||||
# Start web server in the background
|
||||
log_message "Starting web server on port 8080..."
|
||||
python3 webserver.py --host 0.0.0.0 --port 8080 --snapshots-dir "$SNAPSHOTS_DIR" &
|
||||
python3 src/webserver.py --host 0.0.0.0 --port 8080 --snapshots-dir "$SNAPSHOTS_DIR" &
|
||||
WEB_SERVER_PID=$!
|
||||
|
||||
log_message "Web server started with PID: $WEB_SERVER_PID"
|
||||
|
Before Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 2.0 MiB |
|
Before Width: | Height: | Size: 845 KiB |
|
Before Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 744 KiB |
|
Before Width: | Height: | Size: 773 KiB |
|
Before Width: | Height: | Size: 892 KiB |
|
Before Width: | Height: | Size: 1.0 MiB |
|
Before Width: | Height: | Size: 775 KiB |
|
Before Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 973 KiB |
|
Before Width: | Height: | Size: 1.0 MiB |
|
Before Width: | Height: | Size: 1.8 MiB |
|
Before Width: | Height: | Size: 1000 KiB |
|
Before Width: | Height: | Size: 900 KiB |
|
Before Width: | Height: | Size: 818 KiB |
|
Before Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 555 KiB |
|
Before Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 891 KiB |
|
Before Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 679 KiB |
|
Before Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 809 KiB |
@@ -1,162 +0,0 @@
|
||||
2025-09-05 22:23:50,764 - INFO - Starting snapshot download with configuration
|
||||
2025-09-05 22:23:50,764 - INFO - Date range: 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:23:50,764 - INFO - Type IDs: [15]
|
||||
2025-09-05 22:23:50,764 - INFO - Output directory: ./snapshots_test
|
||||
2025-09-05 22:23:50,764 - INFO - Max pages limit: 2
|
||||
2025-09-05 22:23:50,764 - INFO - Starting snapshot download for period 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:23:50,764 - INFO - Attempting login authentication...
|
||||
2025-09-05 22:23:50,765 - INFO - Attempting login for tudor.sitaru@gmail.com
|
||||
2025-09-05 22:23:51,594 - INFO - Login response status: 200
|
||||
2025-09-05 22:23:51,594 - INFO - Login successful
|
||||
2025-09-05 22:23:51,594 - INFO - Selected account: Tudor Sitaru at Noddy's Nursery School (ID: e518bd01-e516-4b3c-aefa-bcb369823a2e)
|
||||
2025-09-05 22:23:51,594 - INFO - Creating session for user ID: e518bd01-e516-4b3c-aefa-bcb369823a2e
|
||||
2025-09-05 22:23:51,994 - INFO - Create session response status: 200
|
||||
2025-09-05 22:23:51,995 - INFO - Session creation successful
|
||||
2025-09-05 22:23:51,995 - INFO - API key obtained successfully
|
||||
2025-09-05 22:23:51,996 - INFO - Login authentication successful
|
||||
2025-09-05 22:23:51,996 - INFO - Starting snapshot fetch from 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:23:51,996 - INFO - Fetching snapshots (first page): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&typeIDs%5B%5D=15
|
||||
2025-09-05 22:23:52,398 - INFO - Retrieved 25 snapshots (first page)
|
||||
2025-09-05 22:23:52,398 - INFO - Page 1: 25 snapshots (total: 25)
|
||||
2025-09-05 22:23:52,399 - INFO - Fetching snapshots (cursor: eyJsYXN0SUQiOjIzODE4...): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&cursor=eyJsYXN0SUQiOjIzODE4NTcsImxhc3RTdGFydFRpbWUiOiIyMDI0LTEwLTIzVDE0OjEyOjAwIn0%3D&typeIDs%5B%5D=15
|
||||
2025-09-05 22:23:52,708 - INFO - Retrieved 25 snapshots (cursor: eyJsYXN0SUQiOjIzODE4...)
|
||||
2025-09-05 22:23:52,708 - INFO - Page 2: 25 snapshots (total: 50)
|
||||
2025-09-05 22:23:52,708 - INFO - Reached maximum pages limit: 2
|
||||
2025-09-05 22:23:52,708 - INFO - Total snapshots fetched: 50
|
||||
2025-09-05 22:23:52,715 - INFO - Generated HTML file: snapshots_test/snapshots_2021-10-18_to_2025-09-05.html
|
||||
2025-09-05 22:42:28,035 - INFO - Starting snapshot download with configuration
|
||||
2025-09-05 22:42:28,035 - INFO - Date range: 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:42:28,036 - INFO - Type IDs: [15]
|
||||
2025-09-05 22:42:28,036 - INFO - Output directory: ./snapshots_test
|
||||
2025-09-05 22:42:28,036 - INFO - Max pages limit: 2
|
||||
2025-09-05 22:42:28,036 - INFO - Starting snapshot download for period 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:42:28,036 - INFO - Attempting login authentication...
|
||||
2025-09-05 22:42:28,036 - INFO - Attempting login for tudor.sitaru@gmail.com
|
||||
2025-09-05 22:42:28,783 - INFO - Login response status: 200
|
||||
2025-09-05 22:42:28,783 - INFO - Login successful
|
||||
2025-09-05 22:42:28,783 - INFO - Selected account: Tudor Sitaru at Noddy's Nursery School (ID: e518bd01-e516-4b3c-aefa-bcb369823a2e)
|
||||
2025-09-05 22:42:28,783 - INFO - Creating session for user ID: e518bd01-e516-4b3c-aefa-bcb369823a2e
|
||||
2025-09-05 22:42:29,171 - INFO - Create session response status: 200
|
||||
2025-09-05 22:42:29,172 - INFO - Session creation successful
|
||||
2025-09-05 22:42:29,172 - INFO - API key obtained successfully
|
||||
2025-09-05 22:42:29,173 - INFO - Login authentication successful
|
||||
2025-09-05 22:42:29,173 - INFO - Starting snapshot fetch from 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:42:29,173 - INFO - Fetching snapshots (first page): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&typeIDs%5B%5D=15
|
||||
2025-09-05 22:42:29,705 - INFO - Retrieved 25 snapshots (first page)
|
||||
2025-09-05 22:42:29,706 - INFO - Page 1: 25 snapshots (total: 25)
|
||||
2025-09-05 22:42:29,706 - INFO - Fetching snapshots (cursor: eyJsYXN0SUQiOjIzODE4...): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&cursor=eyJsYXN0SUQiOjIzODE4NTcsImxhc3RTdGFydFRpbWUiOiIyMDI0LTEwLTIzVDE0OjEyOjAwIn0%3D&typeIDs%5B%5D=15
|
||||
2025-09-05 22:42:30,033 - INFO - Retrieved 25 snapshots (cursor: eyJsYXN0SUQiOjIzODE4...)
|
||||
2025-09-05 22:42:30,034 - INFO - Page 2: 25 snapshots (total: 50)
|
||||
2025-09-05 22:42:30,034 - INFO - Reached maximum pages limit: 2
|
||||
2025-09-05 22:42:30,034 - INFO - Total snapshots fetched: 50
|
||||
2025-09-05 22:42:30,039 - INFO - Generated HTML file: snapshots_test/snapshots_2021-10-18_to_2025-09-05.html
|
||||
2025-09-05 22:49:12,928 - INFO - Starting snapshot download with configuration
|
||||
2025-09-05 22:49:12,928 - INFO - Date range: 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:49:12,928 - INFO - Type IDs: [15]
|
||||
2025-09-05 22:49:12,928 - INFO - Output directory: ./snapshots_test
|
||||
2025-09-05 22:49:12,928 - INFO - Max pages limit: 2
|
||||
2025-09-05 22:49:12,928 - INFO - Starting snapshot download for period 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:49:12,929 - INFO - Attempting login authentication...
|
||||
2025-09-05 22:49:12,929 - INFO - Attempting login for tudor.sitaru@gmail.com
|
||||
2025-09-05 22:49:13,677 - INFO - Login response status: 200
|
||||
2025-09-05 22:49:13,678 - INFO - Login successful
|
||||
2025-09-05 22:49:13,678 - INFO - Selected account: Tudor Sitaru at Noddy's Nursery School (ID: e518bd01-e516-4b3c-aefa-bcb369823a2e)
|
||||
2025-09-05 22:49:13,678 - INFO - Creating session for user ID: e518bd01-e516-4b3c-aefa-bcb369823a2e
|
||||
2025-09-05 22:49:14,082 - INFO - Create session response status: 200
|
||||
2025-09-05 22:49:14,083 - INFO - Session creation successful
|
||||
2025-09-05 22:49:14,083 - INFO - API key obtained successfully
|
||||
2025-09-05 22:49:14,084 - INFO - Login authentication successful
|
||||
2025-09-05 22:49:14,085 - INFO - Starting snapshot fetch from 2021-10-18 to 2025-09-05
|
||||
2025-09-05 22:49:14,085 - INFO - Fetching snapshots (first page): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&typeIDs%5B%5D=15
|
||||
2025-09-05 22:49:14,512 - INFO - Retrieved 25 snapshots (first page)
|
||||
2025-09-05 22:49:14,512 - INFO - Page 1: 25 snapshots (total: 25)
|
||||
2025-09-05 22:49:14,512 - INFO - Fetching snapshots (cursor: eyJsYXN0SUQiOjIzODE4...): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&cursor=eyJsYXN0SUQiOjIzODE4NTcsImxhc3RTdGFydFRpbWUiOiIyMDI0LTEwLTIzVDE0OjEyOjAwIn0%3D&typeIDs%5B%5D=15
|
||||
2025-09-05 22:49:14,754 - INFO - Retrieved 25 snapshots (cursor: eyJsYXN0SUQiOjIzODE4...)
|
||||
2025-09-05 22:49:14,754 - INFO - Page 2: 25 snapshots (total: 50)
|
||||
2025-09-05 22:49:14,754 - INFO - Reached maximum pages limit: 2
|
||||
2025-09-05 22:49:14,754 - INFO - Total snapshots fetched: 50
|
||||
2025-09-05 22:49:14,758 - INFO - Generated HTML file: snapshots_test/snapshots_2021-10-18_to_2025-09-05.html
|
||||
2025-09-05 23:02:05,096 - INFO - Starting snapshot download with configuration
|
||||
2025-09-05 23:02:05,097 - INFO - Date range: 2021-10-18 to 2025-09-05
|
||||
2025-09-05 23:02:05,097 - INFO - Type IDs: [15]
|
||||
2025-09-05 23:02:05,097 - INFO - Output directory: ./snapshots_test
|
||||
2025-09-05 23:02:05,097 - INFO - Max pages limit: 2
|
||||
2025-09-05 23:02:05,097 - INFO - Starting snapshot download for period 2021-10-18 to 2025-09-05
|
||||
2025-09-05 23:02:05,097 - INFO - Attempting login authentication...
|
||||
2025-09-05 23:02:05,097 - INFO - Attempting login for tudor.sitaru@gmail.com
|
||||
2025-09-05 23:02:05,767 - INFO - Login response status: 200
|
||||
2025-09-05 23:02:05,767 - INFO - Login successful
|
||||
2025-09-05 23:02:05,767 - INFO - Selected account: Tudor Sitaru at Noddy's Nursery School (ID: e518bd01-e516-4b3c-aefa-bcb369823a2e)
|
||||
2025-09-05 23:02:05,767 - INFO - Creating session for user ID: e518bd01-e516-4b3c-aefa-bcb369823a2e
|
||||
2025-09-05 23:02:06,174 - INFO - Create session response status: 200
|
||||
2025-09-05 23:02:06,175 - INFO - Session creation successful
|
||||
2025-09-05 23:02:06,175 - INFO - API key obtained successfully
|
||||
2025-09-05 23:02:06,176 - INFO - Login authentication successful
|
||||
2025-09-05 23:02:06,176 - INFO - Starting snapshot fetch from 2021-10-18 to 2025-09-05
|
||||
2025-09-05 23:02:06,176 - INFO - Fetching snapshots (first page): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&typeIDs%5B%5D=15
|
||||
2025-09-05 23:02:06,600 - INFO - Retrieved 25 snapshots (first page)
|
||||
2025-09-05 23:02:06,600 - INFO - Page 1: 25 snapshots (total: 25)
|
||||
2025-09-05 23:02:06,600 - INFO - Fetching snapshots (cursor: eyJsYXN0SUQiOjIzODE4...): https://api.parentzone.me/v1/posts?dateFrom=2021-10-18&dateTo=2025-09-05&cursor=eyJsYXN0SUQiOjIzODE4NTcsImxhc3RTdGFydFRpbWUiOiIyMDI0LTEwLTIzVDE0OjEyOjAwIn0%3D&typeIDs%5B%5D=15
|
||||
2025-09-05 23:02:06,997 - INFO - Retrieved 25 snapshots (cursor: eyJsYXN0SUQiOjIzODE4...)
|
||||
2025-09-05 23:02:06,997 - INFO - Page 2: 25 snapshots (total: 50)
|
||||
2025-09-05 23:02:06,998 - INFO - Reached maximum pages limit: 2
|
||||
2025-09-05 23:02:06,998 - INFO - Total snapshots fetched: 50
|
||||
2025-09-05 23:02:06,998 - INFO - Attempting login authentication...
|
||||
2025-09-05 23:02:06,998 - INFO - Attempting login for tudor.sitaru@gmail.com
|
||||
2025-09-05 23:02:07,608 - INFO - Login response status: 200
|
||||
2025-09-05 23:02:07,608 - INFO - Login successful
|
||||
2025-09-05 23:02:07,608 - INFO - Selected account: Tudor Sitaru at Noddy's Nursery School (ID: e518bd01-e516-4b3c-aefa-bcb369823a2e)
|
||||
2025-09-05 23:02:07,608 - INFO - Creating session for user ID: e518bd01-e516-4b3c-aefa-bcb369823a2e
|
||||
2025-09-05 23:02:07,895 - INFO - Create session response status: 200
|
||||
2025-09-05 23:02:07,896 - INFO - Session creation successful
|
||||
2025-09-05 23:02:07,896 - INFO - API key obtained successfully
|
||||
2025-09-05 23:02:07,897 - INFO - Login authentication successful
|
||||
2025-09-05 23:02:07,897 - INFO - Downloading media file: DCC724DD-0E3C-445D-BB6A-628C355533F2.jpeg
|
||||
2025-09-05 23:02:08,250 - INFO - Successfully downloaded media: DCC724DD-0E3C-445D-BB6A-628C355533F2.jpeg
|
||||
2025-09-05 23:02:08,251 - INFO - Downloading media file: e4e51387-1fee-4129-bd47-e49523b26697.jpeg
|
||||
2025-09-05 23:02:08,445 - INFO - Successfully downloaded media: e4e51387-1fee-4129-bd47-e49523b26697.jpeg
|
||||
2025-09-05 23:02:08,447 - INFO - Downloading media file: 7ED768A6-16A7-480A-B238-34B1DB87BDE6.jpeg
|
||||
2025-09-05 23:02:08,700 - INFO - Successfully downloaded media: 7ED768A6-16A7-480A-B238-34B1DB87BDE6.jpeg
|
||||
2025-09-05 23:02:08,700 - INFO - Downloading media file: 6CE82D8D-FAE8-4CD3-987F-A9F0BDD57919.jpeg
|
||||
2025-09-05 23:02:09,026 - INFO - Successfully downloaded media: 6CE82D8D-FAE8-4CD3-987F-A9F0BDD57919.jpeg
|
||||
2025-09-05 23:02:09,026 - INFO - Downloading media file: 04F440B5-549B-48E5-A480-4CEB0B649834.jpeg
|
||||
2025-09-05 23:02:09,402 - INFO - Successfully downloaded media: 04F440B5-549B-48E5-A480-4CEB0B649834.jpeg
|
||||
2025-09-05 23:02:09,403 - INFO - Downloading media file: AB2FE0B6-0932-4179-A3AE-933E05FA8519.jpeg
|
||||
2025-09-05 23:02:09,861 - INFO - Successfully downloaded media: AB2FE0B6-0932-4179-A3AE-933E05FA8519.jpeg
|
||||
2025-09-05 23:02:09,861 - INFO - Downloading media file: 466557B6-6ED0-4750-BA37-EC6DF92CB18B.jpeg
|
||||
2025-09-05 23:02:10,242 - INFO - Successfully downloaded media: 466557B6-6ED0-4750-BA37-EC6DF92CB18B.jpeg
|
||||
2025-09-05 23:02:10,243 - INFO - Downloading media file: 7268DAC2-8275-47DA-8A0D-FA659F850C31.jpeg
|
||||
2025-09-05 23:02:10,510 - INFO - Successfully downloaded media: 7268DAC2-8275-47DA-8A0D-FA659F850C31.jpeg
|
||||
2025-09-05 23:02:10,511 - INFO - Downloading media file: 692E5DAF-0D7B-433F-AA94-75CC265F1A59.jpeg
|
||||
2025-09-05 23:02:10,815 - INFO - Successfully downloaded media: 692E5DAF-0D7B-433F-AA94-75CC265F1A59.jpeg
|
||||
2025-09-05 23:02:10,815 - INFO - Downloading media file: CCE3933F-84FD-4A6D-987A-77993183A054.jpeg
|
||||
2025-09-05 23:02:11,036 - INFO - Successfully downloaded media: CCE3933F-84FD-4A6D-987A-77993183A054.jpeg
|
||||
2025-09-05 23:02:11,036 - INFO - Downloading media file: 2A5EE1D8-A113-43F8-9416-316287DE3E8F.jpeg
|
||||
2025-09-05 23:02:11,243 - INFO - Successfully downloaded media: 2A5EE1D8-A113-43F8-9416-316287DE3E8F.jpeg
|
||||
2025-09-05 23:02:11,243 - INFO - Downloading media file: 80702FD5-DF2C-4EC3-948C-70EBAE7C4BFF.jpeg
|
||||
2025-09-05 23:02:11,460 - INFO - Successfully downloaded media: 80702FD5-DF2C-4EC3-948C-70EBAE7C4BFF.jpeg
|
||||
2025-09-05 23:02:11,460 - INFO - Downloading media file: 1BC2789D-99B7-4CC5-84F3-AEA1F0CB39B2.jpeg
|
||||
2025-09-05 23:02:11,727 - INFO - Successfully downloaded media: 1BC2789D-99B7-4CC5-84F3-AEA1F0CB39B2.jpeg
|
||||
2025-09-05 23:02:11,728 - INFO - Downloading media file: BA2B3A67-356C-4D22-9FA2-2CF2040EC080.jpeg
|
||||
2025-09-05 23:02:11,969 - INFO - Successfully downloaded media: BA2B3A67-356C-4D22-9FA2-2CF2040EC080.jpeg
|
||||
2025-09-05 23:02:11,969 - INFO - Downloading media file: F3411311-E3CE-4A74-84CB-372DA00F80B7.jpeg
|
||||
2025-09-05 23:02:12,233 - INFO - Successfully downloaded media: F3411311-E3CE-4A74-84CB-372DA00F80B7.jpeg
|
||||
2025-09-05 23:02:12,233 - INFO - Downloading media file: 1715613184982FE8C3F62-2F0C-4A43-8F57-864F5BA9E112.jpeg.jpg
|
||||
2025-09-05 23:02:12,448 - INFO - Successfully downloaded media: 1715613184982FE8C3F62-2F0C-4A43-8F57-864F5BA9E112.jpeg.jpg
|
||||
2025-09-05 23:02:12,448 - INFO - Downloading media file: 171561318498211415BA1-6E38-4D1C-8962-8ED04199856D.jpeg.jpg
|
||||
2025-09-05 23:02:12,675 - INFO - Successfully downloaded media: 171561318498211415BA1-6E38-4D1C-8962-8ED04199856D.jpeg.jpg
|
||||
2025-09-05 23:02:12,676 - INFO - Downloading media file: 07B7B911-58C7-4998-BBDE-A773351854D5.jpeg
|
||||
2025-09-05 23:02:13,209 - INFO - Successfully downloaded media: 07B7B911-58C7-4998-BBDE-A773351854D5.jpeg
|
||||
2025-09-05 23:02:13,209 - INFO - Downloading media file: 1073B5D1-D162-4D78-8135-45447BA04CAB.jpeg
|
||||
2025-09-05 23:02:14,432 - INFO - Successfully downloaded media: 1073B5D1-D162-4D78-8135-45447BA04CAB.jpeg
|
||||
2025-09-05 23:02:14,433 - INFO - Downloading media file: 25E15BAA-58B3-47C8-BEC9-D777ED71A0AB.jpeg
|
||||
2025-09-05 23:02:14,707 - INFO - Successfully downloaded media: 25E15BAA-58B3-47C8-BEC9-D777ED71A0AB.jpeg
|
||||
2025-09-05 23:02:14,707 - INFO - Downloading media file: C959CBD6-A829-43AB-87CF-732269921ADB.jpeg
|
||||
2025-09-05 23:02:15,058 - INFO - Successfully downloaded media: C959CBD6-A829-43AB-87CF-732269921ADB.jpeg
|
||||
2025-09-05 23:02:15,058 - INFO - Downloading media file: 045D878D-47E3-4EB5-B9DB-36B9B63299E9.jpeg
|
||||
2025-09-05 23:02:15,349 - INFO - Successfully downloaded media: 045D878D-47E3-4EB5-B9DB-36B9B63299E9.jpeg
|
||||
2025-09-05 23:02:15,350 - INFO - Downloading media file: 6BC18F39-5C1A-43FB-AD64-0D5AB616A292.jpeg
|
||||
2025-09-05 23:02:15,634 - INFO - Successfully downloaded media: 6BC18F39-5C1A-43FB-AD64-0D5AB616A292.jpeg
|
||||
2025-09-05 23:02:15,635 - INFO - Downloading media file: D827391F-6BB7-4F61-B315-FB791E5ADC2F.jpeg
|
||||
2025-09-05 23:02:15,918 - INFO - Successfully downloaded media: D827391F-6BB7-4F61-B315-FB791E5ADC2F.jpeg
|
||||
2025-09-05 23:02:15,920 - INFO - Generated HTML file: snapshots_test/snapshots_2021-10-18_to_2025-09-05.html
|
||||
44
src/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
ParentZone Downloader - Source Package
|
||||
|
||||
This package contains the core application modules for the ParentZone Downloader.
|
||||
|
||||
Modules:
|
||||
- asset_tracker: Track downloaded assets to avoid re-downloads
|
||||
- auth_manager: Handle authentication with ParentZone API
|
||||
- config_downloader: Configuration-based image downloader
|
||||
- config_snapshot_downloader: Configuration-based snapshot downloader
|
||||
- image_downloader: Download images from ParentZone API
|
||||
- snapshot_downloader: Download snapshots from ParentZone API
|
||||
- webserver: Web server to serve downloaded snapshots
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "ParentZone Downloader Team"
|
||||
|
||||
# Import main classes for easier access
|
||||
try:
|
||||
from .asset_tracker import AssetTracker
|
||||
from .auth_manager import AuthManager
|
||||
from .config_downloader import ConfigImageDownloader
|
||||
from .config_snapshot_downloader import ConfigSnapshotDownloader
|
||||
from .image_downloader import ImageDownloader
|
||||
from .snapshot_downloader import SnapshotDownloader
|
||||
from .webserver import SnapshotsWebServer
|
||||
|
||||
__all__ = [
|
||||
"AssetTracker",
|
||||
"AuthManager",
|
||||
"ConfigImageDownloader",
|
||||
"ConfigSnapshotDownloader",
|
||||
"ImageDownloader",
|
||||
"SnapshotDownloader",
|
||||
"SnapshotsWebServer",
|
||||
]
|
||||
|
||||
except ImportError as e:
|
||||
# Handle case where dependencies might not be available
|
||||
__all__ = []
|
||||
import warnings
|
||||
|
||||
warnings.warn(f"Some modules could not be imported: {e}")
|
||||
@@ -24,12 +24,12 @@ from tqdm import tqdm
|
||||
|
||||
# Import the auth manager and asset tracker
|
||||
try:
|
||||
from auth_manager import AuthManager
|
||||
from src.auth_manager import AuthManager
|
||||
except ImportError:
|
||||
AuthManager = None
|
||||
|
||||
try:
|
||||
from asset_tracker import AssetTracker
|
||||
from src.asset_tracker import AssetTracker
|
||||
except ImportError:
|
||||
AssetTracker = None
|
||||
|
||||
@@ -453,8 +453,8 @@ Examples:
|
||||
python config_downloader.py --config config.json
|
||||
|
||||
# Create a config file first:
|
||||
cp config_example.json my_config.json
|
||||
# Edit my_config.json with your API details
|
||||
cp config/config_example.json config/my_config.json
|
||||
# Edit config/my_config.json with your API details
|
||||
python config_downloader.py --config my_config.json
|
||||
""",
|
||||
)
|
||||
@@ -16,9 +16,11 @@ from pathlib import Path
|
||||
|
||||
# Import the snapshot downloader
|
||||
try:
|
||||
from snapshot_downloader import SnapshotDownloader
|
||||
from src.snapshot_downloader import SnapshotDownloader
|
||||
except ImportError:
|
||||
print("Error: snapshot_downloader.py not found. Please ensure it's in the same directory.")
|
||||
print(
|
||||
"Error: snapshot_downloader.py not found. Please ensure it's in the same directory."
|
||||
)
|
||||
exit(1)
|
||||
|
||||
|
||||
@@ -35,38 +37,47 @@ class ConfigSnapshotDownloader:
|
||||
|
||||
# Create the underlying snapshot downloader
|
||||
self.downloader = SnapshotDownloader(
|
||||
api_url=self.config.get('api_url', 'https://api.parentzone.me'),
|
||||
output_dir=self.config.get('output_dir', 'snapshots'),
|
||||
api_key=self.config.get('api_key'),
|
||||
email=self.config.get('email'),
|
||||
password=self.config.get('password')
|
||||
api_url=self.config.get("api_url", "https://api.parentzone.me"),
|
||||
output_dir=self.config.get("output_dir", "snapshots"),
|
||||
api_key=self.config.get("api_key"),
|
||||
email=self.config.get("email"),
|
||||
password=self.config.get("password"),
|
||||
)
|
||||
|
||||
def load_config(self, config_file: str) -> dict:
|
||||
"""Load configuration from JSON file."""
|
||||
try:
|
||||
with open(config_file, 'r') as f:
|
||||
with open(config_file, "r") as f:
|
||||
config = json.load(f)
|
||||
|
||||
# Validate required authentication
|
||||
has_api_key = 'api_key' in config and config['api_key']
|
||||
has_credentials = 'email' in config and 'password' in config and config['email'] and config['password']
|
||||
has_api_key = "api_key" in config and config["api_key"]
|
||||
has_credentials = (
|
||||
"email" in config
|
||||
and "password" in config
|
||||
and config["email"]
|
||||
and config["password"]
|
||||
)
|
||||
|
||||
if not has_api_key and not has_credentials:
|
||||
raise ValueError("Either 'api_key' or both 'email' and 'password' must be provided in config")
|
||||
raise ValueError(
|
||||
"Either 'api_key' or both 'email' and 'password' must be provided in config"
|
||||
)
|
||||
|
||||
# Set defaults for optional fields
|
||||
config.setdefault('api_url', 'https://api.parentzone.me')
|
||||
config.setdefault('output_dir', 'snapshots')
|
||||
config.setdefault('type_ids', [15])
|
||||
config.setdefault('max_pages', None)
|
||||
config.setdefault("api_url", "https://api.parentzone.me")
|
||||
config.setdefault("output_dir", "snapshots")
|
||||
config.setdefault("type_ids", [15])
|
||||
config.setdefault("max_pages", None)
|
||||
|
||||
# Set default date range (last year) if not specified
|
||||
if 'date_from' not in config or not config['date_from']:
|
||||
config['date_from'] = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
|
||||
if "date_from" not in config or not config["date_from"]:
|
||||
config["date_from"] = (datetime.now() - timedelta(days=365)).strftime(
|
||||
"%Y-%m-%d"
|
||||
)
|
||||
|
||||
if 'date_to' not in config or not config['date_to']:
|
||||
config['date_to'] = datetime.now().strftime("%Y-%m-%d")
|
||||
if "date_to" not in config or not config["date_to"]:
|
||||
config["date_to"] = datetime.now().strftime("%Y-%m-%d")
|
||||
|
||||
return config
|
||||
|
||||
@@ -77,17 +88,14 @@ class ConfigSnapshotDownloader:
|
||||
|
||||
def setup_logging(self):
|
||||
"""Setup logging configuration."""
|
||||
output_dir = Path(self.config['output_dir'])
|
||||
output_dir = Path(self.config["output_dir"])
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
log_file = output_dir / 'snapshots.log'
|
||||
log_file = output_dir / "snapshots.log"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler(log_file),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
|
||||
)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -99,19 +107,21 @@ class ConfigSnapshotDownloader:
|
||||
Path to the generated HTML file
|
||||
"""
|
||||
self.logger.info("Starting snapshot download with configuration")
|
||||
self.logger.info(f"Date range: {self.config['date_from']} to {self.config['date_to']}")
|
||||
self.logger.info(
|
||||
f"Date range: {self.config['date_from']} to {self.config['date_to']}"
|
||||
)
|
||||
self.logger.info(f"Type IDs: {self.config['type_ids']}")
|
||||
self.logger.info(f"Output directory: {self.config['output_dir']}")
|
||||
|
||||
if self.config.get('max_pages'):
|
||||
if self.config.get("max_pages"):
|
||||
self.logger.info(f"Max pages limit: {self.config['max_pages']}")
|
||||
|
||||
try:
|
||||
html_file = await self.downloader.download_snapshots(
|
||||
type_ids=self.config['type_ids'],
|
||||
date_from=self.config['date_from'],
|
||||
date_to=self.config['date_to'],
|
||||
max_pages=self.config.get('max_pages')
|
||||
type_ids=self.config["type_ids"],
|
||||
date_from=self.config["date_from"],
|
||||
date_to=self.config["date_to"],
|
||||
max_pages=self.config.get("max_pages"),
|
||||
)
|
||||
|
||||
return html_file
|
||||
@@ -131,10 +141,10 @@ class ConfigSnapshotDownloader:
|
||||
print(f"Date To: {self.config['date_to']}")
|
||||
print(f"Type IDs: {self.config['type_ids']}")
|
||||
|
||||
auth_method = "API Key" if self.config.get('api_key') else "Email/Password"
|
||||
auth_method = "API Key" if self.config.get("api_key") else "Email/Password"
|
||||
print(f"Authentication: {auth_method}")
|
||||
|
||||
if self.config.get('max_pages'):
|
||||
if self.config.get("max_pages"):
|
||||
print(f"Max Pages: {self.config['max_pages']}")
|
||||
|
||||
print("=" * 60)
|
||||
@@ -151,11 +161,11 @@ def create_example_config():
|
||||
"max_pages": null,
|
||||
"api_key": "your-api-key-here",
|
||||
"email": "your-email@example.com",
|
||||
"password": "your-password-here"
|
||||
"password": "your-password-here",
|
||||
}
|
||||
|
||||
config_file = Path("snapshot_config_example.json")
|
||||
with open(config_file, 'w') as f:
|
||||
with open(config_file, "w") as f:
|
||||
json.dump(example_config, f, indent=2)
|
||||
|
||||
print(f"✅ Example configuration created: {config_file}")
|
||||
@@ -196,30 +206,27 @@ Notes:
|
||||
- 'date_from' and 'date_to' default to last year if not specified
|
||||
- 'type_ids' defaults to [15] (snapshot type)
|
||||
- 'max_pages' limits pages fetched (useful for testing)
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("--config", help="Path to the JSON configuration file")
|
||||
|
||||
parser.add_argument(
|
||||
"--create-example",
|
||||
action="store_true",
|
||||
help="Create an example configuration file and exit",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--config',
|
||||
help='Path to the JSON configuration file'
|
||||
"--show-config",
|
||||
action="store_true",
|
||||
help="Show configuration summary before downloading",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--create-example',
|
||||
action='store_true',
|
||||
help='Create an example configuration file and exit'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--show-config',
|
||||
action='store_true',
|
||||
help='Show configuration summary before downloading'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
action='store_true',
|
||||
help='Enable debug mode with detailed server response logging'
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Enable debug mode with detailed server response logging",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -26,20 +26,30 @@ import hashlib
|
||||
|
||||
# Import the auth manager and asset tracker
|
||||
try:
|
||||
from auth_manager import AuthManager
|
||||
from src.auth_manager import AuthManager
|
||||
except ImportError:
|
||||
AuthManager = None
|
||||
|
||||
try:
|
||||
from asset_tracker import AssetTracker
|
||||
from src.asset_tracker import AssetTracker
|
||||
except ImportError:
|
||||
AssetTracker = None
|
||||
|
||||
|
||||
class ImageDownloader:
|
||||
def __init__(self, api_url: str, list_endpoint: str, download_endpoint: str,
|
||||
output_dir: str, max_concurrent: int = 5, timeout: int = 30, api_key: str = None,
|
||||
email: str = None, password: str = None, track_assets: bool = True):
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str,
|
||||
list_endpoint: str,
|
||||
download_endpoint: str,
|
||||
output_dir: str,
|
||||
max_concurrent: int = 5,
|
||||
timeout: int = 30,
|
||||
api_key: str = None,
|
||||
email: str = None,
|
||||
password: str = None,
|
||||
track_assets: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the image downloader.
|
||||
|
||||
@@ -55,9 +65,9 @@ class ImageDownloader:
|
||||
password: Password for login authentication
|
||||
track_assets: Whether to enable asset tracking to avoid re-downloads
|
||||
"""
|
||||
self.api_url = api_url.rstrip('/')
|
||||
self.list_endpoint = list_endpoint.lstrip('/')
|
||||
self.download_endpoint = download_endpoint.lstrip('/')
|
||||
self.api_url = api_url.rstrip("/")
|
||||
self.list_endpoint = list_endpoint.lstrip("/")
|
||||
self.download_endpoint = download_endpoint.lstrip("/")
|
||||
self.output_dir = Path(output_dir)
|
||||
self.max_concurrent = max_concurrent
|
||||
self.timeout = timeout
|
||||
@@ -72,11 +82,11 @@ class ImageDownloader:
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(self.output_dir / 'download.log'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
logging.FileHandler(self.output_dir / "download.log"),
|
||||
logging.StreamHandler(),
|
||||
],
|
||||
)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -86,17 +96,14 @@ class ImageDownloader:
|
||||
self.asset_tracker = AssetTracker(storage_dir=str(self.output_dir))
|
||||
self.logger.info("Asset tracking enabled")
|
||||
elif track_assets:
|
||||
self.logger.warning("Asset tracking requested but AssetTracker not available")
|
||||
self.logger.warning(
|
||||
"Asset tracking requested but AssetTracker not available"
|
||||
)
|
||||
else:
|
||||
self.logger.info("Asset tracking disabled")
|
||||
|
||||
# Track download statistics
|
||||
self.stats = {
|
||||
'total': 0,
|
||||
'successful': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0
|
||||
}
|
||||
self.stats = {"total": 0, "successful": 0, "failed": 0, "skipped": 0}
|
||||
|
||||
async def authenticate(self):
|
||||
"""Perform login authentication if credentials are provided."""
|
||||
@@ -111,10 +118,16 @@ class ImageDownloader:
|
||||
self.logger.error("Login authentication failed")
|
||||
raise Exception("Login authentication failed")
|
||||
elif self.email or self.password:
|
||||
self.logger.warning("Both email and password must be provided for login authentication")
|
||||
raise Exception("Both email and password must be provided for login authentication")
|
||||
self.logger.warning(
|
||||
"Both email and password must be provided for login authentication"
|
||||
)
|
||||
raise Exception(
|
||||
"Both email and password must be provided for login authentication"
|
||||
)
|
||||
|
||||
async def get_asset_list(self, session: aiohttp.ClientSession) -> List[Dict[str, Any]]:
|
||||
async def get_asset_list(
|
||||
self, session: aiohttp.ClientSession
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch the list of assets from the API.
|
||||
|
||||
@@ -132,13 +145,15 @@ class ImageDownloader:
|
||||
|
||||
# Use API key if provided
|
||||
if self.api_key:
|
||||
headers['x-api-key'] = self.api_key
|
||||
headers["x-api-key"] = self.api_key
|
||||
|
||||
# Use login authentication if provided
|
||||
elif self.auth_manager and self.auth_manager.is_authenticated():
|
||||
headers.update(self.auth_manager.get_auth_headers())
|
||||
|
||||
async with session.get(url, headers=headers, timeout=self.timeout) as response:
|
||||
async with session.get(
|
||||
url, headers=headers, timeout=self.timeout
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
data = await response.json()
|
||||
|
||||
@@ -147,12 +162,12 @@ class ImageDownloader:
|
||||
assets = data
|
||||
elif isinstance(data, dict):
|
||||
# Common patterns for API responses
|
||||
if 'data' in data:
|
||||
assets = data['data']
|
||||
elif 'results' in data:
|
||||
assets = data['results']
|
||||
elif 'items' in data:
|
||||
assets = data['items']
|
||||
if "data" in data:
|
||||
assets = data["data"]
|
||||
elif "results" in data:
|
||||
assets = data["results"]
|
||||
elif "items" in data:
|
||||
assets = data["items"]
|
||||
else:
|
||||
assets = [data] # Single asset
|
||||
else:
|
||||
@@ -179,7 +194,7 @@ class ImageDownloader:
|
||||
asset_id = None
|
||||
|
||||
# Common field names for asset identifiers
|
||||
id_fields = ['id', 'asset_id', 'image_id', 'file_id', 'uuid', 'key']
|
||||
id_fields = ["id", "asset_id", "image_id", "file_id", "uuid", "key"]
|
||||
for field in id_fields:
|
||||
if field in asset:
|
||||
asset_id = asset[field]
|
||||
@@ -192,12 +207,11 @@ class ImageDownloader:
|
||||
# Build download URL with required parameters
|
||||
from urllib.parse import urlencode
|
||||
|
||||
params = {
|
||||
'key': self.api_key,
|
||||
'u': asset.get('updated', '')
|
||||
}
|
||||
params = {"key": self.api_key, "u": asset.get("updated", "")}
|
||||
|
||||
download_url = urljoin(self.api_url, f"/v1/media/{asset_id}/full?{urlencode(params)}")
|
||||
download_url = urljoin(
|
||||
self.api_url, f"/v1/media/{asset_id}/full?{urlencode(params)}"
|
||||
)
|
||||
return download_url
|
||||
|
||||
def get_filename(self, asset: Dict[str, Any], url: str) -> str:
|
||||
@@ -212,27 +226,27 @@ class ImageDownloader:
|
||||
Filename for the asset
|
||||
"""
|
||||
# Try to get filename from asset metadata
|
||||
if 'fileName' in asset:
|
||||
filename = asset['fileName']
|
||||
elif 'filename' in asset:
|
||||
filename = asset['filename']
|
||||
elif 'name' in asset:
|
||||
filename = asset['name']
|
||||
elif 'title' in asset:
|
||||
filename = asset['title']
|
||||
if "fileName" in asset:
|
||||
filename = asset["fileName"]
|
||||
elif "filename" in asset:
|
||||
filename = asset["filename"]
|
||||
elif "name" in asset:
|
||||
filename = asset["name"]
|
||||
elif "title" in asset:
|
||||
filename = asset["title"]
|
||||
else:
|
||||
# Extract filename from URL
|
||||
parsed_url = urlparse(url)
|
||||
filename = os.path.basename(parsed_url.path)
|
||||
|
||||
# If no extension, try to get it from content-type or add default
|
||||
if '.' not in filename:
|
||||
if 'mimeType' in asset:
|
||||
ext = self._get_extension_from_mime(asset['mimeType'])
|
||||
elif 'content_type' in asset:
|
||||
ext = self._get_extension_from_mime(asset['content_type'])
|
||||
if "." not in filename:
|
||||
if "mimeType" in asset:
|
||||
ext = self._get_extension_from_mime(asset["mimeType"])
|
||||
elif "content_type" in asset:
|
||||
ext = self._get_extension_from_mime(asset["content_type"])
|
||||
else:
|
||||
ext = '.jpg' # Default extension
|
||||
ext = ".jpg" # Default extension
|
||||
filename += ext
|
||||
|
||||
# Sanitize filename
|
||||
@@ -251,35 +265,39 @@ class ImageDownloader:
|
||||
def _get_extension_from_mime(self, mime_type: str) -> str:
|
||||
"""Get file extension from MIME type."""
|
||||
mime_to_ext = {
|
||||
'image/jpeg': '.jpg',
|
||||
'image/jpg': '.jpg',
|
||||
'image/png': '.png',
|
||||
'image/gif': '.gif',
|
||||
'image/webp': '.webp',
|
||||
'image/bmp': '.bmp',
|
||||
'image/tiff': '.tiff',
|
||||
'image/svg+xml': '.svg'
|
||||
"image/jpeg": ".jpg",
|
||||
"image/jpg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/gif": ".gif",
|
||||
"image/webp": ".webp",
|
||||
"image/bmp": ".bmp",
|
||||
"image/tiff": ".tiff",
|
||||
"image/svg+xml": ".svg",
|
||||
}
|
||||
return mime_to_ext.get(mime_type.lower(), '.jpg')
|
||||
return mime_to_ext.get(mime_type.lower(), ".jpg")
|
||||
|
||||
def _sanitize_filename(self, filename: str) -> str:
|
||||
"""Sanitize filename by removing invalid characters."""
|
||||
# Remove or replace invalid characters
|
||||
invalid_chars = '<>:"/\\|?*'
|
||||
for char in invalid_chars:
|
||||
filename = filename.replace(char, '_')
|
||||
filename = filename.replace(char, "_")
|
||||
|
||||
# Remove leading/trailing spaces and dots
|
||||
filename = filename.strip('. ')
|
||||
filename = filename.strip(". ")
|
||||
|
||||
# Ensure filename is not empty
|
||||
if not filename:
|
||||
filename = 'image'
|
||||
filename = "image"
|
||||
|
||||
return filename
|
||||
|
||||
async def download_asset(self, session: aiohttp.ClientSession, asset: Dict[str, Any],
|
||||
semaphore: asyncio.Semaphore) -> bool:
|
||||
async def download_asset(
|
||||
self,
|
||||
session: aiohttp.ClientSession,
|
||||
asset: Dict[str, Any],
|
||||
semaphore: asyncio.Semaphore,
|
||||
) -> bool:
|
||||
"""
|
||||
Download a single asset.
|
||||
|
||||
@@ -300,7 +318,7 @@ class ImageDownloader:
|
||||
# Check if file already exists and we're not tracking assets
|
||||
if filepath.exists() and not self.asset_tracker:
|
||||
self.logger.info(f"Skipping {filename} (already exists)")
|
||||
self.stats['skipped'] += 1
|
||||
self.stats["skipped"] += 1
|
||||
return True
|
||||
|
||||
self.logger.info(f"Downloading {filename} from {download_url}")
|
||||
@@ -309,34 +327,46 @@ class ImageDownloader:
|
||||
response.raise_for_status()
|
||||
|
||||
# Get content type to verify it's an image
|
||||
content_type = response.headers.get('content-type', '')
|
||||
if not content_type.startswith('image/'):
|
||||
self.logger.warning(f"Content type is not an image: {content_type}")
|
||||
content_type = response.headers.get("content-type", "")
|
||||
if not content_type.startswith("image/"):
|
||||
self.logger.warning(
|
||||
f"Content type is not an image: {content_type}"
|
||||
)
|
||||
|
||||
# Download the file
|
||||
async with aiofiles.open(filepath, 'wb') as f:
|
||||
async with aiofiles.open(filepath, "wb") as f:
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
await f.write(chunk)
|
||||
|
||||
# Set file modification time to match the updated timestamp
|
||||
if 'updated' in asset:
|
||||
if "updated" in asset:
|
||||
try:
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
# Parse the ISO timestamp
|
||||
updated_time = datetime.fromisoformat(asset['updated'].replace('Z', '+00:00'))
|
||||
updated_time = datetime.fromisoformat(
|
||||
asset["updated"].replace("Z", "+00:00")
|
||||
)
|
||||
# Set file modification time
|
||||
os.utime(filepath, (updated_time.timestamp(), updated_time.timestamp()))
|
||||
self.logger.info(f"Set file modification time to {asset['updated']}")
|
||||
os.utime(
|
||||
filepath,
|
||||
(updated_time.timestamp(), updated_time.timestamp()),
|
||||
)
|
||||
self.logger.info(
|
||||
f"Set file modification time to {asset['updated']}"
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to set file modification time: {e}")
|
||||
self.logger.warning(
|
||||
f"Failed to set file modification time: {e}"
|
||||
)
|
||||
|
||||
# Mark asset as downloaded in tracker
|
||||
if self.asset_tracker:
|
||||
self.asset_tracker.mark_asset_downloaded(asset, filepath, True)
|
||||
|
||||
self.logger.info(f"Successfully downloaded {filename}")
|
||||
self.stats['successful'] += 1
|
||||
self.stats["successful"] += 1
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -347,8 +377,10 @@ class ImageDownloader:
|
||||
filepath = self.output_dir / filename
|
||||
self.asset_tracker.mark_asset_downloaded(asset, filepath, False)
|
||||
|
||||
self.logger.error(f"Failed to download asset {asset.get('id', 'unknown')}: {e}")
|
||||
self.stats['failed'] += 1
|
||||
self.logger.error(
|
||||
f"Failed to download asset {asset.get('id', 'unknown')}: {e}"
|
||||
)
|
||||
self.stats["failed"] += 1
|
||||
return False
|
||||
|
||||
async def download_all_assets(self, force_redownload: bool = False):
|
||||
@@ -364,7 +396,9 @@ class ImageDownloader:
|
||||
connector = aiohttp.TCPConnector(limit=100, limit_per_host=30)
|
||||
timeout = aiohttp.ClientTimeout(total=self.timeout)
|
||||
|
||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
connector=connector, timeout=timeout
|
||||
) as session:
|
||||
try:
|
||||
# Perform authentication if needed
|
||||
await self.authenticate()
|
||||
@@ -380,24 +414,27 @@ class ImageDownloader:
|
||||
# Filter for new/modified assets if tracking is enabled
|
||||
if self.asset_tracker and not force_redownload:
|
||||
assets = self.asset_tracker.get_new_assets(all_assets)
|
||||
self.logger.info(f"Found {len(assets)} new/modified assets to download")
|
||||
self.logger.info(
|
||||
f"Found {len(assets)} new/modified assets to download"
|
||||
)
|
||||
if len(assets) == 0:
|
||||
self.logger.info("All assets are up to date!")
|
||||
return
|
||||
else:
|
||||
assets = all_assets
|
||||
if force_redownload:
|
||||
self.logger.info("Force redownload enabled - downloading all assets")
|
||||
self.logger.info(
|
||||
"Force redownload enabled - downloading all assets"
|
||||
)
|
||||
|
||||
self.stats['total'] = len(assets)
|
||||
self.stats["total"] = len(assets)
|
||||
|
||||
# Create semaphore to limit concurrent downloads
|
||||
semaphore = asyncio.Semaphore(self.max_concurrent)
|
||||
|
||||
# Create tasks for all downloads
|
||||
tasks = [
|
||||
self.download_asset(session, asset, semaphore)
|
||||
for asset in assets
|
||||
self.download_asset(session, asset, semaphore) for asset in assets
|
||||
]
|
||||
|
||||
# Download all assets with progress bar
|
||||
@@ -405,11 +442,13 @@ class ImageDownloader:
|
||||
for coro in asyncio.as_completed(tasks):
|
||||
result = await coro
|
||||
pbar.update(1)
|
||||
pbar.set_postfix({
|
||||
'Success': self.stats['successful'],
|
||||
'Failed': self.stats['failed'],
|
||||
'Skipped': self.stats['skipped']
|
||||
})
|
||||
pbar.set_postfix(
|
||||
{
|
||||
"Success": self.stats["successful"],
|
||||
"Failed": self.stats["failed"],
|
||||
"Skipped": self.stats["skipped"],
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error during download process: {e}")
|
||||
@@ -441,84 +480,75 @@ Examples:
|
||||
--output-dir "./images" \\
|
||||
--max-concurrent 10 \\
|
||||
--timeout 60
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--api-url',
|
||||
"--api-url",
|
||||
required=True,
|
||||
help='Base URL of the API (e.g., https://api.example.com)'
|
||||
help="Base URL of the API (e.g., https://api.example.com)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--list-endpoint',
|
||||
"--list-endpoint",
|
||||
required=True,
|
||||
help='Endpoint to get the list of assets (e.g., /assets or /images)'
|
||||
help="Endpoint to get the list of assets (e.g., /assets or /images)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--download-endpoint',
|
||||
"--download-endpoint",
|
||||
required=True,
|
||||
help='Endpoint to download individual assets (e.g., /download or /assets)'
|
||||
help="Endpoint to download individual assets (e.g., /download or /assets)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output-dir',
|
||||
required=True,
|
||||
help='Directory to save downloaded images'
|
||||
"--output-dir", required=True, help="Directory to save downloaded images"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--max-concurrent',
|
||||
"--max-concurrent",
|
||||
type=int,
|
||||
default=5,
|
||||
help='Maximum number of concurrent downloads (default: 5)'
|
||||
help="Maximum number of concurrent downloads (default: 5)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--timeout',
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=30,
|
||||
help='Request timeout in seconds (default: 30)'
|
||||
help="Request timeout in seconds (default: 30)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--api-key',
|
||||
help='API key for authentication (x-api-key header)'
|
||||
"--api-key", help="API key for authentication (x-api-key header)"
|
||||
)
|
||||
|
||||
parser.add_argument("--email", help="Email for login authentication")
|
||||
|
||||
parser.add_argument("--password", help="Password for login authentication")
|
||||
|
||||
parser.add_argument(
|
||||
"--no-tracking",
|
||||
action="store_true",
|
||||
help="Disable asset tracking (will re-download all assets)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--email',
|
||||
help='Email for login authentication'
|
||||
"--force-redownload",
|
||||
action="store_true",
|
||||
help="Force re-download of all assets, even if already tracked",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--password',
|
||||
help='Password for login authentication'
|
||||
"--show-stats",
|
||||
action="store_true",
|
||||
help="Show asset tracking statistics and exit",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--no-tracking',
|
||||
action='store_true',
|
||||
help='Disable asset tracking (will re-download all assets)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--force-redownload',
|
||||
action='store_true',
|
||||
help='Force re-download of all assets, even if already tracked'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--show-stats',
|
||||
action='store_true',
|
||||
help='Show asset tracking statistics and exit'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--cleanup',
|
||||
action='store_true',
|
||||
help='Clean up metadata for missing files and exit'
|
||||
"--cleanup",
|
||||
action="store_true",
|
||||
help="Clean up metadata for missing files and exit",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -546,11 +576,13 @@ Examples:
|
||||
api_key=args.api_key,
|
||||
email=args.email,
|
||||
password=args.password,
|
||||
track_assets=not args.no_tracking
|
||||
track_assets=not args.no_tracking,
|
||||
)
|
||||
|
||||
try:
|
||||
asyncio.run(downloader.download_all_assets(force_redownload=args.force_redownload))
|
||||
asyncio.run(
|
||||
downloader.download_all_assets(force_redownload=args.force_redownload)
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("\nDownload interrupted by user")
|
||||
except Exception as e:
|
||||
@@ -21,15 +21,21 @@ import aiofiles
|
||||
|
||||
# Import the auth manager
|
||||
try:
|
||||
from auth_manager import AuthManager
|
||||
from src.auth_manager import AuthManager
|
||||
except ImportError:
|
||||
AuthManager = None
|
||||
|
||||
|
||||
class SnapshotDownloader:
|
||||
def __init__(self, api_url: str = "https://api.parentzone.me",
|
||||
output_dir: str = "snapshots", api_key: str = None,
|
||||
email: str = None, password: str = None, debug_mode: bool = False):
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str = "https://api.parentzone.me",
|
||||
output_dir: str = "snapshots",
|
||||
api_key: str = None,
|
||||
email: str = None,
|
||||
password: str = None,
|
||||
debug_mode: bool = False,
|
||||
):
|
||||
"""
|
||||
Initialize the snapshot downloader.
|
||||
|
||||
@@ -41,7 +47,7 @@ class SnapshotDownloader:
|
||||
password: Password for login authentication
|
||||
debug_mode: Enable detailed server response logging
|
||||
"""
|
||||
self.api_url = api_url.rstrip('/')
|
||||
self.api_url = api_url.rstrip("/")
|
||||
self.snapshots_endpoint = "/v1/posts"
|
||||
self.output_dir = Path(output_dir)
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -62,39 +68,36 @@ class SnapshotDownloader:
|
||||
|
||||
# Standard headers based on the curl command
|
||||
self.headers = {
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,ro;q=0.7',
|
||||
'origin': 'https://parentzone.me',
|
||||
'priority': 'u=1, i',
|
||||
'sec-ch-ua': '"Not;A=Brand";v="99", "Google Chrome";v="139", "Chromium";v="139"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-site',
|
||||
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36',
|
||||
'x-client-version': '3.54.0'
|
||||
"accept": "application/json, text/plain, */*",
|
||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8,ro;q=0.7",
|
||||
"origin": "https://parentzone.me",
|
||||
"priority": "u=1, i",
|
||||
"sec-ch-ua": '"Not;A=Brand";v="99", "Google Chrome";v="139", "Chromium";v="139"',
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": '"macOS"',
|
||||
"sec-fetch-dest": "empty",
|
||||
"sec-fetch-mode": "cors",
|
||||
"sec-fetch-site": "same-site",
|
||||
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
|
||||
"x-client-version": "3.54.0",
|
||||
}
|
||||
|
||||
# Statistics
|
||||
self.stats = {
|
||||
'total_snapshots': 0,
|
||||
'pages_fetched': 0,
|
||||
'failed_requests': 0,
|
||||
'generated_files': 0
|
||||
"total_snapshots": 0,
|
||||
"pages_fetched": 0,
|
||||
"failed_requests": 0,
|
||||
"generated_files": 0,
|
||||
}
|
||||
|
||||
def setup_logging(self):
|
||||
"""Setup logging configuration."""
|
||||
log_file = self.output_dir / 'snapshots.log'
|
||||
log_file = self.output_dir / "snapshots.log"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler(log_file),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
|
||||
)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -109,8 +112,8 @@ class SnapshotDownloader:
|
||||
self.logger.info("Login authentication successful")
|
||||
# Use the API key from auth manager
|
||||
auth_headers = self.auth_manager.get_auth_headers()
|
||||
if 'x-api-key' in auth_headers:
|
||||
self.api_key = auth_headers['x-api-key']
|
||||
if "x-api-key" in auth_headers:
|
||||
self.api_key = auth_headers["x-api-key"]
|
||||
else:
|
||||
self.logger.error("Login authentication failed")
|
||||
raise Exception("Login authentication failed")
|
||||
@@ -120,18 +123,21 @@ class SnapshotDownloader:
|
||||
headers = self.headers.copy()
|
||||
|
||||
if self.api_key:
|
||||
headers['x-api-key'] = self.api_key
|
||||
headers["x-api-key"] = self.api_key
|
||||
elif self.auth_manager and self.auth_manager.is_authenticated():
|
||||
headers.update(self.auth_manager.get_auth_headers())
|
||||
|
||||
return headers
|
||||
|
||||
async def fetch_snapshots_page(self, session: aiohttp.ClientSession,
|
||||
type_ids: List[int] = [15],
|
||||
date_from: str = "2021-10-18",
|
||||
date_to: str = None,
|
||||
cursor: str = None,
|
||||
per_page: int = 100) -> Dict[str, Any]:
|
||||
async def fetch_snapshots_page(
|
||||
self,
|
||||
session: aiohttp.ClientSession,
|
||||
type_ids: List[int] = [15],
|
||||
date_from: str = "2021-10-18",
|
||||
date_to: str = None,
|
||||
cursor: str = None,
|
||||
per_page: int = 100,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Fetch a single page of snapshots from the API using cursor-based pagination.
|
||||
|
||||
@@ -151,17 +157,17 @@ class SnapshotDownloader:
|
||||
|
||||
# Build query parameters
|
||||
params = {
|
||||
'dateFrom': date_from,
|
||||
'dateTo': date_to,
|
||||
"dateFrom": date_from,
|
||||
"dateTo": date_to,
|
||||
}
|
||||
|
||||
# Add cursor for pagination (skip for first request)
|
||||
if cursor:
|
||||
params['cursor'] = cursor
|
||||
params["cursor"] = cursor
|
||||
|
||||
# Add type IDs - API expects typeIDs[]=15 format
|
||||
for type_id in type_ids:
|
||||
params[f'typeIDs[]'] = type_id
|
||||
params[f"typeIDs[]"] = type_id
|
||||
|
||||
# Build URL with parameters
|
||||
query_string = urlencode(params, doseq=True)
|
||||
@@ -184,21 +190,25 @@ class SnapshotDownloader:
|
||||
print(f"Status Code: {response.status}")
|
||||
print(f"Headers: {dict(response.headers)}")
|
||||
print(f"Response Type: {type(data)}")
|
||||
print(f"Response Keys: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}")
|
||||
print(
|
||||
f"Response Keys: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}"
|
||||
)
|
||||
print(f"Posts count: {len(data.get('posts', []))}")
|
||||
print(f"Cursor: {data.get('cursor', 'None')}")
|
||||
if len(data.get('posts', [])) <= 3: # Only print full data if few posts
|
||||
if (
|
||||
len(data.get("posts", [])) <= 3
|
||||
): # Only print full data if few posts
|
||||
print(f"Full Response Data:")
|
||||
print(json.dumps(data, indent=2, default=str))
|
||||
print("=" * 50)
|
||||
|
||||
# The API returns snapshots in 'posts' field
|
||||
snapshots = data.get('posts', [])
|
||||
cursor_value = data.get('cursor')
|
||||
snapshots = data.get("posts", [])
|
||||
cursor_value = data.get("cursor")
|
||||
|
||||
page_info = f"cursor: {cursor[:20]}..." if cursor else "first page"
|
||||
self.logger.info(f"Retrieved {len(snapshots)} snapshots ({page_info})")
|
||||
self.stats['pages_fetched'] += 1
|
||||
self.stats["pages_fetched"] += 1
|
||||
|
||||
# Return the actual API response format
|
||||
return data
|
||||
@@ -206,14 +216,17 @@ class SnapshotDownloader:
|
||||
except Exception as e:
|
||||
page_info = f"cursor: {cursor[:20]}..." if cursor else "first page"
|
||||
self.logger.error(f"Failed to fetch snapshots ({page_info}): {e}")
|
||||
self.stats['failed_requests'] += 1
|
||||
self.stats["failed_requests"] += 1
|
||||
raise
|
||||
|
||||
async def fetch_all_snapshots(self, session: aiohttp.ClientSession,
|
||||
type_ids: List[int] = [15],
|
||||
date_from: str = "2021-10-18",
|
||||
date_to: str = None,
|
||||
max_pages: int = None) -> List[Dict[str, Any]]:
|
||||
async def fetch_all_snapshots(
|
||||
self,
|
||||
session: aiohttp.ClientSession,
|
||||
type_ids: List[int] = [15],
|
||||
date_from: str = "2021-10-18",
|
||||
date_to: str = None,
|
||||
max_pages: int = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch all snapshots across all pages using cursor-based pagination.
|
||||
|
||||
@@ -231,7 +244,9 @@ class SnapshotDownloader:
|
||||
cursor = None
|
||||
page_count = 0
|
||||
|
||||
self.logger.info(f"Starting snapshot fetch from {date_from} to {date_to or 'now'}")
|
||||
self.logger.info(
|
||||
f"Starting snapshot fetch from {date_from} to {date_to or 'now'}"
|
||||
)
|
||||
|
||||
while True:
|
||||
page_count += 1
|
||||
@@ -246,17 +261,19 @@ class SnapshotDownloader:
|
||||
)
|
||||
|
||||
# Extract snapshots from response
|
||||
snapshots = response.get('posts', [])
|
||||
new_cursor = response.get('cursor')
|
||||
snapshots = response.get("posts", [])
|
||||
new_cursor = response.get("cursor")
|
||||
|
||||
if not snapshots:
|
||||
self.logger.info("No more snapshots found (empty posts array)")
|
||||
break
|
||||
|
||||
all_snapshots.extend(snapshots)
|
||||
self.stats['total_snapshots'] += len(snapshots)
|
||||
self.stats["total_snapshots"] += len(snapshots)
|
||||
|
||||
self.logger.info(f"Page {page_count}: {len(snapshots)} snapshots (total: {len(all_snapshots)})")
|
||||
self.logger.info(
|
||||
f"Page {page_count}: {len(snapshots)} snapshots (total: {len(all_snapshots)})"
|
||||
)
|
||||
|
||||
# If no cursor returned, we've reached the end
|
||||
if not new_cursor:
|
||||
@@ -273,7 +290,9 @@ class SnapshotDownloader:
|
||||
self.logger.info(f"Total snapshots fetched: {len(all_snapshots)}")
|
||||
return all_snapshots
|
||||
|
||||
async def format_snapshot_html(self, snapshot: Dict[str, Any], session: aiohttp.ClientSession) -> str:
|
||||
async def format_snapshot_html(
|
||||
self, snapshot: Dict[str, Any], session: aiohttp.ClientSession
|
||||
) -> str:
|
||||
"""
|
||||
Format a single snapshot as HTML.
|
||||
|
||||
@@ -284,34 +303,46 @@ class SnapshotDownloader:
|
||||
HTML string for the snapshot
|
||||
"""
|
||||
# Extract key information from ParentZone snapshot format
|
||||
snapshot_id = snapshot.get('id', 'unknown')
|
||||
content = snapshot.get('notes', '') # Don't escape HTML in notes field
|
||||
start_time = snapshot.get('startTime', '')
|
||||
snapshot_type = snapshot.get('type', 'Snapshot')
|
||||
snapshot_id = snapshot.get("id", "unknown")
|
||||
content = snapshot.get("notes", "") # Don't escape HTML in notes field
|
||||
start_time = snapshot.get("startTime", "")
|
||||
snapshot_type = snapshot.get("type", "Snapshot")
|
||||
|
||||
# Format dates
|
||||
start_date = self.format_date(start_time) if start_time else 'Unknown'
|
||||
start_date = self.format_date(start_time) if start_time else "Unknown"
|
||||
|
||||
# Extract additional information
|
||||
author = snapshot.get('author', {})
|
||||
author_forename = author.get('forename', '') if author else ''
|
||||
author_surname = author.get('surname', '') if author else ''
|
||||
author_name = html.escape(f"{author_forename} {author_surname}".strip()) if author else 'Unknown'
|
||||
author = snapshot.get("author", {})
|
||||
author_forename = author.get("forename", "") if author else ""
|
||||
author_surname = author.get("surname", "") if author else ""
|
||||
author_name = (
|
||||
html.escape(f"{author_forename} {author_surname}".strip())
|
||||
if author
|
||||
else "Unknown"
|
||||
)
|
||||
|
||||
# Extract child information (if any)
|
||||
child = snapshot.get('child', {})
|
||||
child_forename = child.get('forename', '') if child else ''
|
||||
child_name = html.escape(f"{child.get('forename', '')} {child.get('surname', '')}".strip()) if child else ''
|
||||
child = snapshot.get("child", {})
|
||||
child_forename = child.get("forename", "") if child else ""
|
||||
child_name = (
|
||||
html.escape(
|
||||
f"{child.get('forename', '')} {child.get('surname', '')}".strip()
|
||||
)
|
||||
if child
|
||||
else ""
|
||||
)
|
||||
|
||||
# Create title in format: "Child Forename by Author Forename Surname"
|
||||
if child_forename and author_forename:
|
||||
title = html.escape(f"{child_forename} by {author_forename} {author_surname}".strip())
|
||||
title = html.escape(
|
||||
f"{child_forename} by {author_forename} {author_surname}".strip()
|
||||
)
|
||||
else:
|
||||
title = html.escape(f"Snapshot {snapshot_id}")
|
||||
|
||||
# Extract location/activity information
|
||||
activity = snapshot.get('activity', {})
|
||||
activity_name = html.escape(activity.get('name', '')) if activity else ''
|
||||
activity = snapshot.get("activity", {})
|
||||
activity_name = html.escape(activity.get("name", "")) if activity else ""
|
||||
|
||||
# Build HTML
|
||||
html_content = f"""
|
||||
@@ -327,12 +358,12 @@ class SnapshotDownloader:
|
||||
</div>
|
||||
|
||||
<div class="snapshot-content">
|
||||
{f'<div class="snapshot-author">👤 Author: {author_name}</div>' if author_name != 'Unknown' else ''}
|
||||
{f'<div class="snapshot-child">👶 Child: {child_name}</div>' if child_name else ''}
|
||||
{f'<div class="snapshot-activity">🎯 Activity: {activity_name}</div>' if activity_name else ''}
|
||||
{f'<div class="snapshot-author">👤 Author: {author_name}</div>' if author_name != "Unknown" else ""}
|
||||
{f'<div class="snapshot-child">👶 Child: {child_name}</div>' if child_name else ""}
|
||||
{f'<div class="snapshot-activity">🎯 Activity: {activity_name}</div>' if activity_name else ""}
|
||||
|
||||
<div class="snapshot-description">
|
||||
<div class="notes-content">{content if content else '<em>No description provided</em>'}</div>
|
||||
<div class="notes-content">{content if content else "<em>No description provided</em>"}</div>
|
||||
</div>
|
||||
|
||||
{await self.format_snapshot_media(snapshot, session)}
|
||||
@@ -343,112 +374,130 @@ class SnapshotDownloader:
|
||||
|
||||
return html_content.strip()
|
||||
|
||||
async def format_snapshot_media(self, snapshot: Dict[str, Any], session: aiohttp.ClientSession) -> str:
|
||||
async def format_snapshot_media(
|
||||
self, snapshot: Dict[str, Any], session: aiohttp.ClientSession
|
||||
) -> str:
|
||||
"""Format media attachments for a snapshot."""
|
||||
media_html = ""
|
||||
|
||||
# Check for media (images and other files)
|
||||
media = snapshot.get('media', [])
|
||||
images = [m for m in media if m.get('type') == 'image']
|
||||
media = snapshot.get("media", [])
|
||||
images = [m for m in media if m.get("type") == "image"]
|
||||
if images:
|
||||
media_html += '<div class="snapshot-images">\n'
|
||||
media_html += '<h4>📸 Images:</h4>\n'
|
||||
media_html += "<h4>📸 Images:</h4>\n"
|
||||
media_html += '<div class="image-grid">\n'
|
||||
|
||||
for image in images:
|
||||
# Download the image file
|
||||
local_path = await self.download_media_file(session, image)
|
||||
image_name = html.escape(image.get('fileName', 'Image'))
|
||||
image_name = html.escape(image.get("fileName", "Image"))
|
||||
|
||||
if local_path:
|
||||
media_html += f'<div class="image-item">\n'
|
||||
media_html += f' <img src="{local_path}" alt="{image_name}" loading="lazy">\n'
|
||||
media_html += f' <p class="image-caption">{image_name}</p>\n'
|
||||
media_html += f' <p class="image-meta">Updated: {self.format_date(image.get("updated", ""))}</p>\n'
|
||||
media_html += f'</div>\n'
|
||||
media_html += f"</div>\n"
|
||||
else:
|
||||
# Fallback to API URL if download failed
|
||||
image_url = f"{self.api_url}/v1/media/{image.get('id')}/full" if image.get('id') else ''
|
||||
image_url = (
|
||||
f"{self.api_url}/v1/media/{image.get('id')}/full"
|
||||
if image.get("id")
|
||||
else ""
|
||||
)
|
||||
if image_url:
|
||||
media_html += f'<div class="image-item">\n'
|
||||
media_html += f' <img src="{image_url}" alt="{image_name}" loading="lazy">\n'
|
||||
media_html += f' <p class="image-caption">{image_name} (online)</p>\n'
|
||||
media_html += (
|
||||
f' <p class="image-caption">{image_name} (online)</p>\n'
|
||||
)
|
||||
media_html += f' <p class="image-meta">Updated: {self.format_date(image.get("updated", ""))}</p>\n'
|
||||
media_html += f'</div>\n'
|
||||
media_html += f"</div>\n"
|
||||
|
||||
media_html += '</div>\n</div>\n'
|
||||
media_html += "</div>\n</div>\n"
|
||||
|
||||
# Check for non-image media as attachments
|
||||
attachments = [m for m in media if m.get('type') != 'image']
|
||||
attachments = [m for m in media if m.get("type") != "image"]
|
||||
if attachments:
|
||||
media_html += '<div class="snapshot-attachments">\n'
|
||||
media_html += '<h4>📎 Attachments:</h4>\n'
|
||||
media_html += "<h4>📎 Attachments:</h4>\n"
|
||||
media_html += '<ul class="attachment-list">\n'
|
||||
|
||||
for attachment in attachments:
|
||||
# Download the attachment file
|
||||
local_path = await self.download_media_file(session, attachment)
|
||||
attachment_name = html.escape(attachment.get('fileName', 'Attachment'))
|
||||
attachment_type = attachment.get('mimeType', 'unknown')
|
||||
attachment_name = html.escape(attachment.get("fileName", "Attachment"))
|
||||
attachment_type = attachment.get("mimeType", "unknown")
|
||||
|
||||
if local_path:
|
||||
media_html += f' <li><a href="{local_path}" target="_blank">{attachment_name} ({attachment_type})</a></li>\n'
|
||||
else:
|
||||
# Fallback to API URL if download failed
|
||||
attachment_url = f"{self.api_url}/v1/media/{attachment.get('id')}/full" if attachment.get('id') else ''
|
||||
attachment_url = (
|
||||
f"{self.api_url}/v1/media/{attachment.get('id')}/full"
|
||||
if attachment.get("id")
|
||||
else ""
|
||||
)
|
||||
if attachment_url:
|
||||
media_html += f' <li><a href="{attachment_url}" target="_blank">{attachment_name} ({attachment_type}) - online</a></li>\n'
|
||||
else:
|
||||
media_html += f' <li>{attachment_name} ({attachment_type})</li>\n'
|
||||
media_html += (
|
||||
f" <li>{attachment_name} ({attachment_type})</li>\n"
|
||||
)
|
||||
|
||||
media_html += '</ul>\n</div>\n'
|
||||
media_html += "</ul>\n</div>\n"
|
||||
|
||||
return media_html
|
||||
|
||||
def format_snapshot_metadata(self, snapshot: Dict[str, Any]) -> str:
|
||||
"""Format additional metadata for a snapshot."""
|
||||
metadata_html = '<div class="snapshot-metadata">\n'
|
||||
metadata_html += '<h4>ℹ️ Additional Information:</h4>\n'
|
||||
metadata_html += "<h4>ℹ️ Additional Information:</h4>\n"
|
||||
metadata_html += '<div class="metadata-grid">\n'
|
||||
|
||||
# Add any additional fields that might be interesting
|
||||
metadata_fields = [
|
||||
('code', 'Code'),
|
||||
('frameworkIndicatorCount', 'Framework Indicators'),
|
||||
('signed', 'Signed Status'),
|
||||
('type', 'Type')
|
||||
("code", "Code"),
|
||||
("frameworkIndicatorCount", "Framework Indicators"),
|
||||
("signed", "Signed Status"),
|
||||
("type", "Type"),
|
||||
]
|
||||
|
||||
for field, label in metadata_fields:
|
||||
value = snapshot.get(field)
|
||||
if value:
|
||||
if isinstance(value, list):
|
||||
value = ', '.join(str(v) for v in value)
|
||||
value = ", ".join(str(v) for v in value)
|
||||
metadata_html += f'<div class="metadata-item">\n'
|
||||
metadata_html += f' <strong>{label}:</strong> {html.escape(str(value))}\n'
|
||||
metadata_html += f'</div>\n'
|
||||
metadata_html += (
|
||||
f" <strong>{label}:</strong> {html.escape(str(value))}\n"
|
||||
)
|
||||
metadata_html += f"</div>\n"
|
||||
|
||||
# Raw JSON data (collapsed by default)
|
||||
metadata_html += '<details class="raw-data">\n'
|
||||
metadata_html += '<summary>🔍 Raw JSON Data</summary>\n'
|
||||
metadata_html += "<summary>🔍 Raw JSON Data</summary>\n"
|
||||
metadata_html += '<pre class="json-data">\n'
|
||||
metadata_html += html.escape(json.dumps(snapshot, indent=2, default=str))
|
||||
metadata_html += '\n</pre>\n'
|
||||
metadata_html += '</details>\n'
|
||||
metadata_html += "\n</pre>\n"
|
||||
metadata_html += "</details>\n"
|
||||
|
||||
metadata_html += '</div>\n</div>\n'
|
||||
metadata_html += "</div>\n</div>\n"
|
||||
return metadata_html
|
||||
|
||||
def format_date(self, date_string: str) -> str:
|
||||
"""Format a date string for display."""
|
||||
try:
|
||||
# Try to parse ISO format date
|
||||
dt = datetime.fromisoformat(date_string.replace('Z', '+00:00'))
|
||||
dt = datetime.fromisoformat(date_string.replace("Z", "+00:00"))
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except:
|
||||
return date_string
|
||||
|
||||
async def download_media_file(self, session: aiohttp.ClientSession, media: Dict[str, Any]) -> Optional[str]:
|
||||
async def download_media_file(
|
||||
self, session: aiohttp.ClientSession, media: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Download a media file to the assets folder.
|
||||
|
||||
@@ -459,11 +508,11 @@ class SnapshotDownloader:
|
||||
Returns:
|
||||
Relative path to downloaded file, or None if download failed
|
||||
"""
|
||||
media_id = media.get('id')
|
||||
media_id = media.get("id")
|
||||
if not media_id:
|
||||
return None
|
||||
|
||||
filename = media.get('fileName', f'media_{media_id}')
|
||||
filename = media.get("fileName", f"media_{media_id}")
|
||||
# Sanitize filename
|
||||
filename = self._sanitize_filename(filename)
|
||||
|
||||
@@ -480,11 +529,13 @@ class SnapshotDownloader:
|
||||
self.logger.info(f"Downloading media file: {filename}")
|
||||
|
||||
headers = self.get_auth_headers()
|
||||
async with session.get(download_url, headers=headers, timeout=30) as response:
|
||||
async with session.get(
|
||||
download_url, headers=headers, timeout=30
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
# Download the file
|
||||
async with aiofiles.open(filepath, 'wb') as f:
|
||||
async with aiofiles.open(filepath, "wb") as f:
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
await f.write(chunk)
|
||||
|
||||
@@ -502,19 +553,20 @@ class SnapshotDownloader:
|
||||
# Remove or replace invalid characters
|
||||
invalid_chars = '<>:"/\\|?*'
|
||||
for char in invalid_chars:
|
||||
filename = filename.replace(char, '_')
|
||||
filename = filename.replace(char, "_")
|
||||
|
||||
# Remove leading/trailing spaces and dots
|
||||
filename = filename.strip('. ')
|
||||
filename = filename.strip(". ")
|
||||
|
||||
# Ensure filename is not empty
|
||||
if not filename:
|
||||
filename = 'media_file'
|
||||
filename = "media_file"
|
||||
|
||||
return filename
|
||||
|
||||
async def generate_html_file(self, snapshots: List[Dict[str, Any]],
|
||||
date_from: str, date_to: str) -> Path:
|
||||
async def generate_html_file(
|
||||
self, snapshots: List[Dict[str, Any]], date_from: str, date_to: str
|
||||
) -> Path:
|
||||
"""
|
||||
Generate an HTML file containing all snapshots.
|
||||
|
||||
@@ -528,9 +580,7 @@ class SnapshotDownloader:
|
||||
"""
|
||||
# Sort snapshots by start time (newest first)
|
||||
sorted_snapshots = sorted(
|
||||
snapshots,
|
||||
key=lambda x: x.get('startTime', ''),
|
||||
reverse=True
|
||||
snapshots, key=lambda x: x.get("startTime", ""), reverse=True
|
||||
)
|
||||
|
||||
# Generate filename
|
||||
@@ -538,19 +588,22 @@ class SnapshotDownloader:
|
||||
filepath = self.output_dir / filename
|
||||
|
||||
# Generate HTML content
|
||||
html_content = await self.generate_html_template(sorted_snapshots, date_from, date_to)
|
||||
html_content = await self.generate_html_template(
|
||||
sorted_snapshots, date_from, date_to
|
||||
)
|
||||
|
||||
# Write to file
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
with open(filepath, "w", encoding="utf-8") as f:
|
||||
f.write(html_content)
|
||||
|
||||
self.logger.info(f"Generated HTML file: {filepath}")
|
||||
self.stats['generated_files'] += 1
|
||||
self.stats["generated_files"] += 1
|
||||
|
||||
return filepath
|
||||
|
||||
async def generate_html_template(self, snapshots: List[Dict[str, Any]],
|
||||
date_from: str, date_to: str) -> str:
|
||||
async def generate_html_template(
|
||||
self, snapshots: List[Dict[str, Any]], date_from: str, date_to: str
|
||||
) -> str:
|
||||
"""Generate the complete HTML template."""
|
||||
|
||||
# Generate individual snapshot HTML
|
||||
@@ -560,7 +613,9 @@ class SnapshotDownloader:
|
||||
connector = aiohttp.TCPConnector(limit=100, limit_per_host=30)
|
||||
timeout = aiohttp.ClientTimeout(total=30)
|
||||
|
||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
connector=connector, timeout=timeout
|
||||
) as session:
|
||||
# Authenticate session for media downloads
|
||||
await self.authenticate()
|
||||
|
||||
@@ -604,7 +659,7 @@ class SnapshotDownloader:
|
||||
|
||||
<footer class="page-footer">
|
||||
<p>Generated by ParentZone Snapshot Downloader</p>
|
||||
<p>Total snapshots: {len(snapshots)} | Pages fetched: {self.stats['pages_fetched']}</p>
|
||||
<p>Total snapshots: {len(snapshots)} | Pages fetched: {self.stats["pages_fetched"]}</p>
|
||||
</footer>
|
||||
</div>
|
||||
|
||||
@@ -991,9 +1046,13 @@ class SnapshotDownloader:
|
||||
});
|
||||
"""
|
||||
|
||||
async def download_snapshots(self, type_ids: List[int] = [15],
|
||||
date_from: str = None, date_to: str = None,
|
||||
max_pages: int = None) -> Path:
|
||||
async def download_snapshots(
|
||||
self,
|
||||
type_ids: List[int] = [15],
|
||||
date_from: str = None,
|
||||
date_to: str = None,
|
||||
max_pages: int = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download all snapshots and generate HTML file.
|
||||
|
||||
@@ -1013,13 +1072,17 @@ class SnapshotDownloader:
|
||||
if date_to is None:
|
||||
date_to = datetime.now().strftime("%Y-%m-%d")
|
||||
|
||||
self.logger.info(f"Starting snapshot download for period {date_from} to {date_to}")
|
||||
self.logger.info(
|
||||
f"Starting snapshot download for period {date_from} to {date_to}"
|
||||
)
|
||||
|
||||
# Create aiohttp session
|
||||
connector = aiohttp.TCPConnector(limit=100, limit_per_host=30)
|
||||
timeout = aiohttp.ClientTimeout(total=30)
|
||||
|
||||
async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
|
||||
async with aiohttp.ClientSession(
|
||||
connector=connector, timeout=timeout
|
||||
) as session:
|
||||
try:
|
||||
# Authenticate if needed
|
||||
await self.authenticate()
|
||||
@@ -1077,64 +1140,53 @@ Examples:
|
||||
|
||||
# Specify output directory
|
||||
python3 snapshot_downloader.py --api-key KEY --output-dir ./my_snapshots
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("--api-key", help="API key for authentication")
|
||||
|
||||
parser.add_argument("--email", help="Email for login authentication")
|
||||
|
||||
parser.add_argument("--password", help="Password for login authentication")
|
||||
|
||||
parser.add_argument(
|
||||
"--date-from", help="Start date in YYYY-MM-DD format (default: 1 year ago)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--api-key',
|
||||
help='API key for authentication'
|
||||
"--date-to", help="End date in YYYY-MM-DD format (default: today)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--email',
|
||||
help='Email for login authentication'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--password',
|
||||
help='Password for login authentication'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--date-from',
|
||||
help='Start date in YYYY-MM-DD format (default: 1 year ago)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--date-to',
|
||||
help='End date in YYYY-MM-DD format (default: today)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--type-ids',
|
||||
nargs='+',
|
||||
"--type-ids",
|
||||
nargs="+",
|
||||
type=int,
|
||||
default=[15],
|
||||
help='Type IDs to filter by (default: [15])'
|
||||
help="Type IDs to filter by (default: [15])",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output-dir',
|
||||
default='snapshots',
|
||||
help='Directory to save snapshot files (default: snapshots)'
|
||||
"--output-dir",
|
||||
default="snapshots",
|
||||
help="Directory to save snapshot files (default: snapshots)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--max-pages',
|
||||
"--max-pages",
|
||||
type=int,
|
||||
help='Maximum number of cursor pages to fetch (for testing)'
|
||||
help="Maximum number of cursor pages to fetch (for testing)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--api-url',
|
||||
default='https://api.parentzone.me',
|
||||
help='ParentZone API URL (default: https://api.parentzone.me)'
|
||||
"--api-url",
|
||||
default="https://api.parentzone.me",
|
||||
help="ParentZone API URL (default: https://api.parentzone.me)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
action='store_true',
|
||||
help='Enable debug mode with detailed server response logging'
|
||||
"--debug",
|
||||
action="store_true",
|
||||
help="Enable debug mode with detailed server response logging",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -1160,19 +1212,21 @@ Examples:
|
||||
api_key=args.api_key,
|
||||
email=args.email,
|
||||
password=args.password,
|
||||
debug_mode=args.debug
|
||||
debug_mode=args.debug,
|
||||
)
|
||||
|
||||
if args.debug:
|
||||
print("🔍 DEBUG MODE ENABLED - Detailed server responses will be printed")
|
||||
|
||||
# Download snapshots
|
||||
html_file = asyncio.run(downloader.download_snapshots(
|
||||
type_ids=args.type_ids,
|
||||
date_from=args.date_from,
|
||||
date_to=args.date_to,
|
||||
max_pages=args.max_pages
|
||||
))
|
||||
html_file = asyncio.run(
|
||||
downloader.download_snapshots(
|
||||
type_ids=args.type_ids,
|
||||
date_from=args.date_from,
|
||||
date_to=args.date_to,
|
||||
max_pages=args.max_pages,
|
||||
)
|
||||
)
|
||||
|
||||
if html_file:
|
||||
print(f"\n✅ Success! Snapshots downloaded and saved to: {html_file}")
|
||||