# parentzone_downloader/test_snapshot_downloader.py

#!/usr/bin/env python3
"""
Test Snapshot Downloader Functionality

This script tests the snapshot downloader to ensure it properly fetches
snapshots with pagination and generates HTML reports correctly.
"""

import asyncio
import json
import logging
import sys
import tempfile
from datetime import datetime, timedelta
from pathlib import Path
import os

# Add the current directory to the path so we can import modules
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from snapshot_downloader import SnapshotDownloader
from config_snapshot_downloader import ConfigSnapshotDownloader


class SnapshotDownloaderTester:
    """Test class for snapshot downloader functionality."""

    def __init__(self):
        """Initialize the tester.

        Credentials are taken from the environment so that no real secrets
        are stored in this file:

        * ``PARENTZONE_EMAIL`` and ``PARENTZONE_PASSWORD`` -- login used when
          building downloaders and test configurations.
        * ``PARENTZONE_API_KEY`` -- key used by the API-key based checks.

        Any variable that is not set falls back to an obviously fake
        placeholder. Checks that only compare these values against what the
        downloader stores work with the placeholders; the authentication
        flow test calls ``authenticate()`` and therefore needs real values
        to be exported before the script is run.
        """
        self.logger = logging.getLogger(__name__)

        # Never hard-code real credentials here; export them instead.
        self.email = os.environ.get("PARENTZONE_EMAIL", "test@example.com")
        self.password = os.environ.get("PARENTZONE_PASSWORD", "test-password")
        self.api_key = os.environ.get("PARENTZONE_API_KEY", "test-api-key")

    def create_test_config(self, output_dir: str, **kwargs) -> dict:
        """Build a configuration dictionary for the config-based tests.

        The baseline holds the API URL, the given output directory, a single
        type ID, a one-month date range, a two-page limit and this tester's
        email and password.

        Args:
            output_dir: Value stored under the ``output_dir`` key.
            **kwargs: Entries that override baseline keys or add new ones.

        Returns:
            A new dictionary: the baseline with ``kwargs`` applied on top.
        """
        baseline = dict(
            api_url="https://api.parentzone.me",
            output_dir=output_dir,
            type_ids=[15],
            date_from="2024-01-01",
            date_to="2024-01-31",  # narrow window keeps test runs small
            max_pages=2,  # cap the number of pages for testing
            email=self.email,
            password=self.password,
        )
        return {**baseline, **kwargs}

    def test_initialization(self):
        """Verify that constructor arguments end up on the downloader.

        One downloader is built with email/password and one with an API key.
        The checks cover the stored output directory, the stored email, the
        initial ``total_snapshots`` counter and the stored API key.

        Returns:
            True when every check passes, False at the first failing check.
        """
        rule = "=" * 60
        print(rule)
        print("TEST 1: Initialization")
        print(rule)

        def verdict(ok, pass_msg, fail_msg):
            # Print the outcome line for one check and hand the result back.
            print(f"   ✅ {pass_msg}" if ok else f"   ❌ {fail_msg}")
            return ok

        with tempfile.TemporaryDirectory() as temp_dir:
            print("1. Testing basic initialization...")

            login_downloader = SnapshotDownloader(
                output_dir=temp_dir,
                email=self.email,
                password=self.password
            )

            if not verdict(
                login_downloader.output_dir == Path(temp_dir),
                "Output directory set correctly",
                "Output directory not set correctly",
            ):
                return False

            if not verdict(
                login_downloader.email == self.email,
                "Email set correctly",
                "Email not set correctly",
            ):
                return False

            if not verdict(
                login_downloader.stats['total_snapshots'] == 0,
                "Statistics initialized correctly",
                "Statistics not initialized correctly",
            ):
                return False

            print("\n2. Testing with API key...")
            key_downloader = SnapshotDownloader(
                output_dir=temp_dir,
                api_key=self.api_key
            )

            if not verdict(
                key_downloader.api_key == self.api_key,
                "API key set correctly",
                "API key not set correctly",
            ):
                return False

        print("\n✅ Initialization test passed!")
        return True

    def test_authentication_headers(self):
        """Verify the headers returned by ``get_auth_headers``.

        A downloader is created with an API key; the returned headers must
        carry that key under ``x-api-key`` and must contain each of the
        standard header names listed below.

        Returns:
            True when every header check passes, False at the first failure.
        """
        print("\n" + "=" * 60)
        print("TEST 2: Authentication Headers")
        print("=" * 60)

        required_headers = (
            'accept', 'accept-language', 'origin', 'user-agent',
            'sec-fetch-dest', 'sec-fetch-mode', 'sec-fetch-site',
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            print("1. Testing API key headers...")

            downloader = SnapshotDownloader(
                output_dir=temp_dir,
                api_key=self.api_key
            )
            headers = downloader.get_auth_headers()

            # Guard clause: the key header must exist and match exactly.
            if 'x-api-key' not in headers or headers['x-api-key'] != self.api_key:
                print("   ❌ API key header not set correctly")
                return False
            print("   ✅ API key header set correctly")

            print("\n2. Testing standard headers...")
            for name in required_headers:
                if name not in headers:
                    print(f"   ❌ {name} header missing")
                    return False
                print(f"   ✅ {name} header present")

        print("\n✅ Authentication headers test passed!")
        return True

    async def test_authentication_flow(self):
        """Test the login authentication flow.

        Builds a downloader with this tester's email/password, awaits
        ``authenticate()`` and then requires that the auth manager reports
        an authenticated state and that ``get_auth_headers()`` yields a
        non-empty ``x-api-key`` value.

        Only a short masked prefix of the obtained key is printed, so the
        secret does not end up in terminal scrollback or CI logs.

        Returns:
            True when authentication succeeds and a key was obtained,
            False otherwise (including when any exception is raised).
        """
        print("\n" + "=" * 60)
        print("TEST 3: Authentication Flow")
        print("=" * 60)

        with tempfile.TemporaryDirectory() as temp_dir:
            print("1. Testing login authentication...")

            downloader = SnapshotDownloader(
                output_dir=temp_dir,
                email=self.email,
                password=self.password
            )

            try:
                await downloader.authenticate()

                manager = downloader.auth_manager
                if not (manager and manager.is_authenticated()):
                    print("   ❌ Login authentication failed")
                    return False
                print("   ✅ Login authentication successful")

                # A header that is present but empty is as useless as a
                # missing one, so both count as "not obtained".
                headers = downloader.get_auth_headers()
                obtained_key = headers['x-api-key'] if 'x-api-key' in headers else None
                if not obtained_key:
                    print("   ❌ API key not obtained from authentication")
                    return False

                print("   ✅ API key obtained from authentication")
                # Show just enough to recognise the key, never most of it.
                print(f"   ✅ API key: {str(obtained_key)[:4]}... (masked)")

            except Exception as e:
                # Top-level boundary of this check: report and fail.
                print(f"   ❌ Authentication error: {e}")
                return False

        print("\n✅ Authentication flow test passed!")
        return True

    async def test_url_building(self):
        """Test URL building for API requests.

        ``fetch_snapshots_page`` is called with a fake session whose ``get``
        records the requested URL and then aborts the request with a
        sentinel exception. The recorded URL is then checked for the base
        endpoint and the type ID, date range and page parameters.

        The fake mirrors the aiohttp calling convention: ``get`` is a plain
        method whose return value can be used with ``async with`` or
        awaited. The URL is validated whether or not the sentinel exception
        reaches this test, so a downloader that swallows the error cannot
        make the test pass without any check having run.

        Returns:
            True when every URL component is present, False otherwise.
        """
        print("\n" + "=" * 60)
        print("TEST 4: URL Building")
        print("=" * 60)

        sentinel = "Mock response - URL captured"

        class MockAsyncContext:
            """Request stand-in that aborts as soon as it is used."""

            async def _abort(self):
                raise Exception(sentinel)

            def __await__(self):
                # Supports ``await session.get(...)``.
                return self._abort().__await__()

            async def __aenter__(self):
                # Supports ``async with session.get(...)``.
                await self._abort()

            async def __aexit__(self, *args):
                pass

        class MockSession:
            """Session stand-in that remembers the last requested URL."""

            def __init__(self):
                self.last_url = None
                self.last_headers = None

            def get(self, url, headers=None, timeout=None, **kwargs):
                self.last_url = url
                self.last_headers = headers
                return MockAsyncContext()

            async def __aenter__(self):
                return self

            async def __aexit__(self, *args):
                pass

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing basic URL construction...")

            mock_session = MockSession()

            try:
                await downloader.fetch_snapshots_page(
                    mock_session,
                    type_ids=[15],
                    date_from="2024-01-01",
                    date_to="2024-01-31",
                    page=1,
                    per_page=100
                )
            except Exception as e:
                # The sentinel is expected; anything else is a real failure.
                if sentinel not in str(e):
                    print(f"   ❌ Unexpected error: {e}")
                    return False

            if mock_session.last_url is None:
                print("   ❌ No request was issued, so no URL was captured")
                return False

            print(f"   Generated URL: {mock_session.last_url}")
            # The downloader may hand over a URL object rather than a str.
            url = str(mock_session.last_url)

            checks = (
                ("Base URL", "https://api.parentzone.me/v1/posts" in url),
                # Brackets may or may not be percent-encoded.
                ("Type ID parameter",
                 "typeIDs%5B%5D=15" in url or "typeIDs[]=15" in url),
                ("Date from parameter", "dateFrom=2024-01-01" in url),
                ("Date to parameter", "dateTo=2024-01-31" in url),
                ("Page parameter", "page=1" in url),
            )
            for label, ok in checks:
                if not ok:
                    print(f"   ❌ {label} incorrect")
                    return False
                print(f"   ✅ {label} correct")

        print("\n✅ URL building test passed!")
        return True

    def test_html_formatting(self):
        """Test HTML formatting functions.

        Part 1 formats a single mock snapshot with ``format_snapshot_html``
        and checks the container markup, the escaping of a hostile title
        and the presence of title, author and child names.

        Part 2 writes a complete report with ``generate_html_file`` and
        checks that the file exists and contains a doctype, the report
        title and the snapshot content.

        The hostile title uses single quotes. ``html.escape`` renders a
        single quote as ``&#x27;`` (other escapers use ``&#39;``), never as
        ``&quot;``, so the escaping check accepts any of these entities
        around ``xss`` and additionally requires that the raw script tag
        is absent.

        Returns:
            True when every check passes, False at the first failure.
        """
        print("\n" + "=" * 60)
        print("TEST 5: HTML Formatting")
        print("=" * 60)

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing snapshot HTML formatting...")

            # Create mock snapshot data
            mock_snapshot = {
                "id": "test_snapshot_123",
                "title": "Test Snapshot <script>alert('xss')</script>",
                "content": "This is a test snapshot with some content & special characters",
                "created_at": "2024-01-15T10:30:00Z",
                "updated_at": "2024-01-15T10:30:00Z",
                "author": {
                    "name": "Test Author"
                },
                "child": {
                    "name": "Test Child"
                },
                "activity": {
                    "name": "Test Activity"
                },
                "images": [
                    {
                        "url": "https://example.com/image1.jpg",
                        "name": "Test Image"
                    }
                ]
            }

            html = downloader.format_snapshot_html(mock_snapshot)

            # Check basic structure
            if '<div class="snapshot"' not in html:
                print("   ❌ Snapshot container missing")
                return False
            print("   ✅ Snapshot container created")

            # Check HTML escaping: tags must be neutralised and the quotes
            # around the payload must appear as an entity.
            quote_entities = ("&#x27;xss&#x27;", "&#39;xss&#39;", "&quot;xss&quot;")
            tags_escaped = (
                "&lt;script&gt;" in html
                and "<script>alert('xss')" not in html
            )
            quotes_escaped = any(entity in html for entity in quote_entities)
            if not (tags_escaped and quotes_escaped):
                print("   ❌ HTML escaping failed")
                return False
            print("   ✅ HTML properly escaped")

            # Check content inclusion
            for needle, label in (
                ("Test Snapshot", "Title"),
                ("Test Author", "Author"),
                ("Test Child", "Child"),
            ):
                if needle not in html:
                    print(f"   ❌ {label} missing")
                    return False
                print(f"   ✅ {label} included")

            print("\n2. Testing complete HTML file generation...")

            html_file = downloader.generate_html_file(
                [mock_snapshot], "2024-01-01", "2024-01-31"
            )

            if not html_file.exists():
                print("   ❌ HTML file not created")
                return False
            print("   ✅ HTML file created")

            # Check file content
            with open(html_file, 'r', encoding='utf-8') as f:
                content = f.read()

            if "<!DOCTYPE html>" not in content:
                print("   ❌ Invalid HTML document")
                return False
            print("   ✅ Valid HTML document")

            if "ParentZone Snapshots" not in content:
                print("   ❌ Title missing")
                return False
            print("   ✅ Title included")

            if "Test Snapshot" not in content:
                print("   ❌ Snapshot content missing")
                return False
            print("   ✅ Snapshot content included")

        print("\n✅ HTML formatting test passed!")
        return True

    def test_config_downloader(self):
        """Exercise ``ConfigSnapshotDownloader`` with good and bad configs.

        A valid configuration file must load and expose a ``downloader``
        attribute. A configuration stripped of ``email`` and ``password``
        (and without an API key) must be rejected with ``ValueError``.

        Returns:
            True when both scenarios behave as expected, False otherwise.
        """
        print("\n" + "=" * 60)
        print("TEST 6: Config Downloader")
        print("=" * 60)

        with tempfile.TemporaryDirectory() as temp_dir:
            workdir = Path(temp_dir)

            print("1. Testing configuration loading...")

            # Write a valid configuration to disk.
            config_data = self.create_test_config(temp_dir)
            config_file = workdir / "test_config.json"
            config_file.write_text(json.dumps(config_data, indent=2))

            try:
                config_downloader = ConfigSnapshotDownloader(str(config_file))
                print("   ✅ Configuration loaded successfully")

                # The wrapper is expected to build the real downloader.
                if not hasattr(config_downloader, 'downloader'):
                    print("   ❌ Underlying downloader not created")
                    return False
                print("   ✅ Underlying downloader created")

            except Exception as e:
                print(f"   ❌ Configuration loading failed: {e}")
                return False

            print("\n2. Testing invalid configuration...")

            # Same settings minus every form of authentication.
            invalid_config = {
                key: value
                for key, value in config_data.items()
                if key not in ('email', 'password')
            }
            invalid_config_file = workdir / "invalid_config.json"
            invalid_config_file.write_text(json.dumps(invalid_config, indent=2))

            try:
                ConfigSnapshotDownloader(str(invalid_config_file))
            except ValueError:
                print("   ✅ Correctly rejected invalid configuration")
            except Exception as e:
                print(f"   ❌ Unexpected error: {e}")
                return False
            else:
                print("   ❌ Should have failed with invalid config")
                return False

        print("\n✅ Config downloader test passed!")
        return True

    def test_date_formatting(self):
        """Test date formatting functionality.

        Each case pairs an input with text that must appear in the output
        of ``format_date``. For parseable timestamps that text is the
        reformatted date; for the invalid and empty inputs it is the input
        itself, which is how pass-through is expressed.

        An output that merely echoes a parseable timestamp is a failure:
        accepting it would let a ``format_date`` that does nothing pass
        every case.

        Returns:
            True when every case matches, False at the first mismatch.
        """
        print("\n" + "=" * 60)
        print("TEST 7: Date Formatting")
        print("=" * 60)

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing various date formats...")

            test_dates = [
                ("2024-01-15T10:30:00Z", "2024-01-15 10:30:00"),
                ("2024-01-15T10:30:00.123Z", "2024-01-15 10:30:00"),
                ("2024-01-15T10:30:00+00:00", "2024-01-15 10:30:00"),
                ("invalid-date", "invalid-date"),  # Should pass through unchanged
                ("", ""),  # Should handle empty string
            ]

            for input_date, expected_prefix in test_dates:
                formatted = downloader.format_date(input_date)
                print(f"   Input: '{input_date}' → Output: '{formatted}'")

                if expected_prefix not in formatted:
                    print("   ❌ Date formatting failed")
                    return False
                print("   ✅ Date formatted correctly")

        print("\n✅ Date formatting test passed!")
        return True

    async def test_pagination_logic(self):
        """Test pagination handling logic.

        ``fetch_snapshots_page`` on a fresh downloader is replaced with a
        stub that serves three canned pages (2 + 2 + 1 snapshots) and bumps
        the ``pages_fetched`` counter. ``fetch_all_snapshots`` must then
        return all five snapshots over three pages, and only the first four
        when called with ``max_pages=2``.

        The stub is installed on a downloader that lives only inside this
        test, so the real method does not need to be saved or restored.

        Returns:
            True when all pagination checks pass, False otherwise
            (including when any exception is raised).
        """
        print("\n" + "=" * 60)
        print("TEST 8: Pagination Logic")
        print("=" * 60)

        print("1. Testing pagination parameters...")

        class MockResponse:
            """Minimal response object wrapping a canned JSON payload."""

            def __init__(self, data):
                self.data = data
                self.status = 200

            def raise_for_status(self):
                pass

            async def json(self):
                return self.data

        class PaginationMockSession:
            """Session stand-in that owns the canned pages."""

            def __init__(self):
                self.call_count = 0
                batches = (["snap1", "snap2"], ["snap3", "snap4"], ["snap5"])
                self.pages = [
                    {
                        "data": [{"id": snapshot_id} for snapshot_id in ids],
                        "pagination": {
                            "current_page": number,
                            "last_page": len(batches),
                        },
                    }
                    for number, ids in enumerate(batches, start=1)
                ]

            async def get(self, url, headers=None, timeout=None):
                return MockResponse(self.pages[self.call_count])

            async def __aenter__(self):
                return self

            async def __aexit__(self, *args):
                pass

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)
            mock_session = PaginationMockSession()

            async def mock_fetch_page(session, type_ids, date_from, date_to, page, per_page):
                # Pages are 1-based in the API and 0-based in the list.
                response_data = mock_session.pages[page - 1]
                mock_session.call_count += 1
                downloader.stats['pages_fetched'] += 1
                return response_data

            # Route every page request through the stub.
            downloader.fetch_snapshots_page = mock_fetch_page

            try:
                # Test fetching all pages
                snapshots = await downloader.fetch_all_snapshots(
                    mock_session, [15], "2024-01-01", "2024-01-31"
                )

                if len(snapshots) != 5:  # Total snapshots across all pages
                    print(f"   ❌ Expected 5 snapshots, got {len(snapshots)}")
                    return False
                print("   ✅ All pages fetched correctly")

                if downloader.stats['pages_fetched'] != 3:
                    print(f"   ❌ Expected 3 pages, tracked {downloader.stats['pages_fetched']}")
                    return False
                print("   ✅ Page count tracked correctly")

                # Test max_pages limit
                downloader.stats['pages_fetched'] = 0  # Reset
                mock_session.call_count = 0  # Reset

                snapshots_limited = await downloader.fetch_all_snapshots(
                    mock_session, [15], "2024-01-01", "2024-01-31", max_pages=2
                )

                if len(snapshots_limited) != 4:  # First 2 pages only
                    print(f"   ❌ Expected 4 snapshots with limit, got {len(snapshots_limited)}")
                    return False
                print("   ✅ Max pages limit respected")

            except Exception as e:
                # Top-level boundary of this check: report and fail.
                print(f"   ❌ Pagination test error: {e}")
                return False

        print("\n✅ Pagination logic test passed!")
        return True

    async def run_all_tests(self):
        """Run every test in order and report the combined outcome.

        All tests are executed even after one of them fails. A summary is
        printed at the end. If any test raises, the error and its traceback
        are printed and the run counts as failed.

        Returns:
            True only when every test returned True, otherwise False.
        """
        print("🚀 Starting Snapshot Downloader Tests")
        print("=" * 80)

        try:
            steps = (
                self.test_initialization,
                self.test_authentication_headers,
                self.test_authentication_flow,
                self.test_url_building,
                self.test_html_formatting,
                self.test_config_downloader,
                self.test_date_formatting,
                self.test_pagination_logic,
            )

            success = True
            for step in steps:
                outcome = step()
                # Async tests hand back a coroutine that still has to run.
                if asyncio.iscoroutine(outcome):
                    outcome = await outcome
                success &= outcome

            if not success:
                print("\n❌ SOME TESTS FAILED")
                return success

            summary = (
                "\n" + "=" * 80,
                "🎉 ALL SNAPSHOT DOWNLOADER TESTS PASSED!",
                "=" * 80,
                "✅ Snapshot downloader is working correctly",
                "✅ Pagination handling is implemented properly",
                "✅ HTML generation creates proper markup files",
                "✅ Authentication works with both API key and login",
                "✅ Configuration-based downloader is functional",
            )
            for line in summary:
                print(line)

            return success

        except Exception as e:
            print(f"\n❌ TEST SUITE FAILED: {e}")
            import traceback
            traceback.print_exc()
            return False


def show_usage_examples():
    """Print a usage cheat sheet for the snapshot downloader scripts.

    The output covers command-line invocations, configuration-file
    invocations, a feature list and a description of the generated output.
    Nothing is returned.
    """
    rule = "=" * 80
    lines = (
        "\n" + rule,
        "📋 SNAPSHOT DOWNLOADER USAGE EXAMPLES",
        rule,
        # Command-line section
        "\n💻 Command Line Usage:",
        "# Download snapshots with API key",
        "python3 snapshot_downloader.py --api-key YOUR_API_KEY",
        "",
        "# Download with login credentials",
        "python3 snapshot_downloader.py --email user@example.com --password password",
        "",
        "# Specify date range",
        "python3 snapshot_downloader.py --api-key KEY --date-from 2024-01-01 --date-to 2024-12-31",
        "",
        "# Limit pages for testing",
        "python3 snapshot_downloader.py --api-key KEY --max-pages 5",
        # Configuration-file section
        "\n🔧 Configuration File Usage:",
        "# Create example config",
        "python3 config_snapshot_downloader.py --create-example",
        "",
        "# Use config file",
        "python3 config_snapshot_downloader.py --config snapshot_config.json",
        "",
        "# Show config summary",
        "python3 config_snapshot_downloader.py --config snapshot_config.json --show-config",
        # Feature list
        "\n📄 Features:",
        "• Downloads all snapshots with pagination support",
        "• Generates interactive HTML reports",
        "• Includes search and filtering capabilities",
        "• Supports both API key and login authentication",
        "• Configurable date ranges and type filters",
        "• Mobile-responsive design",
        "• Collapsible sections for detailed metadata",
        # Output description
        "\n🎯 Output:",
        "• HTML file with all snapshots in chronological order",
        "• Embedded images and attachments (if available)",
        "• Raw JSON data for each snapshot (expandable)",
        "• Search functionality to find specific snapshots",
        "• Statistics and summary information",
    )
    for line in lines:
        print(line)


def main():
    """Configure logging, run the whole suite and report an exit status.

    The usage examples are printed only after a fully successful run.

    Returns:
        0 when every test passed, 1 otherwise.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # The suite is async; drive it to completion on a fresh event loop.
    passed = asyncio.run(SnapshotDownloaderTester().run_all_tests())
    if not passed:
        return 1

    show_usage_examples()
    return 0


if __name__ == "__main__":
    # sys.exit is always available; the bare ``exit`` helper is injected by
    # the ``site`` module for interactive use and is missing under ``-S``.
    sys.exit(main())