# parentzone_downloader/test_media_download.py

#!/usr/bin/env python3
"""
Test Media Download Functionality

This script tests that media files (images and attachments) are properly downloaded
to the assets subfolder and referenced correctly in the HTML output.
"""

import asyncio
import json
import logging
import sys
import tempfile
from pathlib import Path
import os
from unittest.mock import AsyncMock, MagicMock

# Add the current directory to the path so we can import modules
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from snapshot_downloader import SnapshotDownloader


class MediaDownloadTester:
    """Console-driven checks for SnapshotDownloader's media download support.

    Every ``test_*`` method prints a step-by-step report and returns ``True``
    when all of its checks hold, or ``False`` as soon as one fails.  The
    methods are plain callables rather than pytest/unittest cases; use
    :meth:`run_all_tests` to execute them together.
    """

    def __init__(self):
        """Initialize the tester with a logger named after this module."""
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _print_banner(title, *, leading_blank=True):
        """Print the 60-column banner that opens a test section.

        Args:
            title: Text shown between the two rules.
            leading_blank: When true, an empty line precedes the banner.
        """
        rule = "=" * 60
        print(f"\n{rule}" if leading_blank else rule)
        print(title)
        print(rule)

    def test_assets_folder_creation(self) -> bool:
        """Check that constructing a downloader creates ``<output_dir>/assets``.

        Returns:
            True if the folder exists and ``downloader.assets_dir`` equals it.
        """
        self._print_banner("TEST: Assets Folder Creation", leading_blank=False)

        with tempfile.TemporaryDirectory() as temp_dir:
            print("1. Testing assets folder creation...")

            downloader = SnapshotDownloader(output_dir=temp_dir)

            # is_dir() is already False for a missing path, so it covers the
            # existence check as well.
            assets_dir = Path(temp_dir) / "assets"
            if not assets_dir.is_dir():
                print("   ❌ Assets folder not created")
                return False
            print("   ✅ Assets folder created successfully")

            # The downloader must expose the same location it created.
            if downloader.assets_dir != assets_dir:
                print("   ❌ Assets directory property incorrect")
                return False
            print("   ✅ Assets directory property set correctly")

        print("\n✅ Assets folder creation test passed!")
        return True

    def test_filename_sanitization(self) -> bool:
        """Check ``_sanitize_filename`` against a table of expected outputs.

        Returns:
            True if every case matches; False at the first mismatch.
        """
        self._print_banner("TEST: Filename Sanitization")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            test_cases = [
                {
                    "input": "normal_filename.jpg",
                    "expected": "normal_filename.jpg",
                    "description": "Normal filename",
                },
                {
                    "input": "file<with>invalid:chars.png",
                    "expected": "file_with_invalid_chars.png",
                    "description": "Invalid characters",
                },
                {
                    "input": "  .leading_trailing_spaces.  ",
                    "expected": "leading_trailing_spaces",
                    "description": "Leading/trailing spaces and dots",
                },
                {
                    "input": "",
                    "expected": "media_file",
                    "description": "Empty filename",
                },
                {
                    "input": "file/with\\path|chars?.txt",
                    "expected": "file_with_path_chars_.txt",
                    "description": "Path characters",
                },
            ]

            print("1. Testing filename sanitization cases...")
            for i, test_case in enumerate(test_cases, 1):
                print(f"\n{i}. {test_case['description']}")
                print(f"   Input: '{test_case['input']}'")

                result = downloader._sanitize_filename(test_case['input'])
                print(f"   Output: '{result}'")

                if result != test_case['expected']:
                    print(f"   ❌ Expected: '{test_case['expected']}'")
                    return False
                print("   ✅ Correctly sanitized")

        print("\n✅ Filename sanitization test passed!")
        return True

    async def test_media_download_mock(self) -> bool:
        """Exercise ``download_media_file`` against a mocked HTTP session.

        Covers three scenarios: a fresh download, a repeated download of the
        same media, and a media dict that has no ``id``.

        Returns:
            True if all three scenarios produce the expected result.
        """
        self._print_banner("TEST: Media Download (Mocked)")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing image download...")

            # Mock media object
            mock_media = {
                "id": 794684,
                "fileName": "test_image.jpeg",
                "type": "image",
                "mimeType": "image/jpeg",
                "updated": "2025-07-31T12:46:24.413",
                "status": "available",
                "downloadable": True,
            }

            # Mock response: raise_for_status is a plain MagicMock so that
            # calling it synchronously does not produce an un-awaited coroutine.
            mock_response = AsyncMock()
            mock_response.status = 200
            mock_response.raise_for_status = MagicMock()

            # Mock file content, delivered as a single chunk
            fake_image_content = b"fake_image_data_for_testing"

            async def mock_iter_chunked(chunk_size):
                yield fake_image_content

            mock_response.content.iter_chunked = mock_iter_chunked

            # NOTE(review): ``mock_session`` is an AsyncMock, so calling
            # ``mock_session.get(...)`` yields a coroutine; the ``__aenter__``
            # configured below only takes effect once that call is awaited.
            # If download_media_file uses ``async with session.get(...)``
            # directly, ``get`` would need to be a MagicMock instead --
            # confirm against the downloader implementation.
            mock_session = AsyncMock()
            mock_session.get.return_value.__aenter__.return_value = mock_response

            # Test the download
            result = await downloader.download_media_file(mock_session, mock_media)

            # The HTML references media by this relative path.
            if result != "assets/test_image.jpeg":
                print(f"   ❌ Expected 'assets/test_image.jpeg', got '{result}'")
                return False
            print("   ✅ Download returned correct relative path")

            # Check file was created
            expected_file = Path(temp_dir) / "assets" / "test_image.jpeg"
            if not expected_file.exists():
                print("   ❌ File not created")
                return False
            print("   ✅ File created in assets folder")

            # Check file content
            if expected_file.read_bytes() != fake_image_content:
                print("   ❌ File content doesn't match")
                return False
            print("   ✅ File content matches")

            print("\n2. Testing existing file handling...")

            # Downloading the same media again must yield the same path.
            result2 = await downloader.download_media_file(mock_session, mock_media)
            if result2 != "assets/test_image.jpeg":
                print("   ❌ Existing file handling failed")
                return False
            print("   ✅ Existing file handling works")

            print("\n3. Testing download failure...")

            # Media without an ID is expected to be rejected with None.
            invalid_media = {"fileName": "no_id_file.jpg"}
            result3 = await downloader.download_media_file(mock_session, invalid_media)
            if result3 is not None:
                print("   ❌ Should return None for invalid media")
                return False
            print("   ✅ Properly handles invalid media")

        print("\n✅ Media download mock test passed!")
        return True

    async def test_media_formatting_integration(self) -> bool:
        """Check that ``format_snapshot_html`` references downloaded media.

        ``download_media_file`` is replaced on the instance so that the test
        image resolves to a local ``assets/`` path without any HTTP traffic.

        Returns:
            True if the HTML contains the local image path, the image grid and
            the caption/metadata markup.
        """
        self._print_banner("TEST: Media Formatting Integration")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing snapshot with media formatting...")

            # Create a test image file in assets
            test_image_path = Path(temp_dir) / "assets" / "test_snapshot_image.jpeg"
            test_image_path.parent.mkdir(parents=True, exist_ok=True)
            test_image_path.write_bytes(b"fake_image_content")

            # Mock snapshot with media
            mock_snapshot = {
                "id": 123456,
                "type": "Snapshot",
                "child": {"forename": "Test", "surname": "Child"},
                "author": {"forename": "Test", "surname": "Teacher"},
                "startTime": "2024-01-15T10:30:00",
                "notes": "<p>Test snapshot with media</p>",
                "media": [
                    {
                        "id": 123456,
                        "fileName": "test_snapshot_image.jpeg",
                        "type": "image",
                        "mimeType": "image/jpeg",
                        "updated": "2024-01-15T10:30:00",
                        "status": "available",
                        "downloadable": True,
                    }
                ],
            }

            # Mock session to simulate successful download
            mock_session = AsyncMock()

            # Short-circuit the download for the test image; anything else
            # still goes through the real implementation.
            original_download = downloader.download_media_file

            async def mock_download(session, media):
                if media.get('fileName') == 'test_snapshot_image.jpeg':
                    return "assets/test_snapshot_image.jpeg"
                return await original_download(session, media)

            downloader.download_media_file = mock_download

            # Test formatting
            html_content = await downloader.format_snapshot_html(mock_snapshot, mock_session)

            print("2. Checking HTML content for media references...")

            # Check for local image reference
            if 'src="assets/test_snapshot_image.jpeg"' not in html_content:
                print("   ❌ Local image path not found")
                # Extra hints to tell "no media at all" from "wrong path".
                print("   Debug: Looking for image references...")
                if 'assets/' in html_content:
                    print("   Found assets/ references in HTML")
                if 'test_snapshot_image.jpeg' in html_content:
                    print("   Found filename in HTML")
                return False
            print("   ✅ Local image path found in HTML")

            # Check for image grid structure
            if 'class="image-grid"' not in html_content:
                print("   ❌ Image grid structure missing")
                return False
            print("   ✅ Image grid structure present")

            # Check for image metadata
            has_caption = 'class="image-caption"' in html_content
            has_meta = 'class="image-meta"' in html_content
            if not (has_caption and has_meta):
                print("   ❌ Image caption or metadata missing")
                return False
            print("   ✅ Image caption and metadata present")

        print("\n✅ Media formatting integration test passed!")
        return True

    async def test_complete_html_generation_with_media(self) -> bool:
        """Check ``generate_html_file`` end to end with two media snapshots.

        ``download_media_file`` is replaced on the instance so the two fixture
        images resolve to local ``assets/`` paths.

        Returns:
            True if the HTML file is produced and contains every expected
            fragment; all fragments are reported before a failure is returned.
        """
        self._print_banner("TEST: Complete HTML Generation with Media")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Setting up test environment...")

            # Create the fixture directory explicitly instead of relying on
            # the downloader's constructor, matching the setup used in
            # test_media_formatting_integration.
            test_images = ["image1.jpg", "image2.png"]
            assets_dir = Path(temp_dir) / "assets"
            assets_dir.mkdir(parents=True, exist_ok=True)
            for img_name in test_images:
                (assets_dir / img_name).write_bytes(
                    f"fake_content_for_{img_name}".encode()
                )

            # Mock snapshots with media
            mock_snapshots = [
                {
                    "id": 100001,
                    "type": "Snapshot",
                    "child": {"forename": "Alice", "surname": "Smith"},
                    "author": {"forename": "Teacher", "surname": "One"},
                    "startTime": "2024-01-15T10:30:00",
                    "notes": "<p>Alice's first snapshot</p>",
                    "media": [
                        {
                            "id": 1001,
                            "fileName": "image1.jpg",
                            "type": "image",
                            "mimeType": "image/jpeg",
                        }
                    ],
                },
                {
                    "id": 100002,
                    "type": "Snapshot",
                    "child": {"forename": "Bob", "surname": "Johnson"},
                    "author": {"forename": "Teacher", "surname": "Two"},
                    "startTime": "2024-01-16T14:20:00",
                    "notes": "<p>Bob's creative work</p>",
                    "media": [
                        {
                            "id": 1002,
                            "fileName": "image2.png",
                            "type": "image",
                            "mimeType": "image/png",
                        }
                    ],
                },
            ]

            # Mock the download_media_file method
            async def mock_download_media(session, media):
                filename = media.get('fileName', 'unknown.jpg')
                if filename in test_images:
                    return f"assets/{filename}"
                return None

            downloader.download_media_file = mock_download_media

            print("2. Generating complete HTML file...")
            html_file = await downloader.generate_html_file(mock_snapshots, "2024-01-01", "2024-12-31")

            if not (html_file and html_file.exists()):
                print("   ❌ HTML file not generated")
                return False
            print("   ✅ HTML file generated successfully")

            with open(html_file, 'r', encoding='utf-8') as f:
                content = f.read()

            print("3. Checking HTML content...")

            # Fragments that must appear in the generated page
            checks = [
                ('src="assets/image1.jpg"', "Image 1 local reference"),
                ('src="assets/image2.png"', "Image 2 local reference"),
                ('Alice by Teacher One', "Snapshot 1 title"),
                ('Bob by Teacher Two', "Snapshot 2 title"),
                ('class="image-grid"', "Image grid structure"),
            ]

            # Report every fragment before deciding, so a single run shows
            # everything that is missing.
            all_passed = True
            for check_text, description in checks:
                if check_text in content:
                    print(f"   ✅ {description} found")
                else:
                    print(f"   ❌ {description} missing")
                    all_passed = False

            if not all_passed:
                return False

        print("\n✅ Complete HTML generation with media test passed!")
        return True

    async def run_all_tests(self) -> bool:
        """Run every media download test and print an overall summary.

        Each test runs in isolation: an exception raised by one test is
        reported together with its traceback and counted as a failure, and
        the remaining tests still run.

        Returns:
            True only if every test returned a truthy result without raising.
        """
        print("🚀 Starting Media Download Tests")
        print("=" * 80)

        tests = (
            self.test_assets_folder_creation,
            self.test_filename_sanitization,
            self.test_media_download_mock,
            self.test_media_formatting_integration,
            self.test_complete_html_generation_with_media,
        )

        success = True
        for test in tests:
            try:
                outcome = test()
                # Async tests hand back a coroutine that still has to run.
                if asyncio.iscoroutine(outcome):
                    outcome = await outcome
            except Exception as e:
                # Top-level boundary of the harness: report and keep going so
                # one crash does not hide the results of the other tests.
                print(f"\n❌ MEDIA DOWNLOAD TEST CRASHED ({test.__name__}): {e}")
                import traceback
                traceback.print_exc()
                outcome = False

            # bool() so a test that forgets to return counts as a failure
            # instead of raising TypeError.
            success = bool(outcome) and success

        if success:
            print("\n" + "=" * 80)
            print("🎉 ALL MEDIA DOWNLOAD TESTS PASSED!")
            print("=" * 80)
            print("✅ Assets folder created correctly")
            print("✅ Filename sanitization works properly")
            print("✅ Media files download to assets subfolder")
            print("✅ HTML references local files correctly")
            print("✅ Complete integration working")
            print("\n📁 Media Download Features:")
            print("• Downloads images to assets/ subfolder")
            print("• Downloads attachments to assets/ subfolder")
            print("• Uses relative paths in HTML (assets/filename.jpg)")
            print("• Fallback to API URLs if download fails")
            print("• Sanitizes filenames for filesystem safety")
            print("• Handles existing files (no re-download)")
        else:
            print("\n❌ SOME MEDIA DOWNLOAD TESTS FAILED")

        return success


def show_media_download_info():
    """Print a fixed, human-readable overview of the media download feature.

    The overview is written to standard output one line at a time and covers
    how downloads work, supported media types, the on-disk layout, how the
    HTML references the files, benefits, and caveats.
    """
    rule = "=" * 80

    # Entries starting with "\n" open a new section with a blank line.
    overview = (
        "\n" + rule,
        "📁 MEDIA DOWNLOAD FUNCTIONALITY",
        rule,
        "\n🎯 How It Works:",
        "1. Creates 'assets' subfolder in output directory",
        "2. Downloads media files (images, attachments) from API",
        "3. Saves files with sanitized filenames",
        "4. Updates HTML to reference local files",
        "5. Fallback to API URLs if download fails",
        "\n📋 Supported Media Types:",
        "• Images: JPEG, PNG, GIF, WebP, etc.",
        "• Documents: PDF, DOC, TXT, etc.",
        "• Any file type from ParentZone media API",
        "\n💾 File Organization:",
        "output_directory/",
        "├── snapshots_DATE_to_DATE.html",
        "├── snapshots.log",
        "└── assets/",
        "    ├── image1.jpeg",
        "    ├── document.pdf",
        "    └── attachment.txt",
        "\n🔗 HTML Integration:",
        "• Images: <img src=\"assets/filename.jpg\">",
        "• Attachments: <a href=\"assets/filename.pdf\">",
        "• Relative paths for portability",
        "• Self-contained reports (HTML + assets)",
        "\n✨ Benefits:",
        "• Offline viewing - images work without internet",
        "• Faster loading - no API requests for media",
        "• Portable reports - can be shared easily",
        "• Professional presentation with embedded media",
        "\n⚠️ Considerations:",
        "• Requires storage space for downloaded media",
        "• Download time increases with media count",
        "• Large files may take longer to process",
        "• API authentication required for media download",
    )

    for entry in overview:
        print(entry)


def main():
    """Configure logging, run the full test suite, and report an exit code.

    The feature overview is printed only after a fully successful run.

    Returns:
        0 if every test passed, 1 otherwise.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # run_all_tests is a coroutine function, so drive it with a fresh event loop.
    all_passed = asyncio.run(MediaDownloadTester().run_all_tests())
    if not all_passed:
        return 1

    show_media_download_info()
    return 0


if __name__ == "__main__":
    # Use sys.exit() rather than the bare exit(): the latter is an interactive
    # convenience injected by the site module and is not guaranteed to exist
    # (for example under `python -S` or in frozen builds).
    sys.exit(main())