Files
parentzone_downloader/test_media_download.py
Tudor Sitaru ddde67ca62 first commit
2025-10-07 14:52:04 +01:00

496 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Test Media Download Functionality
This script tests that media files (images and attachments) are properly downloaded
to the assets subfolder and referenced correctly in the HTML output.
"""
import asyncio
import json
import logging
import sys
import tempfile
from pathlib import Path
import os
from unittest.mock import AsyncMock, MagicMock
# Add the current directory to the path so we can import modules
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from snapshot_downloader import SnapshotDownloader
class MediaDownloadTester:
    """Console-driven checks for ``SnapshotDownloader``'s media handling.

    Every ``test_*`` method prints a step-by-step report and returns ``True``
    when all of its checks pass and ``False`` otherwise.  Ordinary check
    failures are reported through the return value, not by raising.
    ``run_all_tests`` executes the methods in a fixed order and aggregates
    their results.
    """

    def __init__(self):
        """Initialize the tester with a logger named after this module."""
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _print_header(title, leading_newline=True):
        """Print the 60-column banner that introduces one test section.

        Args:
            title: Text shown between the two rule lines.
            leading_newline: When true, the first rule is preceded by a blank
                line (used by every section except the very first one).
        """
        rule = "=" * 60
        print(("\n" + rule) if leading_newline else rule)
        print(title)
        print(rule)

    @staticmethod
    def _make_mock_session(response):
        """Build a fake HTTP session whose ``get()`` works with ``async with``.

        Calling an attribute of a bare ``AsyncMock`` returns a *coroutine*,
        and a coroutine does not implement the async context manager
        protocol, so ``async with session.get(...)`` would raise ``TypeError``
        before any configured ``__aenter__`` is consulted.  Here ``get`` is a
        plain ``MagicMock`` that synchronously returns an object whose
        ``__aenter__`` resolves to ``response``.

        NOTE(review): this assumes the downloader consumes the session via
        ``async with session.get(...) as response`` -- confirm against
        ``SnapshotDownloader.download_media_file``.

        Args:
            response: Object handed to the ``async with`` body.

        Returns:
            An ``AsyncMock`` session with a context-manager-capable ``get``.
        """
        request_ctx = MagicMock()
        request_ctx.__aenter__.return_value = response
        # A falsy __aexit__ result lets exceptions from the body propagate.
        request_ctx.__aexit__.return_value = False

        session = AsyncMock()
        session.get = MagicMock(return_value=request_ctx)
        return session

    def test_assets_folder_creation(self):
        """Check that constructing a downloader creates ``<output>/assets``.

        Returns:
            ``True`` if the directory exists and ``downloader.assets_dir``
            equals it, otherwise ``False``.
        """
        self._print_header("TEST: Assets Folder Creation", leading_newline=False)

        with tempfile.TemporaryDirectory() as temp_dir:
            print("1. Testing assets folder creation...")
            downloader = SnapshotDownloader(output_dir=temp_dir)

            # is_dir() is False for a missing path, so it covers exists() too.
            assets_dir = Path(temp_dir) / "assets"
            if not assets_dir.is_dir():
                print(" ❌ Assets folder not created")
                return False
            print(" ✅ Assets folder created successfully")

            if downloader.assets_dir != assets_dir:
                print(" ❌ Assets directory property incorrect")
                return False
            print(" ✅ Assets directory property set correctly")

            print("\n✅ Assets folder creation test passed!")
            return True

    def test_filename_sanitization(self):
        """Run ``_sanitize_filename`` over a fixed table of inputs.

        Returns:
            ``True`` if every input maps to its expected output; ``False`` at
            the first mismatch.
        """
        self._print_header("TEST: Filename Sanitization")

        # (description, raw input, expected sanitized output)
        cases = [
            ("Normal filename", "normal_filename.jpg", "normal_filename.jpg"),
            (
                "Invalid characters",
                "file<with>invalid:chars.png",
                "file_with_invalid_chars.png",
            ),
            (
                "Leading/trailing spaces and dots",
                " .leading_trailing_spaces. ",
                "leading_trailing_spaces",
            ),
            ("Empty filename", "", "media_file"),
            (
                "Path characters",
                "file/with\\path|chars?.txt",
                "file_with_path_chars_.txt",
            ),
        ]

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing filename sanitization cases...")
            for number, (description, raw, expected) in enumerate(cases, 1):
                print(f"\n{number}. {description}")
                print(f" Input: '{raw}'")
                result = downloader._sanitize_filename(raw)
                print(f" Output: '{result}'")

                if result != expected:
                    print(f" ❌ Expected: '{expected}'")
                    return False
                print(" ✅ Correctly sanitized")

            print("\n✅ Filename sanitization test passed!")
            return True

    async def test_media_download_mock(self):
        """Exercise ``download_media_file`` against a mocked HTTP session.

        Covers three scenarios in order: a fresh download, a repeat download
        of the same media, and a media dict without an ``id``.

        Returns:
            ``True`` if all scenarios behave as expected, else ``False``.
        """
        self._print_header("TEST: Media Download (Mocked)")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing image download...")
            mock_media = {
                "id": 794684,
                "fileName": "test_image.jpeg",
                "type": "image",
                "mimeType": "image/jpeg",
                "updated": "2025-07-31T12:46:24.413",
                "status": "available",
                "downloadable": True,
            }

            fake_image_content = b"fake_image_data_for_testing"

            async def mock_iter_chunked(chunk_size):
                # Deliver the whole payload as a single chunk.
                yield fake_image_content

            mock_response = AsyncMock()
            mock_response.status = 200
            # raise_for_status is synchronous on the real response object.
            mock_response.raise_for_status = MagicMock()
            mock_response.content.iter_chunked = mock_iter_chunked

            mock_session = self._make_mock_session(mock_response)

            result = await downloader.download_media_file(mock_session, mock_media)

            if result != "assets/test_image.jpeg":
                print(f" ❌ Expected 'assets/test_image.jpeg', got '{result}'")
                return False
            print(" ✅ Download returned correct relative path")

            expected_file = Path(temp_dir) / "assets" / "test_image.jpeg"
            if not expected_file.exists():
                print(" ❌ File not created")
                return False
            print(" ✅ File created in assets folder")

            if expected_file.read_bytes() != fake_image_content:
                print(" ❌ File content doesn't match")
                return False
            print(" ✅ File content matches")

            print("\n2. Testing existing file handling...")
            # A second request for the same media must yield the same path.
            result2 = await downloader.download_media_file(mock_session, mock_media)
            if result2 != "assets/test_image.jpeg":
                print(" ❌ Existing file handling failed")
                return False
            print(" ✅ Existing file handling works")

            print("\n3. Testing download failure...")
            # Media without an "id" is expected to be rejected with None.
            invalid_media = {"fileName": "no_id_file.jpg"}
            result3 = await downloader.download_media_file(mock_session, invalid_media)
            if result3 is not None:
                print(" ❌ Should return None for invalid media")
                return False
            print(" ✅ Properly handles invalid media")

            print("\n✅ Media download mock test passed!")
            return True

    async def test_media_formatting_integration(self):
        """Check that ``format_snapshot_html`` references local media files.

        ``download_media_file`` is replaced on the downloader instance so the
        known test image resolves to its ``assets/`` path without any HTTP.

        Returns:
            ``True`` if the HTML contains the local ``src``, the image grid,
            and the caption/meta classes; otherwise ``False``.
        """
        self._print_header("TEST: Media Formatting Integration")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Testing snapshot with media formatting...")
            test_image_path = Path(temp_dir) / "assets" / "test_snapshot_image.jpeg"
            test_image_path.parent.mkdir(exist_ok=True)
            test_image_path.write_bytes(b"fake_image_content")

            mock_snapshot = {
                "id": 123456,
                "type": "Snapshot",
                "child": {"forename": "Test", "surname": "Child"},
                "author": {"forename": "Test", "surname": "Teacher"},
                "startTime": "2024-01-15T10:30:00",
                "notes": "<p>Test snapshot with media</p>",
                "media": [
                    {
                        "id": 123456,
                        "fileName": "test_snapshot_image.jpeg",
                        "type": "image",
                        "mimeType": "image/jpeg",
                        "updated": "2024-01-15T10:30:00",
                        "status": "available",
                        "downloadable": True,
                    }
                ],
            }

            mock_session = AsyncMock()

            # Short-circuit the download for our fixture; defer to the real
            # implementation for anything else.
            original_download = downloader.download_media_file

            async def mock_download(session, media):
                if media.get('fileName') == 'test_snapshot_image.jpeg':
                    return "assets/test_snapshot_image.jpeg"
                return await original_download(session, media)

            downloader.download_media_file = mock_download

            html_content = await downloader.format_snapshot_html(mock_snapshot, mock_session)

            print("2. Checking HTML content for media references...")
            if 'src="assets/test_snapshot_image.jpeg"' not in html_content:
                print(" ❌ Local image path not found")
                # Extra hints to narrow down what the HTML did contain.
                print(" Debug: Looking for image references...")
                if 'assets/' in html_content:
                    print(" Found assets/ references in HTML")
                if 'test_snapshot_image.jpeg' in html_content:
                    print(" Found filename in HTML")
                return False
            print(" ✅ Local image path found in HTML")

            if 'class="image-grid"' not in html_content:
                print(" ❌ Image grid structure missing")
                return False
            print(" ✅ Image grid structure present")

            has_caption = 'class="image-caption"' in html_content
            has_meta = 'class="image-meta"' in html_content
            if not (has_caption and has_meta):
                print(" ❌ Image caption or metadata missing")
                return False
            print(" ✅ Image caption and metadata present")

            print("\n✅ Media formatting integration test passed!")
            return True

    async def test_complete_html_generation_with_media(self):
        """Generate a full HTML report for two snapshots and inspect it.

        ``download_media_file`` is replaced so the two fixture images resolve
        to their ``assets/`` paths and any other media resolves to ``None``.

        Returns:
            ``True`` if the file is produced and every expected fragment is
            present.  All fragments are checked and reported before a
            ``False`` result is returned.
        """
        self._print_header("TEST: Complete HTML Generation with Media")

        with tempfile.TemporaryDirectory() as temp_dir:
            downloader = SnapshotDownloader(output_dir=temp_dir)

            print("1. Setting up test environment...")
            test_images = ["image1.jpg", "image2.png"]
            # Do not rely on the downloader having created the folder; the
            # fixture files must be writable regardless.
            assets_dir = Path(temp_dir) / "assets"
            assets_dir.mkdir(parents=True, exist_ok=True)
            for img_name in test_images:
                (assets_dir / img_name).write_bytes(
                    f"fake_content_for_{img_name}".encode()
                )

            mock_snapshots = [
                {
                    "id": 100001,
                    "type": "Snapshot",
                    "child": {"forename": "Alice", "surname": "Smith"},
                    "author": {"forename": "Teacher", "surname": "One"},
                    "startTime": "2024-01-15T10:30:00",
                    "notes": "<p>Alice's first snapshot</p>",
                    "media": [
                        {
                            "id": 1001,
                            "fileName": "image1.jpg",
                            "type": "image",
                            "mimeType": "image/jpeg",
                        }
                    ],
                },
                {
                    "id": 100002,
                    "type": "Snapshot",
                    "child": {"forename": "Bob", "surname": "Johnson"},
                    "author": {"forename": "Teacher", "surname": "Two"},
                    "startTime": "2024-01-16T14:20:00",
                    "notes": "<p>Bob's creative work</p>",
                    "media": [
                        {
                            "id": 1002,
                            "fileName": "image2.png",
                            "type": "image",
                            "mimeType": "image/png",
                        }
                    ],
                },
            ]

            async def mock_download_media(session, media):
                filename = media.get('fileName', 'unknown.jpg')
                if filename in test_images:
                    return f"assets/{filename}"
                return None

            downloader.download_media_file = mock_download_media

            print("2. Generating complete HTML file...")
            html_file = await downloader.generate_html_file(
                mock_snapshots, "2024-01-01", "2024-12-31"
            )

            if not (html_file and html_file.exists()):
                print(" ❌ HTML file not generated")
                return False
            print(" ✅ HTML file generated successfully")

            with open(html_file, 'r', encoding='utf-8') as f:
                content = f.read()

            print("3. Checking HTML content...")
            checks = [
                ('src="assets/image1.jpg"', "Image 1 local reference"),
                ('src="assets/image2.png"', "Image 2 local reference"),
                ('Alice by Teacher One', "Snapshot 1 title"),
                ('Bob by Teacher Two', "Snapshot 2 title"),
                ('class="image-grid"', "Image grid structure"),
            ]

            # Report every check before deciding, so one run lists all gaps.
            all_passed = True
            for check_text, description in checks:
                if check_text in content:
                    print(f"{description} found")
                else:
                    print(f"{description} missing")
                    all_passed = False

            if not all_passed:
                return False

            print("\n✅ Complete HTML generation with media test passed!")
            return True

    async def run_all_tests(self):
        """Run every test in order and print an overall summary.

        All tests are executed even if an earlier one returns ``False``.  An
        exception raised by any test aborts the run, is printed with its
        traceback, and counts as failure.

        Returns:
            ``True`` only if every test returned a truthy result.
        """
        print("🚀 Starting Media Download Tests")
        print("=" * 80)

        try:
            results = [
                self.test_assets_folder_creation(),
                self.test_filename_sanitization(),
                await self.test_media_download_mock(),
                await self.test_media_formatting_integration(),
                await self.test_complete_html_generation_with_media(),
            ]
            success = all(results)

            if success:
                print("\n" + "=" * 80)
                print("🎉 ALL MEDIA DOWNLOAD TESTS PASSED!")
                print("=" * 80)
                print("✅ Assets folder created correctly")
                print("✅ Filename sanitization works properly")
                print("✅ Media files download to assets subfolder")
                print("✅ HTML references local files correctly")
                print("✅ Complete integration working")
                print("\n📁 Media Download Features:")
                print("• Downloads images to assets/ subfolder")
                print("• Downloads attachments to assets/ subfolder")
                print("• Uses relative paths in HTML (assets/filename.jpg)")
                print("• Fallback to API URLs if download fails")
                print("• Sanitizes filenames for filesystem safety")
                print("• Handles existing files (no re-download)")
            else:
                print("\n❌ SOME MEDIA DOWNLOAD TESTS FAILED")

            return success
        except Exception as e:
            # Top-level boundary of the test run: report and convert to a
            # failing result instead of crashing the script.
            print(f"\n❌ MEDIA DOWNLOAD TESTS FAILED: {e}")
            import traceback
            traceback.print_exc()
            return False
def show_media_download_info():
    """Print a static overview of the media download feature to stdout.

    The text covers how the feature works, supported media types, the output
    folder layout, HTML integration, benefits and considerations.  Nothing is
    computed or returned; each entry below is emitted with one ``print`` call.
    """
    rule = "=" * 80
    report = (
        "\n" + rule,
        "📁 MEDIA DOWNLOAD FUNCTIONALITY",
        rule,
        "\n🎯 How It Works:",
        "1. Creates 'assets' subfolder in output directory",
        "2. Downloads media files (images, attachments) from API",
        "3. Saves files with sanitized filenames",
        "4. Updates HTML to reference local files",
        "5. Fallback to API URLs if download fails",
        "\n📋 Supported Media Types:",
        "• Images: JPEG, PNG, GIF, WebP, etc.",
        "• Documents: PDF, DOC, TXT, etc.",
        "• Any file type from ParentZone media API",
        "\n💾 File Organization:",
        "output_directory/",
        "├── snapshots_DATE_to_DATE.html",
        "├── snapshots.log",
        "└── assets/",
        " ├── image1.jpeg",
        " ├── document.pdf",
        " └── attachment.txt",
        "\n🔗 HTML Integration:",
        "• Images: <img src=\"assets/filename.jpg\">",
        "• Attachments: <a href=\"assets/filename.pdf\">",
        "• Relative paths for portability",
        "• Self-contained reports (HTML + assets)",
        "\n✨ Benefits:",
        "• Offline viewing - images work without internet",
        "• Faster loading - no API requests for media",
        "• Portable reports - can be shared easily",
        "• Professional presentation with embedded media",
        "\n⚠️ Considerations:",
        "• Requires storage space for downloaded media",
        "• Download time increases with media count",
        "• Large files may take longer to process",
        "• API authentication required for media download",
    )
    for entry in report:
        print(entry)
def main():
    """Configure logging, run the media download tests, and report a status.

    The feature overview is printed only after a fully successful run.

    Returns:
        ``0`` when all tests passed, ``1`` otherwise (suitable as a process
        exit code).
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    passed = asyncio.run(MediaDownloadTester().run_all_tests())
    if not passed:
        return 1

    show_media_download_info()
    return 0
# Script entry point.  sys.exit() is used instead of the bare exit() helper:
# exit() is injected by the `site` module for interactive sessions and is not
# available when Python runs without it (e.g. `python -S`).
if __name__ == "__main__":
    sys.exit(main())