From 04cf4c7db5cd27a10bf6dcd81394b6b9ce94f361 Mon Sep 17 00:00:00 2001 From: Tudor Sitaru Date: Tue, 23 Dec 2025 21:45:27 +0000 Subject: [PATCH] refactoring pass one --- src/__init__.py | 3 ++ src/asset_tracker.py | 2 +- src/config_downloader.py | 55 +++++++++++++------------------ src/config_snapshot_downloader.py | 1 - src/image_downloader.py | 55 +++++++++++++------------------ src/snapshot_downloader.py | 30 ++++++++--------- src/utils.py | 55 +++++++++++++++++++++++++++++++ src/webserver.py | 4 +-- 8 files changed, 117 insertions(+), 88 deletions(-) create mode 100644 src/utils.py diff --git a/src/__init__.py b/src/__init__.py index 21d9ec0..f5b126c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -25,6 +25,7 @@ try: from .image_downloader import ImageDownloader from .snapshot_downloader import SnapshotDownloader from .webserver import SnapshotsWebServer + from .utils import sanitize_filename, get_extension_from_mime __all__ = [ "AssetTracker", @@ -34,6 +35,8 @@ try: "ImageDownloader", "SnapshotDownloader", "SnapshotsWebServer", + "sanitize_filename", + "get_extension_from_mime", ] except ImportError as e: diff --git a/src/asset_tracker.py b/src/asset_tracker.py index 6421de1..2675ac4 100644 --- a/src/asset_tracker.py +++ b/src/asset_tracker.py @@ -11,7 +11,7 @@ import logging import os from datetime import datetime from pathlib import Path -from typing import Dict, List, Set, Any, Optional +from typing import Dict, List, Any import hashlib diff --git a/src/config_downloader.py b/src/config_downloader.py index 99ee65f..edc4d1d 100644 --- a/src/config_downloader.py +++ b/src/config_downloader.py @@ -22,7 +22,7 @@ from typing import List, Dict, Any, Optional import time from tqdm import tqdm -# Import the auth manager and asset tracker +# Import the auth manager, asset tracker, and utilities try: from src.auth_manager import AuthManager except ImportError: @@ -33,6 +33,24 @@ try: except ImportError: AssetTracker = None +try: + from src.utils import sanitize_filename, get_extension_from_mime +except ImportError: + # Fallback implementations if utils not available + def sanitize_filename(filename: str) -> str: + invalid_chars = '<>:"/\\|?*' + for char in invalid_chars: + filename = filename.replace(char, "_") + filename = filename.strip(". ") + return filename if filename else "file" + + def get_extension_from_mime(mime_type: str) -> str: + mime_to_ext = { + "image/jpeg": ".jpg", "image/jpg": ".jpg", "image/png": ".png", + "image/gif": ".gif", "image/webp": ".webp", + } + return mime_to_ext.get(mime_type.lower(), ".jpg") + class ConfigImageDownloader: def __init__(self, config_file: str): @@ -233,15 +251,15 @@ class ConfigImageDownloader: # If no extension, try to get it from content-type or add default if "." not in filename: if "mimeType" in asset: - ext = self._get_extension_from_mime(asset["mimeType"]) + ext = get_extension_from_mime(asset["mimeType"]) elif "content_type" in asset: - ext = self._get_extension_from_mime(asset["content_type"]) + ext = get_extension_from_mime(asset["content_type"]) else: ext = ".jpg" # Default extension filename += ext # Sanitize filename - filename = self._sanitize_filename(filename) + filename = sanitize_filename(filename) # Ensure unique filename counter = 1 @@ -253,35 +271,6 @@ class ConfigImageDownloader: return filename - def _get_extension_from_mime(self, mime_type: str) -> str: - """Get file extension from MIME type.""" - mime_to_ext = { - "image/jpeg": ".jpg", - "image/jpg": ".jpg", - "image/png": ".png", - "image/gif": ".gif", - "image/webp": ".webp", - "image/bmp": ".bmp", - "image/tiff": ".tiff", - "image/svg+xml": ".svg", - } - return mime_to_ext.get(mime_type.lower(), ".jpg") - - def _sanitize_filename(self, filename: str) -> str: - """Sanitize filename by removing invalid characters.""" - # Remove or replace invalid characters - invalid_chars = '<>:"/\\|?*' - for char in invalid_chars: - filename = filename.replace(char, "_") - - # Remove leading/trailing spaces and dots - filename = filename.strip(". ") - - # Ensure filename is not empty - if not filename: - filename = "image" - - return filename async def download_asset( self, diff --git a/src/config_snapshot_downloader.py b/src/config_snapshot_downloader.py index 196e8e3..c864b9e 100644 --- a/src/config_snapshot_downloader.py +++ b/src/config_snapshot_downloader.py @@ -10,7 +10,6 @@ import argparse import asyncio import json import logging -import os from datetime import datetime, timedelta from pathlib import Path diff --git a/src/image_downloader.py b/src/image_downloader.py index 9aa7014..95c4d05 100644 --- a/src/image_downloader.py +++ b/src/image_downloader.py @@ -23,7 +23,7 @@ import aiofiles import aiohttp from tqdm import tqdm -# Import the auth manager and asset tracker +# Import the auth manager, asset tracker, and utilities try: from src.auth_manager import AuthManager except ImportError: @@ -34,6 +34,24 @@ try: except ImportError: AssetTracker = None +try: + from src.utils import sanitize_filename, get_extension_from_mime +except ImportError: + # Fallback implementations if utils not available + def sanitize_filename(filename: str) -> str: + invalid_chars = '<>:"/\\|?*' + for char in invalid_chars: + filename = filename.replace(char, "_") + filename = filename.strip(". ") + return filename if filename else "file" + + def get_extension_from_mime(mime_type: str) -> str: + mime_to_ext = { + "image/jpeg": ".jpg", "image/jpg": ".jpg", "image/png": ".png", + "image/gif": ".gif", "image/webp": ".webp", + } + return mime_to_ext.get(mime_type.lower(), ".jpg") + class ImageDownloader: def __init__( @@ -241,15 +259,15 @@ class ImageDownloader: # If no extension, try to get it from content-type or add default if "." not in filename: if "mimeType" in asset: - ext = self._get_extension_from_mime(asset["mimeType"]) + ext = get_extension_from_mime(asset["mimeType"]) elif "content_type" in asset: - ext = self._get_extension_from_mime(asset["content_type"]) + ext = get_extension_from_mime(asset["content_type"]) else: ext = ".jpg" # Default extension filename += ext # Sanitize filename - filename = self._sanitize_filename(filename) + filename = sanitize_filename(filename) # Ensure unique filename counter = 1 @@ -261,35 +279,6 @@ class ImageDownloader: return filename - def _get_extension_from_mime(self, mime_type: str) -> str: - """Get file extension from MIME type.""" - mime_to_ext = { - "image/jpeg": ".jpg", - "image/jpg": ".jpg", - "image/png": ".png", - "image/gif": ".gif", - "image/webp": ".webp", - "image/bmp": ".bmp", - "image/tiff": ".tiff", - "image/svg+xml": ".svg", - } - return mime_to_ext.get(mime_type.lower(), ".jpg") - - def _sanitize_filename(self, filename: str) -> str: - """Sanitize filename by removing invalid characters.""" - # Remove or replace invalid characters - invalid_chars = '<>:"/\\|?*' - for char in invalid_chars: - filename = filename.replace(char, "_") - - # Remove leading/trailing spaces and dots - filename = filename.strip(". ") - - # Ensure filename is not empty - if not filename: - filename = "image" - - return filename async def download_asset( self, diff --git a/src/snapshot_downloader.py b/src/snapshot_downloader.py index a2827fb..5850859 100644 --- a/src/snapshot_downloader.py +++ b/src/snapshot_downloader.py @@ -19,12 +19,23 @@ from urllib.parse import urlencode import aiofiles import aiohttp -# Import the auth manager +# Import the auth manager and utilities try: from src.auth_manager import AuthManager except ImportError: AuthManager = None +try: + from src.utils import sanitize_filename +except ImportError: + # Fallback implementation if utils not available + def sanitize_filename(filename: str) -> str: + invalid_chars = '<>:"/\\|?*' + for char in invalid_chars: + filename = filename.replace(char, "_") + filename = filename.strip(". ") + return filename if filename else "file" + class SnapshotDownloader: def __init__( @@ -509,7 +520,7 @@ class SnapshotDownloader: filename = media.get("fileName", f"media_{media_id}") # Sanitize filename - filename = self._sanitize_filename(filename) + filename = sanitize_filename(filename) # Check if file already exists filepath = self.assets_dir / filename @@ -543,21 +554,6 @@ class SnapshotDownloader: self.logger.error(f"Failed to download media {filename}: {e}") return None - def _sanitize_filename(self, filename: str) -> str: - """Sanitize filename by removing invalid characters.""" - # Remove or replace invalid characters - invalid_chars = '<>:"/\\|?*' - for char in invalid_chars: - filename = filename.replace(char, "_") - - # Remove leading/trailing spaces and dots - filename = filename.strip(". ") - - # Ensure filename is not empty - if not filename: - filename = "media_file" - - return filename async def generate_html_file( self, snapshots: List[Dict[str, Any]], date_from: str, date_to: str diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..7ac16c1 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Utility functions for ParentZone Downloader + +This module contains shared utility functions used across multiple modules. +""" + + +def sanitize_filename(filename: str) -> str: + """ + Sanitize filename by removing invalid characters. + + Args: + filename: The filename to sanitize + + Returns: + Sanitized filename safe for filesystem use + """ + # Remove or replace invalid characters + invalid_chars = '<>:"/\\|?*' + for char in invalid_chars: + filename = filename.replace(char, "_") + + # Remove leading/trailing spaces and dots + filename = filename.strip(". ") + + # Ensure filename is not empty + if not filename: + filename = "file" + + return filename + + +def get_extension_from_mime(mime_type: str) -> str: + """ + Get file extension from MIME type. + + Args: + mime_type: The MIME type string (e.g., 'image/jpeg') + + Returns: + File extension including the dot (e.g., '.jpg') + """ + mime_to_ext = { + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/png": ".png", + "image/gif": ".gif", + "image/webp": ".webp", + "image/bmp": ".bmp", + "image/tiff": ".tiff", + "image/svg+xml": ".svg", + } + return mime_to_ext.get(mime_type.lower(), ".jpg") + diff --git a/src/webserver.py b/src/webserver.py index 985e528..e883e7e 100644 --- a/src/webserver.py +++ b/src/webserver.py @@ -14,9 +14,7 @@ from pathlib import Path from urllib.parse import unquote from datetime import datetime -import aiohttp -from aiohttp import web, hdrs -from aiohttp.web_response import Response +from aiohttp import web class SnapshotsWebServer: