From e17d69c3081627a4ea3a5aaa35d72c7856a3d315 Mon Sep 17 00:00:00 2001 From: Tudor Sitaru Date: Fri, 10 Oct 2025 16:55:12 +0100 Subject: [PATCH] adding webserver --- WEBSERVER_README.md | 372 ++++++++++++++++++++++++++++++++ startup.sh | 72 +++++++ webserver.py | 503 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 947 insertions(+) create mode 100644 WEBSERVER_README.md create mode 100644 startup.sh create mode 100644 webserver.py diff --git a/WEBSERVER_README.md b/WEBSERVER_README.md new file mode 100644 index 0000000..04777ef --- /dev/null +++ b/WEBSERVER_README.md @@ -0,0 +1,372 @@ +# ParentZone Snapshots Web Server + +A built-in web server that serves your downloaded snapshot HTML files and their assets through a clean, responsive web interface. + +## Features + +- **📂 Directory Listing**: Browse all your snapshot files with file sizes and modification dates +- **🖼️ Asset Serving**: Properly serves images, CSS, and other assets referenced in HTML files +- **📱 Responsive Design**: Works great on desktop, tablet, and mobile devices +- **🔒 Security**: Path traversal protection and secure file serving +- **📊 Request Logging**: Detailed logging of all web requests +- **⚡ Caching**: Optimized caching headers for better performance + +## Quick Start + +### Using Docker (Recommended) + +The web server starts automatically when you run the Docker container: + +```bash +# Build and start with docker-compose +docker-compose up -d + +# Or build and run manually +docker build -t parentzone-downloader . 
+docker run -d -p 8080:8080 -v ./snapshots:/app/snapshots parentzone-downloader +``` + +The web interface will be available at: **http://localhost:8080** + +### Running Standalone + +You can also run the web server independently: + +```bash +# Start web server with default settings +python webserver.py + +# Custom port and directory +python webserver.py --port 3000 --snapshots-dir ./my-snapshots + +# Bind to all interfaces +python webserver.py --host 0.0.0.0 --port 8080 +``` + +## Configuration Options + +### Command Line Arguments + +| Argument | Default | Description | +|----------|---------|-------------| +| `--snapshots-dir` | `./snapshots` | Directory containing snapshot files | +| `--port` | `8080` | Port to run the server on | +| `--host` | `0.0.0.0` | Host interface to bind to | + +### Examples + +```bash +# Serve from custom directory on port 3000 +python webserver.py --snapshots-dir /path/to/snapshots --port 3000 + +# Local access only +python webserver.py --host 127.0.0.1 + +# Production setup +python webserver.py --host 0.0.0.0 --port 80 --snapshots-dir /var/snapshots +``` + +## Web Interface + +### Main Directory Page + +- **Clean Layout**: Modern, responsive design with file cards +- **File Information**: Shows file names, sizes, and last modified dates +- **Sorting**: Files are sorted by modification date (newest first) +- **Direct Links**: Click any file name to view the snapshot + +### File Serving + +- **HTML Files**: Served with proper content types and encoding +- **Assets**: Images, CSS, JS, and other assets are served correctly +- **Caching**: Efficient browser caching for better performance +- **Security**: Path traversal protection prevents unauthorized access + +## URL Structure + +| URL Pattern | Description | Example | +|-------------|-------------|---------| +| `/` | Main directory listing | `http://localhost:8080/` | +| `/{filename}.html` | Serve HTML snapshot file | `http://localhost:8080/snapshots_2024-01-01.html` | +| 
`/assets/{path}` | Serve asset files | `http://localhost:8080/assets/images/photo.jpg` | +| `/{filename}.{ext}` | Serve other files | `http://localhost:8080/snapshots.log` | + +## Docker Integration + +### Environment Variables + +The web server respects these environment variables when running in Docker: + +- `SNAPSHOTS_DIR`: Directory to serve files from (default: `/app/snapshots`) +- `WEB_PORT`: Port for the web server (default: `8080`) +- `WEB_HOST`: Host interface to bind to (default: `0.0.0.0`) + +### Volume Mounts + +Make sure your snapshots directory is properly mounted: + +```yaml +# docker-compose.yml +volumes: + - ./snapshots:/app/snapshots # Your local snapshots folder + - ./logs:/app/logs # Log files +``` + +### Port Mapping + +The default port `8080` is exposed and mapped in the Docker setup: + +```yaml +# docker-compose.yml +ports: + - "8080:8080" # Host:Container +``` + +To use a different port: + +```yaml +ports: + - "3000:8080" # Access via http://localhost:3000 +``` + +## File Types Supported + +### HTML Files +- **Snapshot files**: Main HTML files with embedded images and styles +- **Content-Type**: `text/html; charset=utf-8` +- **Features**: Full HTML rendering with linked assets + +### Asset Files +- **Images**: JPG, PNG, GIF, WebP, SVG, ICO +- **Stylesheets**: CSS files +- **Scripts**: JavaScript files +- **Data**: JSON files +- **Documents**: PDF files +- **Logs**: TXT and LOG files + +### Content Type Detection + +The server automatically detects content types based on file extensions: + +```python +content_types = { + ".html": "text/html; charset=utf-8", + ".css": "text/css; charset=utf-8", + ".js": "application/javascript; charset=utf-8", + ".jpg": "image/jpeg", + ".png": "image/png", + ".pdf": "application/pdf", + # ... 
and more +} +``` + +## Security Features + +### Path Traversal Protection + +The server prevents access to files outside the snapshots directory: + +- ✅ `/snapshots_2024-01-01.html` - Allowed +- ✅ `/assets/images/photo.jpg` - Allowed +- ❌ `/../../../etc/passwd` - Blocked +- ❌ `/../../config.json` - Blocked + +### Safe File Serving + +- Only serves files from designated directories +- Validates all file paths before serving +- Returns proper HTTP error codes for invalid requests +- Logs suspicious access attempts + +## Performance Optimization + +### Caching Headers + +The server sets appropriate caching headers: + +- **HTML files**: `Cache-Control: public, max-age=3600` (1 hour) +- **Asset files**: `Cache-Control: public, max-age=86400` (24 hours) +- **Last-Modified**: Proper modification time headers + +### Connection Handling + +- Built on `aiohttp` for high-performance async handling +- Efficient file serving with proper buffer sizes +- Graceful error handling and recovery + +## Logging + +### Request Logging + +All requests are logged with details: + +``` +2024-01-15 10:30:45 - webserver - INFO - 192.168.1.100 - GET /snapshots_2024-01-01.html - 200 - 0.045s +2024-01-15 10:30:46 - webserver - INFO - 192.168.1.100 - GET /assets/images/photo.jpg - 200 - 0.012s +``` + +### Error Logging + +Errors and security events are logged: + +``` +2024-01-15 10:31:00 - webserver - WARNING - Attempted path traversal: ../../../etc/passwd +2024-01-15 10:31:05 - webserver - ERROR - Error serving file unknown.html: File not found +``` + +### Log Location + +- **Docker**: Logs to `/app/logs/startup.log` and container stdout +- **Standalone**: Logs to console and any configured log files + +## Troubleshooting + +### Common Issues + +#### Port Already in Use +```bash +# Error: Address already in use +# Solution: Use a different port +python webserver.py --port 8081 +``` + +#### Permission Denied +```bash +# Error: Permission denied (port 80) +# Solution: Use sudo or a higher 
port number +sudo python webserver.py --port 80 +# Or +python webserver.py --port 8080 +``` + +#### No Files Visible +- Check that snapshots directory exists and contains HTML files +- Verify directory permissions are readable +- Check docker volume mounts are correct + +#### Assets Not Loading +- Ensure assets directory exists within snapshots folder +- Check that asset files are properly referenced in HTML +- Verify file permissions on asset files + +### Debug Mode + +For more verbose logging, modify the logging level: + +```python +# In webserver.py +logging.basicConfig(level=logging.DEBUG) +``` + +### Health Check + +Test if the server is running: + +```bash +# Check if server responds +curl http://localhost:8080/ + +# Check specific file +curl -I http://localhost:8080/snapshots_2024-01-01.html +``` + +## Development + +### Adding New Features + +The web server is designed to be easily extensible: + +```python +# Add new route +async def custom_handler(request): + return web.Response(text="Custom response") + +# Register route +app.router.add_get("/custom", custom_handler) +``` + +### Custom Styling + +You can customize the directory listing appearance by modifying the CSS in `_generate_index_html()`. 
+ +### API Endpoints + +Consider adding REST API endpoints for programmatic access: + +```python +# Example: JSON API for file listing +async def api_files(request): + files = get_file_list() # Your logic here + return web.json_response(files) + +app.router.add_get("/api/files", api_files) +``` + +## Production Deployment + +### Reverse Proxy Setup + +For production, consider using nginx as a reverse proxy: + +```nginx +server { + listen 80; + server_name your-domain.com; + + location / { + proxy_pass http://localhost:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } +} +``` + +### SSL/HTTPS + +Add SSL termination at the reverse proxy level: + +```nginx +server { + listen 443 ssl; + ssl_certificate /path/to/cert.pem; + ssl_certificate_key /path/to/key.pem; + + location / { + proxy_pass http://localhost:8080; + } +} +``` + +### Process Management + +Use systemd or supervisor to manage the web server process: + +```ini +# /etc/systemd/system/parentzone-webserver.service +[Unit] +Description=ParentZone Web Server +After=network.target + +[Service] +Type=simple +User=parentzone +WorkingDirectory=/opt/parentzone +ExecStart=/usr/bin/python3 webserver.py +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +## Contributing + +The web server is part of the ParentZone Downloader project. To contribute: + +1. Fork the repository +2. Make your changes to `webserver.py` +3. Test thoroughly +4. Submit a pull request + +## License + +This web server is part of the ParentZone Downloader project and follows the same license terms. 
\ No newline at end of file diff --git a/startup.sh b/startup.sh new file mode 100644 index 0000000..ae82f62 --- /dev/null +++ b/startup.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# ParentZone Downloader Startup Script +# This script starts both cron and the web server + +set -e + +LOG_DIR="/app/logs" +SNAPSHOTS_DIR="/app/snapshots" + +# Create directories if they don't exist +mkdir -p "$LOG_DIR" +mkdir -p "$SNAPSHOTS_DIR" + +# Function to log messages with timestamp +log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - STARTUP - $1" | tee -a "$LOG_DIR/startup.log" +} + +log_message "=== ParentZone Downloader Starting ===" + +# Start cron daemon +log_message "Starting cron daemon..." +cron + +# Create log file for cron output +touch /var/log/cron.log + +log_message "Cron daemon started" + +# Start web server in the background +log_message "Starting web server on port 8080..." +python3 webserver.py --host 0.0.0.0 --port 8080 --snapshots-dir "$SNAPSHOTS_DIR" & +WEB_SERVER_PID=$! + +log_message "Web server started with PID: $WEB_SERVER_PID" +log_message "Web interface available at http://localhost:8080" + +# Function to handle shutdown +shutdown() { + log_message "=== Shutdown Signal Received ===" + + if [ ! -z "$WEB_SERVER_PID" ]; then + log_message "Stopping web server (PID: $WEB_SERVER_PID)..." + kill "$WEB_SERVER_PID" 2>/dev/null || true + wait "$WEB_SERVER_PID" 2>/dev/null || true + log_message "Web server stopped" + fi + + log_message "Stopping cron daemon..." 
+ pkill cron 2>/dev/null || true + + log_message "=== ParentZone Downloader Shutdown Complete ===" + exit 0 +} + +# Set up signal handlers +trap shutdown SIGTERM SIGINT + +log_message "=== ParentZone Downloader Started Successfully ===" +log_message "Services running:" +log_message " - Cron daemon (scheduled downloads)" +log_message " - Web server at http://0.0.0.0:8080" +log_message " - Log files in: $LOG_DIR" +log_message " - Snapshots in: $SNAPSHOTS_DIR" + +# Keep the container running and follow cron logs +tail -f /var/log/cron.log & +TAIL_PID=$! + +# Wait for any process to exit +wait $WEB_SERVER_PID $TAIL_PID diff --git a/webserver.py b/webserver.py new file mode 100644 index 0000000..7696a55 --- /dev/null +++ b/webserver.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +""" +ParentZone Snapshots Web Server + +A simple web server that serves HTML snapshot files and their assets. +Provides a directory listing and serves static files from the snapshots folder. +""" + +import os +import asyncio +import argparse +import logging +from pathlib import Path +from urllib.parse import unquote +from datetime import datetime + +import aiohttp +from aiohttp import web, hdrs +from aiohttp.web_response import Response + + +class SnapshotsWebServer: + def __init__( + self, + snapshots_dir: str = "./snapshots", + port: int = 8080, + host: str = "0.0.0.0", + ): + self.snapshots_dir = Path(snapshots_dir).resolve() + self.port = port + self.host = host + + # Setup logging + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + self.logger = logging.getLogger(__name__) + + # Ensure snapshots directory exists + self.snapshots_dir.mkdir(parents=True, exist_ok=True) + + self.logger.info(f"Serving snapshots from: {self.snapshots_dir}") + + async def index_handler(self, request): + """Serve the main directory listing page.""" + try: + html_files = [] + + # Find all HTML files in the snapshots directory + for file_path in 
self.snapshots_dir.glob("*.html"): + stat = file_path.stat() + html_files.append( + { + "name": file_path.name, + "size": stat.st_size, + "modified": datetime.fromtimestamp(stat.st_mtime), + "path": file_path.name, + } + ) + + # Sort by modification time (newest first) + html_files.sort(key=lambda x: x["modified"], reverse=True) + + # Generate HTML page + html_content = self._generate_index_html(html_files) + + return web.Response(text=html_content, content_type="text/html") + + except Exception as e: + self.logger.error(f"Error generating index: {e}") + return web.Response( + text=f"

Error

Could not generate directory listing: {e}

", + status=500, + content_type="text/html", + ) + + def _generate_index_html(self, html_files): + """Generate the HTML directory listing page.""" + files_list = "" + + if not html_files: + files_list = "

No snapshot files found.

" + else: + for file_info in html_files: + size_mb = file_info["size"] / (1024 * 1024) + files_list += f""" +
+
+

{file_info["name"]}

+
+ {size_mb:.2f} MB + {file_info["modified"].strftime("%Y-%m-%d %H:%M:%S")} +
+
+
+ """ + + return f""" + + + + + + ParentZone Snapshots + + + +
+
+

šŸ“ø ParentZone Snapshots

+

Browse and view your downloaded snapshot files

+
+ +
+
+ šŸ“ Available Snapshot Files ({len(html_files)} files) +
+ {files_list} +
+ + +
+ + +""" + + async def file_handler(self, request): + """Serve individual HTML files and their assets.""" + try: + # Get the requested file path + file_path = unquote(request.match_info["filename"]) + requested_file = self.snapshots_dir / file_path + + # Security check: ensure the file is within the snapshots directory + try: + requested_file.resolve().relative_to(self.snapshots_dir.resolve()) + except ValueError: + self.logger.warning(f"Attempted path traversal: {file_path}") + return web.Response( + text="

403 Forbidden

Access denied.

", + status=403, + content_type="text/html", + ) + + # Check if file exists + if not requested_file.exists(): + return web.Response( + text="

404 Not Found

The requested file was not found.

", + status=404, + content_type="text/html", + ) + + # Determine content type + content_type = self._get_content_type(requested_file) + + # Read and serve the file + with open(requested_file, "rb") as f: + content = f.read() + + return web.Response( + body=content, + content_type=content_type, + headers={ + "Cache-Control": "public, max-age=3600", + "Last-Modified": datetime.fromtimestamp( + requested_file.stat().st_mtime + ).strftime("%a, %d %b %Y %H:%M:%S GMT"), + }, + ) + + except Exception as e: + self.logger.error( + f"Error serving file {request.match_info.get('filename', 'unknown')}: {e}" + ) + return web.Response( + text=f"

500 Internal Server Error

Could not serve file: {e}

", + status=500, + content_type="text/html", + ) + + async def assets_handler(self, request): + """Serve asset files (images, CSS, JS, etc.) from assets subdirectories.""" + try: + # Get the requested asset path + asset_path = unquote(request.match_info["path"]) + requested_file = self.snapshots_dir / "assets" / asset_path + + # Security check + try: + requested_file.resolve().relative_to(self.snapshots_dir.resolve()) + except ValueError: + self.logger.warning(f"Attempted path traversal in assets: {asset_path}") + return web.Response(text="403 Forbidden", status=403) + + # Check if file exists + if not requested_file.exists(): + return web.Response(text="404 Not Found", status=404) + + # Determine content type + content_type = self._get_content_type(requested_file) + + # Read and serve the file + with open(requested_file, "rb") as f: + content = f.read() + + return web.Response( + body=content, + content_type=content_type, + headers={ + "Cache-Control": "public, max-age=86400", # Cache assets for 24 hours + "Last-Modified": datetime.fromtimestamp( + requested_file.stat().st_mtime + ).strftime("%a, %d %b %Y %H:%M:%S GMT"), + }, + ) + + except Exception as e: + self.logger.error( + f"Error serving asset {request.match_info.get('path', 'unknown')}: {e}" + ) + return web.Response(text="500 Internal Server Error", status=500) + + def _get_content_type(self, file_path: Path) -> str: + """Determine the content type based on file extension.""" + suffix = file_path.suffix.lower() + + content_types = { + ".html": "text/html; charset=utf-8", + ".css": "text/css; charset=utf-8", + ".js": "application/javascript; charset=utf-8", + ".json": "application/json; charset=utf-8", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", + ".ico": "image/x-icon", + ".pdf": "application/pdf", + ".txt": "text/plain; charset=utf-8", + ".log": "text/plain; charset=utf-8", + } + + return 
content_types.get(suffix, "application/octet-stream") + + def setup_routes(self, app): + """Configure the web application routes.""" + # Main index page + app.router.add_get("/", self.index_handler) + + # Serve HTML files directly + app.router.add_get("/{filename:.+\.html}", self.file_handler) + + # Serve assets (images, CSS, JS, etc.) + app.router.add_get("/assets/{path:.+}", self.assets_handler) + + # Serve other static files (logs, etc.) + app.router.add_get( + "/{filename:.+\.(css|js|json|txt|log|ico)}", self.file_handler + ) + + async def create_app(self): + """Create and configure the web application.""" + app = web.Application() + + # Setup routes + self.setup_routes(app) + + # Add middleware for logging + async def logging_middleware(request, handler): + start_time = datetime.now() + + try: + response = await handler(request) + + # Log the request + duration = (datetime.now() - start_time).total_seconds() + self.logger.info( + f"{request.remote} - {request.method} {request.path} - " + f"{response.status} - {duration:.3f}s" + ) + + return response + + except Exception as e: + duration = (datetime.now() - start_time).total_seconds() + self.logger.error( + f"{request.remote} - {request.method} {request.path} - " + f"ERROR: {e} - {duration:.3f}s" + ) + raise + + app.middlewares.append(logging_middleware) + + return app + + async def start_server(self): + """Start the web server.""" + app = await self.create_app() + + runner = web.AppRunner(app) + await runner.setup() + + site = web.TCPSite(runner, self.host, self.port) + await site.start() + + self.logger.info(f"šŸš€ ParentZone Snapshots Web Server started!") + self.logger.info(f"šŸ“‚ Serving files from: {self.snapshots_dir}") + self.logger.info(f"🌐 Server running at: http://{self.host}:{self.port}") + self.logger.info(f"šŸ”— Open in browser: http://localhost:{self.port}") + self.logger.info("Press Ctrl+C to stop the server") + + return runner + + +def main(): + parser = argparse.ArgumentParser( + 
description="ParentZone Snapshots Web Server", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Start server with default settings + python webserver.py + + # Start server on custom port + python webserver.py --port 3000 + + # Serve from custom directory + python webserver.py --snapshots-dir /path/to/snapshots + + # Start server on all interfaces + python webserver.py --host 0.0.0.0 --port 8080 + """, + ) + + parser.add_argument( + "--snapshots-dir", + default="./snapshots", + help="Directory containing snapshot files (default: ./snapshots)", + ) + + parser.add_argument( + "--port", + type=int, + default=8080, + help="Port to run the server on (default: 8080)", + ) + + parser.add_argument( + "--host", + default="0.0.0.0", + help="Host to bind the server to (default: 0.0.0.0)", + ) + + args = parser.parse_args() + + # Create and start the server + server = SnapshotsWebServer( + snapshots_dir=args.snapshots_dir, port=args.port, host=args.host + ) + + async def run_server(): + runner = None + try: + runner = await server.start_server() + + # Keep the server running + while True: + await asyncio.sleep(1) + + except KeyboardInterrupt: + print("\nšŸ‘‹ Shutting down server...") + except Exception as e: + print(f"āŒ Server error: {e}") + finally: + if runner: + await runner.cleanup() + + try: + asyncio.run(run_server()) + except KeyboardInterrupt: + print("\nāœ… Server stopped") + + +if __name__ == "__main__": + main()