""" Monitoring and Profiling Router Provides endpoints for: - Browser performance profiling - Real-time crawler statistics - System resource monitoring - Session management """ from fastapi import APIRouter, HTTPException, BackgroundTasks, Query from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from typing import Dict, List, Optional, Any, AsyncGenerator from datetime import datetime, timedelta import uuid import asyncio import json import time import psutil import logging from collections import defaultdict logger = logging.getLogger(__name__) router = APIRouter( prefix="/monitoring", tags=["Monitoring & Profiling"], responses={ 404: {"description": "Session not found"}, 500: {"description": "Internal server error"} } ) # ============================================================================ # Data Structures # ============================================================================ # In-memory storage for profiling sessions PROFILING_SESSIONS: Dict[str, Dict[str, Any]] = {} # Real-time crawler statistics CRAWLER_STATS = { "active_crawls": 0, "total_crawls": 0, "successful_crawls": 0, "failed_crawls": 0, "total_bytes_processed": 0, "average_response_time_ms": 0.0, "last_updated": datetime.now().isoformat(), } # Per-URL statistics URL_STATS: Dict[str, Dict[str, Any]] = defaultdict(lambda: { "total_requests": 0, "success_count": 0, "failure_count": 0, "average_time_ms": 0.0, "last_accessed": None, }) # ============================================================================ # Pydantic Models # ============================================================================ class ProfilingStartRequest(BaseModel): """Request to start a profiling session.""" url: str = Field(..., description="URL to profile") browser_config: Optional[Dict[str, Any]] = Field( default_factory=dict, description="Browser configuration" ) crawler_config: Optional[Dict[str, Any]] = Field( default_factory=dict, description="Crawler configuration" ) profile_duration: Optional[int] = Field( default=30, ge=5, le=300, description="Maximum profiling duration in seconds" ) collect_network: bool = Field( default=True, description="Collect network performance data" ) collect_memory: bool = Field( default=True, description="Collect memory usage data" ) collect_cpu: bool = Field( default=True, description="Collect CPU usage data" ) class Config: schema_extra = { "example": { "url": "https://example.com", "profile_duration": 30, "collect_network": True, "collect_memory": True, "collect_cpu": True } } class ProfilingSession(BaseModel): """Profiling session information.""" session_id: str = Field(..., description="Unique session identifier") status: str = Field(..., description="Session status: running, completed, failed") url: str = Field(..., description="URL being profiled") start_time: str = Field(..., description="Session start time (ISO format)") end_time: Optional[str] = Field(None, description="Session end time (ISO format)") duration_seconds: Optional[float] = Field(None, description="Total duration in seconds") results: Optional[Dict[str, Any]] = Field(None, description="Profiling results") error: Optional[str] = Field(None, description="Error message if failed") class Config: schema_extra = { "example": { "session_id": "abc123", "status": "completed", "url": "https://example.com", "start_time": "2025-10-16T10:30:00", "end_time": "2025-10-16T10:30:30", "duration_seconds": 30.5, "results": { "performance": { "page_load_time_ms": 1234, "dom_content_loaded_ms": 890, 
"first_paint_ms": 567 } } } } class CrawlerStats(BaseModel): """Current crawler statistics.""" active_crawls: int = Field(..., description="Number of currently active crawls") total_crawls: int = Field(..., description="Total crawls since server start") successful_crawls: int = Field(..., description="Number of successful crawls") failed_crawls: int = Field(..., description="Number of failed crawls") success_rate: float = Field(..., description="Success rate percentage") total_bytes_processed: int = Field(..., description="Total bytes processed") average_response_time_ms: float = Field(..., description="Average response time") uptime_seconds: float = Field(..., description="Server uptime in seconds") memory_usage_mb: float = Field(..., description="Current memory usage in MB") cpu_percent: float = Field(..., description="Current CPU usage percentage") last_updated: str = Field(..., description="Last update timestamp") class URLStatistics(BaseModel): """Statistics for a specific URL pattern.""" url_pattern: str total_requests: int success_count: int failure_count: int success_rate: float average_time_ms: float last_accessed: Optional[str] class SessionListResponse(BaseModel): """List of profiling sessions.""" total: int sessions: List[ProfilingSession] # ============================================================================ # Helper Functions # ============================================================================ def get_system_stats() -> Dict[str, Any]: """Get current system resource usage.""" try: process = psutil.Process() return { "memory_usage_mb": process.memory_info().rss / 1024 / 1024, "cpu_percent": process.cpu_percent(interval=0.1), "num_threads": process.num_threads(), "open_files": len(process.open_files()), "connections": len(process.connections()), } except Exception as e: logger.error(f"Error getting system stats: {e}") return { "memory_usage_mb": 0.0, "cpu_percent": 0.0, "num_threads": 0, "open_files": 0, "connections": 0, } def cleanup_old_sessions(max_age_hours: int = 24): """Remove old profiling sessions to prevent memory leaks.""" cutoff = datetime.now() - timedelta(hours=max_age_hours) to_remove = [] for session_id, session in PROFILING_SESSIONS.items(): try: start_time = datetime.fromisoformat(session["start_time"]) if start_time < cutoff: to_remove.append(session_id) except (ValueError, KeyError): continue for session_id in to_remove: del PROFILING_SESSIONS[session_id] logger.info(f"Cleaned up old session: {session_id}") return len(to_remove) # ============================================================================ # Profiling Endpoints # ============================================================================ @router.post( "/profile/start", response_model=ProfilingSession, summary="Start profiling session", description="Start a new browser profiling session for performance analysis" ) async def start_profiling_session( request: ProfilingStartRequest, background_tasks: BackgroundTasks ): """ Start a new profiling session. Returns a session ID that can be used to retrieve results later. 

# ============================================================================
# Profiling Endpoints
# ============================================================================

@router.post(
    "/profile/start",
    response_model=ProfilingSession,
    summary="Start profiling session",
    description="Start a new browser profiling session for performance analysis"
)
async def start_profiling_session(
    request: ProfilingStartRequest,
    background_tasks: BackgroundTasks
):
    """
    Start a new profiling session.

    Returns a session ID that can be used to retrieve results later.
    The profiling runs in the background and collects:
    - Page load performance metrics
    - Network requests and timing
    - Memory usage patterns
    - CPU utilization
    - Browser-specific metrics
    """
    session_id = str(uuid.uuid4())
    start_time = datetime.now()

    session_data = {
        "session_id": session_id,
        "status": "running",
        "url": request.url,
        "start_time": start_time.isoformat(),
        "end_time": None,
        "duration_seconds": None,
        "results": None,
        "error": None,
        "config": {
            "profile_duration": request.profile_duration,
            "collect_network": request.collect_network,
            "collect_memory": request.collect_memory,
            "collect_cpu": request.collect_cpu,
        }
    }

    PROFILING_SESSIONS[session_id] = session_data

    # Add background task to run profiling
    background_tasks.add_task(
        run_profiling_session,
        session_id,
        request
    )

    logger.info(f"Started profiling session {session_id} for {request.url}")

    return ProfilingSession(**session_data)


@router.get(
    "/profile/{session_id}",
    response_model=ProfilingSession,
    summary="Get profiling results",
    description="Retrieve results from a profiling session"
)
async def get_profiling_results(session_id: str):
    """
    Get profiling session results.

    Returns the current status and results of a profiling session.
    If the session is still running, results will be None.
    """
    if session_id not in PROFILING_SESSIONS:
        raise HTTPException(
            status_code=404,
            detail=f"Profiling session '{session_id}' not found"
        )

    session = PROFILING_SESSIONS[session_id]
    return ProfilingSession(**session)


@router.get(
    "/profile",
    response_model=SessionListResponse,
    summary="List profiling sessions",
    description="List all profiling sessions with optional filtering"
)
async def list_profiling_sessions(
    status: Optional[str] = Query(None, description="Filter by status: running, completed, failed"),
    limit: int = Query(50, ge=1, le=500, description="Maximum number of sessions to return")
):
    """
    List all profiling sessions.

    Can be filtered by status and limited in number.
    """
    sessions = list(PROFILING_SESSIONS.values())

    # Filter by status if provided
    if status:
        sessions = [s for s in sessions if s["status"] == status]

    # Sort by start time (newest first)
    sessions.sort(key=lambda x: x["start_time"], reverse=True)

    # Limit results
    sessions = sessions[:limit]

    return SessionListResponse(
        total=len(sessions),
        sessions=[ProfilingSession(**s) for s in sessions]
    )


@router.delete(
    "/profile/{session_id}",
    summary="Delete profiling session",
    description="Delete a profiling session and its results"
)
async def delete_profiling_session(session_id: str):
    """
    Delete a profiling session.

    Removes the session and all associated data from memory.
    """
    if session_id not in PROFILING_SESSIONS:
        raise HTTPException(
            status_code=404,
            detail=f"Profiling session '{session_id}' not found"
        )

    session = PROFILING_SESSIONS.pop(session_id)
    logger.info(f"Deleted profiling session {session_id}")

    return {
        "success": True,
        "message": f"Session {session_id} deleted",
        "session": ProfilingSession(**session)
    }


@router.post(
    "/profile/cleanup",
    summary="Cleanup old sessions",
    description="Remove old profiling sessions to free memory"
)
async def cleanup_sessions(
    max_age_hours: int = Query(24, ge=1, le=168, description="Maximum age in hours")
):
    """
    Cleanup old profiling sessions.

    Removes sessions older than the specified age.
    """
    removed = cleanup_old_sessions(max_age_hours)

    return {
        "success": True,
        "removed_count": removed,
        "remaining_count": len(PROFILING_SESSIONS),
        "message": f"Removed {removed} sessions older than {max_age_hours} hours"
    }
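
# ----------------------------------------------------------------------------
# Example client usage (illustrative sketch): starting a profiling session and
# polling until the background task finishes. Assumes the API is served at
# http://localhost:8000 and that httpx is installed; neither is required by
# this router, and the function is never called from this module.
# ----------------------------------------------------------------------------
async def _example_profile_client(target_url: str, base_url: str = "http://localhost:8000"):
    import httpx  # optional dependency, imported lazily for the sketch

    async with httpx.AsyncClient(base_url=base_url) as client:
        # Kick off a profiling session
        resp = await client.post(
            "/monitoring/profile/start",
            json={"url": target_url, "profile_duration": 30},
        )
        resp.raise_for_status()
        session_id = resp.json()["session_id"]

        # Poll until the session leaves the "running" state
        while True:
            session = (await client.get(f"/monitoring/profile/{session_id}")).json()
            if session["status"] != "running":
                return session
            await asyncio.sleep(2)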
""" removed = cleanup_old_sessions(max_age_hours) return { "success": True, "removed_count": removed, "remaining_count": len(PROFILING_SESSIONS), "message": f"Removed {removed} sessions older than {max_age_hours} hours" } # ============================================================================ # Statistics Endpoints # ============================================================================ @router.get( "/stats", response_model=CrawlerStats, summary="Get crawler statistics", description="Get current crawler statistics and system metrics" ) async def get_crawler_stats(): """ Get current crawler statistics. Returns real-time metrics about: - Active and total crawls - Success/failure rates - Response times - System resource usage """ system_stats = get_system_stats() total = CRAWLER_STATS["successful_crawls"] + CRAWLER_STATS["failed_crawls"] success_rate = ( (CRAWLER_STATS["successful_crawls"] / total * 100) if total > 0 else 0.0 ) # Calculate uptime # In a real implementation, you'd track server start time uptime_seconds = 0.0 # Placeholder stats = CrawlerStats( active_crawls=CRAWLER_STATS["active_crawls"], total_crawls=CRAWLER_STATS["total_crawls"], successful_crawls=CRAWLER_STATS["successful_crawls"], failed_crawls=CRAWLER_STATS["failed_crawls"], success_rate=success_rate, total_bytes_processed=CRAWLER_STATS["total_bytes_processed"], average_response_time_ms=CRAWLER_STATS["average_response_time_ms"], uptime_seconds=uptime_seconds, memory_usage_mb=system_stats["memory_usage_mb"], cpu_percent=system_stats["cpu_percent"], last_updated=datetime.now().isoformat() ) return stats @router.get( "/stats/stream", summary="Stream crawler statistics", description="Server-Sent Events stream of real-time crawler statistics" ) async def stream_crawler_stats( interval: int = Query(2, ge=1, le=60, description="Update interval in seconds") ): """ Stream real-time crawler statistics. Returns an SSE (Server-Sent Events) stream that pushes statistics updates at the specified interval. Example: ```javascript const eventSource = new EventSource('/monitoring/stats/stream?interval=2'); eventSource.onmessage = (event) => { const stats = JSON.parse(event.data); console.log('Stats:', stats); }; ``` """ async def generate_stats() -> AsyncGenerator[str, None]: """Generate stats stream.""" try: while True: # Get current stats stats = await get_crawler_stats() # Format as SSE data = json.dumps(stats.dict()) yield f"data: {data}\n\n" # Wait for next interval await asyncio.sleep(interval) except asyncio.CancelledError: logger.info("Stats stream cancelled by client") except Exception as e: logger.error(f"Error in stats stream: {e}") yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n" return StreamingResponse( generate_stats(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", } ) @router.get( "/stats/urls", response_model=List[URLStatistics], summary="Get URL statistics", description="Get statistics for crawled URLs" ) async def get_url_statistics( limit: int = Query(100, ge=1, le=1000, description="Maximum number of URLs to return"), sort_by: str = Query("total_requests", description="Sort field: total_requests, success_rate, average_time_ms") ): """ Get statistics for crawled URLs. Returns metrics for each URL that has been crawled, including request counts, success rates, and timing. 
""" stats_list = [] for url, stats in URL_STATS.items(): total = stats["total_requests"] success_rate = (stats["success_count"] / total * 100) if total > 0 else 0.0 stats_list.append(URLStatistics( url_pattern=url, total_requests=stats["total_requests"], success_count=stats["success_count"], failure_count=stats["failure_count"], success_rate=success_rate, average_time_ms=stats["average_time_ms"], last_accessed=stats["last_accessed"] )) # Sort if sort_by == "success_rate": stats_list.sort(key=lambda x: x.success_rate, reverse=True) elif sort_by == "average_time_ms": stats_list.sort(key=lambda x: x.average_time_ms) else: # total_requests stats_list.sort(key=lambda x: x.total_requests, reverse=True) return stats_list[:limit] @router.post( "/stats/reset", summary="Reset statistics", description="Reset all crawler statistics to zero" ) async def reset_statistics(): """ Reset all statistics. Clears all accumulated statistics but keeps the server running. Useful for testing or starting fresh measurements. """ global CRAWLER_STATS, URL_STATS CRAWLER_STATS = { "active_crawls": 0, "total_crawls": 0, "successful_crawls": 0, "failed_crawls": 0, "total_bytes_processed": 0, "average_response_time_ms": 0.0, "last_updated": datetime.now().isoformat(), } URL_STATS.clear() logger.info("All statistics reset") return { "success": True, "message": "All statistics have been reset", "timestamp": datetime.now().isoformat() } # ============================================================================ # Background Tasks # ============================================================================ async def run_profiling_session(session_id: str, request: ProfilingStartRequest): """ Background task to run profiling session. This performs the actual profiling work: 1. Creates a crawler with profiling enabled 2. Crawls the target URL 3. Collects performance metrics 4. 

# ============================================================================
# Background Tasks
# ============================================================================

async def run_profiling_session(session_id: str, request: ProfilingStartRequest):
    """
    Background task to run profiling session.

    This performs the actual profiling work:
    1. Creates a crawler with profiling enabled
    2. Crawls the target URL
    3. Collects performance metrics
    4. Stores results in the session
    """
    start_time = time.time()

    try:
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
        from crawl4ai.browser_profiler import BrowserProfiler

        logger.info(f"Starting profiling for session {session_id}")

        # Create profiler
        profiler = BrowserProfiler()

        # Configure browser and crawler
        browser_config = BrowserConfig.load(request.browser_config)
        crawler_config = CrawlerRunConfig.load(request.crawler_config)

        # Enable profiling options
        browser_config.profiling_enabled = True

        results = {}

        async with AsyncWebCrawler(config=browser_config) as crawler:
            # Start profiling
            profiler.start()

            # Collect system stats before
            stats_before = get_system_stats()

            # Crawl with timeout
            try:
                result = await asyncio.wait_for(
                    crawler.arun(request.url, config=crawler_config),
                    timeout=request.profile_duration
                )
                crawl_success = result.success
            except asyncio.TimeoutError:
                logger.warning(f"Profiling session {session_id} timed out")
                crawl_success = False
                result = None

            # Stop profiling
            profiler_results = profiler.stop()

            # Collect system stats after
            stats_after = get_system_stats()

            # Build results
            results = {
                "crawl_success": crawl_success,
                "url": request.url,
                "performance": profiler_results if profiler_results else {},
                "system": {
                    "before": stats_before,
                    "after": stats_after,
                    "delta": {
                        "memory_mb": stats_after["memory_usage_mb"] - stats_before["memory_usage_mb"],
                        "cpu_percent": stats_after["cpu_percent"] - stats_before["cpu_percent"],
                    }
                }
            }

            if result:
                results["content"] = {
                    "markdown_length": len(result.markdown) if result.markdown else 0,
                    "html_length": len(result.html) if result.html else 0,
                    "links_count": len(result.links["internal"]) + len(result.links["external"]),
                    "media_count": len(result.media["images"]) + len(result.media["videos"]),
                }

        # Update session with results
        end_time = time.time()
        duration = end_time - start_time

        PROFILING_SESSIONS[session_id].update({
            "status": "completed",
            "end_time": datetime.now().isoformat(),
            "duration_seconds": duration,
            "results": results
        })

        logger.info(f"Profiling session {session_id} completed in {duration:.2f}s")

    except Exception as e:
        logger.error(f"Profiling session {session_id} failed: {str(e)}")

        PROFILING_SESSIONS[session_id].update({
            "status": "failed",
            "end_time": datetime.now().isoformat(),
            "duration_seconds": time.time() - start_time,
            "error": str(e)
        })


# ============================================================================
# Middleware Integration Points
# ============================================================================

def track_crawl_start():
    """Call this when a crawl starts."""
    CRAWLER_STATS["active_crawls"] += 1
    CRAWLER_STATS["total_crawls"] += 1
    CRAWLER_STATS["last_updated"] = datetime.now().isoformat()


def track_crawl_end(url: str, success: bool, duration_ms: float, bytes_processed: int = 0):
    """Call this when a crawl ends."""
    CRAWLER_STATS["active_crawls"] = max(0, CRAWLER_STATS["active_crawls"] - 1)

    if success:
        CRAWLER_STATS["successful_crawls"] += 1
    else:
        CRAWLER_STATS["failed_crawls"] += 1

    CRAWLER_STATS["total_bytes_processed"] += bytes_processed

    # Update average response time (running average)
    total = CRAWLER_STATS["successful_crawls"] + CRAWLER_STATS["failed_crawls"]
    current_avg = CRAWLER_STATS["average_response_time_ms"]
    CRAWLER_STATS["average_response_time_ms"] = (
        (current_avg * (total - 1) + duration_ms) / total
    )

    # Update URL stats
    url_stat = URL_STATS[url]
    url_stat["total_requests"] += 1
    if success:
        url_stat["success_count"] += 1
    else:
        url_stat["failure_count"] += 1

    # Update average time for this URL
    total_url = url_stat["total_requests"]
    current_avg_url = url_stat["average_time_ms"]
    url_stat["average_time_ms"] = (
        (current_avg_url * (total_url - 1) + duration_ms) / total_url
    )
    url_stat["last_accessed"] = datetime.now().isoformat()

    CRAWLER_STATS["last_updated"] = datetime.now().isoformat()
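
# ----------------------------------------------------------------------------
# Example integration (illustrative sketch): how a crawl handler elsewhere in
# the server might report into the counters above. The lazy crawl4ai import
# mirrors run_profiling_session; using len(result.html) as bytes_processed is
# a rough size proxy and an assumption of this sketch. The function is never
# called from this module.
# ----------------------------------------------------------------------------
async def _example_tracked_crawl(url: str):
    from crawl4ai import AsyncWebCrawler  # lazy import, as in run_profiling_session

    track_crawl_start()
    started = time.time()
    success = False
    bytes_processed = 0
    try:
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(url)
            success = bool(result.success)
            bytes_processed = len(result.html or "")
            return result
    finally:
        # Always close out the crawl so active_crawls and averages stay consistent
        track_crawl_end(
            url=url,
            success=success,
            duration_ms=(time.time() - started) * 1000,
            bytes_processed=bytes_processed,
        )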

# ============================================================================
# Health Check
# ============================================================================

@router.get(
    "/health",
    summary="Health check",
    description="Check if monitoring system is operational"
)
async def health_check():
    """
    Health check endpoint.

    Returns status of the monitoring system.
    """
    system_stats = get_system_stats()

    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "active_sessions": len([s for s in PROFILING_SESSIONS.values() if s["status"] == "running"]),
        "total_sessions": len(PROFILING_SESSIONS),
        "system": system_stats
    }
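
# ----------------------------------------------------------------------------
# Example wiring (illustrative sketch): how this router might be mounted in an
# application and how the periodic cleanup sketch above could be started. The
# factory name and the startup hook are assumptions; adapt to however the
# surrounding server builds its FastAPI app.
# ----------------------------------------------------------------------------
def create_app():
    from fastapi import FastAPI

    app = FastAPI()
    app.include_router(router)

    @app.on_event("startup")
    async def _start_maintenance():
        # Runs the illustrative cleanup loop defined near cleanup_old_sessions
        asyncio.create_task(periodic_session_cleanup())

    return app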