- Implemented an interactive monitoring dashboard in `demo_monitoring_dashboard.py` for real-time statistics, profiling session management, and system resource monitoring.
- Created a quick test script, `test_monitoring_quick.py`, to verify the functionality of the monitoring endpoints.
- Developed comprehensive integration tests in `test_monitoring_endpoints.py` covering health checks, statistics, profiling sessions, and real-time streaming.
- Added error handling and user-friendly output to the dashboard for better usability.
"""
|
|
Monitoring and Profiling Router
|
|
|
|
Provides endpoints for:
|
|
- Browser performance profiling
|
|
- Real-time crawler statistics
|
|
- System resource monitoring
|
|
- Session management
|
|
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Any, AsyncGenerator
from datetime import datetime, timedelta
import uuid
import asyncio
import json
import time
import psutil
import logging
from collections import defaultdict

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/monitoring",
    tags=["Monitoring & Profiling"],
    responses={
        404: {"description": "Session not found"},
        500: {"description": "Internal server error"}
    }
)

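# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not executed here): mounting this router in a
# FastAPI app. Assumes this module is importable as `monitoring_router`;
# adjust the import path to your project layout.
#
#     from fastapi import FastAPI
#     from monitoring_router import router
#
#     app = FastAPI()
#     app.include_router(router)
#     # Endpoints then live under /monitoring/, e.g. GET /monitoring/health
# ----------------------------------------------------------------------------
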
# ============================================================================
# Data Structures
# ============================================================================

# In-memory storage for profiling sessions
PROFILING_SESSIONS: Dict[str, Dict[str, Any]] = {}

# Real-time crawler statistics
CRAWLER_STATS = {
    "active_crawls": 0,
    "total_crawls": 0,
    "successful_crawls": 0,
    "failed_crawls": 0,
    "total_bytes_processed": 0,
    "average_response_time_ms": 0.0,
    "last_updated": datetime.now().isoformat(),
}

# Per-URL statistics
URL_STATS: Dict[str, Dict[str, Any]] = defaultdict(lambda: {
    "total_requests": 0,
    "success_count": 0,
    "failure_count": 0,
    "average_time_ms": 0.0,
    "last_accessed": None,
})

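# Server start time, so /monitoring/stats can report a real uptime rather
# than a placeholder. Module import time is a simple approximation; an app
# could instead record this in its lifespan/startup handler.
SERVER_START_TIME = time.time()
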
# ============================================================================
# Pydantic Models
# ============================================================================

class ProfilingStartRequest(BaseModel):
    """Request to start a profiling session."""
    url: str = Field(..., description="URL to profile")
    browser_config: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Browser configuration"
    )
    crawler_config: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Crawler configuration"
    )
    profile_duration: Optional[int] = Field(
        default=30,
        ge=5,
        le=300,
        description="Maximum profiling duration in seconds"
    )
    collect_network: bool = Field(
        default=True,
        description="Collect network performance data"
    )
    collect_memory: bool = Field(
        default=True,
        description="Collect memory usage data"
    )
    collect_cpu: bool = Field(
        default=True,
        description="Collect CPU usage data"
    )

    class Config:
        schema_extra = {
            "example": {
                "url": "https://example.com",
                "profile_duration": 30,
                "collect_network": True,
                "collect_memory": True,
                "collect_cpu": True
            }
        }


class ProfilingSession(BaseModel):
    """Profiling session information."""
    session_id: str = Field(..., description="Unique session identifier")
    status: str = Field(..., description="Session status: running, completed, failed")
    url: str = Field(..., description="URL being profiled")
    start_time: str = Field(..., description="Session start time (ISO format)")
    end_time: Optional[str] = Field(None, description="Session end time (ISO format)")
    duration_seconds: Optional[float] = Field(None, description="Total duration in seconds")
    results: Optional[Dict[str, Any]] = Field(None, description="Profiling results")
    error: Optional[str] = Field(None, description="Error message if failed")

    class Config:
        schema_extra = {
            "example": {
                "session_id": "abc123",
                "status": "completed",
                "url": "https://example.com",
                "start_time": "2025-10-16T10:30:00",
                "end_time": "2025-10-16T10:30:30",
                "duration_seconds": 30.5,
                "results": {
                    "performance": {
                        "page_load_time_ms": 1234,
                        "dom_content_loaded_ms": 890,
                        "first_paint_ms": 567
                    }
                }
            }
        }


class CrawlerStats(BaseModel):
    """Current crawler statistics."""
    active_crawls: int = Field(..., description="Number of currently active crawls")
    total_crawls: int = Field(..., description="Total crawls since server start")
    successful_crawls: int = Field(..., description="Number of successful crawls")
    failed_crawls: int = Field(..., description="Number of failed crawls")
    success_rate: float = Field(..., description="Success rate percentage")
    total_bytes_processed: int = Field(..., description="Total bytes processed")
    average_response_time_ms: float = Field(..., description="Average response time")
    uptime_seconds: float = Field(..., description="Server uptime in seconds")
    memory_usage_mb: float = Field(..., description="Current memory usage in MB")
    cpu_percent: float = Field(..., description="Current CPU usage percentage")
    last_updated: str = Field(..., description="Last update timestamp")


class URLStatistics(BaseModel):
    """Statistics for a specific URL pattern."""
    url_pattern: str
    total_requests: int
    success_count: int
    failure_count: int
    success_rate: float
    average_time_ms: float
    last_accessed: Optional[str]


class SessionListResponse(BaseModel):
    """List of profiling sessions."""
    total: int
    sessions: List[ProfilingSession]

# ============================================================================
# Helper Functions
# ============================================================================

def get_system_stats() -> Dict[str, Any]:
    """Get current system resource usage."""
    try:
        process = psutil.Process()

        return {
            "memory_usage_mb": process.memory_info().rss / 1024 / 1024,
            "cpu_percent": process.cpu_percent(interval=0.1),
            "num_threads": process.num_threads(),
            "open_files": len(process.open_files()),
            "connections": len(process.connections()),
        }
    except Exception as e:
        logger.error(f"Error getting system stats: {e}")
        return {
            "memory_usage_mb": 0.0,
            "cpu_percent": 0.0,
            "num_threads": 0,
            "open_files": 0,
            "connections": 0,
        }


def cleanup_old_sessions(max_age_hours: int = 24):
    """Remove old profiling sessions to prevent memory leaks."""
    cutoff = datetime.now() - timedelta(hours=max_age_hours)

    to_remove = []
    for session_id, session in PROFILING_SESSIONS.items():
        try:
            start_time = datetime.fromisoformat(session["start_time"])
            if start_time < cutoff:
                to_remove.append(session_id)
        except (ValueError, KeyError):
            continue

    for session_id in to_remove:
        del PROFILING_SESSIONS[session_id]
        logger.info(f"Cleaned up old session: {session_id}")

    return len(to_remove)

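# ----------------------------------------------------------------------------
# Scheduling sketch (illustrative): calling cleanup_old_sessions() hourly from
# the host app, so stale sessions are pruned without manual /profile/cleanup
# calls. Assumes a FastAPI lifespan handler in the application entry point and
# that this module is importable as `monitoring_router`.
#
#     import asyncio
#     from contextlib import asynccontextmanager
#     from fastapi import FastAPI
#     from monitoring_router import cleanup_old_sessions, router
#
#     @asynccontextmanager
#     async def lifespan(app: FastAPI):
#         async def periodic_cleanup():
#             while True:
#                 await asyncio.sleep(3600)  # hourly
#                 cleanup_old_sessions(max_age_hours=24)
#         task = asyncio.create_task(periodic_cleanup())
#         yield
#         task.cancel()
#
#     app = FastAPI(lifespan=lifespan)
#     app.include_router(router)
# ----------------------------------------------------------------------------
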
# ============================================================================
# Profiling Endpoints
# ============================================================================

@router.post(
    "/profile/start",
    response_model=ProfilingSession,
    summary="Start profiling session",
    description="Start a new browser profiling session for performance analysis"
)
async def start_profiling_session(
    request: ProfilingStartRequest,
    background_tasks: BackgroundTasks
):
    """
    Start a new profiling session.

    Returns a session ID that can be used to retrieve results later.
    The profiling runs in the background and collects:
    - Page load performance metrics
    - Network requests and timing
    - Memory usage patterns
    - CPU utilization
    - Browser-specific metrics
    """
    session_id = str(uuid.uuid4())
    start_time = datetime.now()

    session_data = {
        "session_id": session_id,
        "status": "running",
        "url": request.url,
        "start_time": start_time.isoformat(),
        "end_time": None,
        "duration_seconds": None,
        "results": None,
        "error": None,
        "config": {
            "profile_duration": request.profile_duration,
            "collect_network": request.collect_network,
            "collect_memory": request.collect_memory,
            "collect_cpu": request.collect_cpu,
        }
    }

    PROFILING_SESSIONS[session_id] = session_data

    # Add background task to run profiling
    background_tasks.add_task(
        run_profiling_session,
        session_id,
        request
    )

    logger.info(f"Started profiling session {session_id} for {request.url}")

    return ProfilingSession(**session_data)

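# ----------------------------------------------------------------------------
# Illustrative client usage (not part of the API): start a session and poll
# for results with httpx. Assumes the server is reachable on localhost:8000.
#
#     import time
#     import httpx
#
#     base = "http://localhost:8000/monitoring"
#     session = httpx.post(
#         f"{base}/profile/start",
#         json={"url": "https://example.com", "profile_duration": 30},
#     ).json()
#
#     while True:
#         session = httpx.get(f"{base}/profile/{session['session_id']}").json()
#         if session["status"] != "running":
#             break
#         time.sleep(2)
#
#     print(session["status"], session.get("results"))
# ----------------------------------------------------------------------------
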
@router.get(
    "/profile/{session_id}",
    response_model=ProfilingSession,
    summary="Get profiling results",
    description="Retrieve results from a profiling session"
)
async def get_profiling_results(session_id: str):
    """
    Get profiling session results.

    Returns the current status and results of a profiling session.
    If the session is still running, results will be None.
    """
    if session_id not in PROFILING_SESSIONS:
        raise HTTPException(
            status_code=404,
            detail=f"Profiling session '{session_id}' not found"
        )

    session = PROFILING_SESSIONS[session_id]
    return ProfilingSession(**session)

@router.get(
    "/profile",
    response_model=SessionListResponse,
    summary="List profiling sessions",
    description="List all profiling sessions with optional filtering"
)
async def list_profiling_sessions(
    status: Optional[str] = Query(None, description="Filter by status: running, completed, failed"),
    limit: int = Query(50, ge=1, le=500, description="Maximum number of sessions to return")
):
    """
    List all profiling sessions.

    Can be filtered by status and limited in number. The `total` field
    reflects the number of matching sessions before the limit is applied.
    """
    sessions = list(PROFILING_SESSIONS.values())

    # Filter by status if provided
    if status:
        sessions = [s for s in sessions if s["status"] == status]

    # Count matches before truncating to the requested limit
    total = len(sessions)

    # Sort by start time (newest first)
    sessions.sort(key=lambda x: x["start_time"], reverse=True)

    # Limit results
    sessions = sessions[:limit]

    return SessionListResponse(
        total=total,
        sessions=[ProfilingSession(**s) for s in sessions]
    )

@router.delete(
    "/profile/{session_id}",
    summary="Delete profiling session",
    description="Delete a profiling session and its results"
)
async def delete_profiling_session(session_id: str):
    """
    Delete a profiling session.

    Removes the session and all associated data from memory.
    """
    if session_id not in PROFILING_SESSIONS:
        raise HTTPException(
            status_code=404,
            detail=f"Profiling session '{session_id}' not found"
        )

    session = PROFILING_SESSIONS.pop(session_id)
    logger.info(f"Deleted profiling session {session_id}")

    return {
        "success": True,
        "message": f"Session {session_id} deleted",
        "session": ProfilingSession(**session)
    }

@router.post(
    "/profile/cleanup",
    summary="Cleanup old sessions",
    description="Remove old profiling sessions to free memory"
)
async def cleanup_sessions(
    max_age_hours: int = Query(24, ge=1, le=168, description="Maximum age in hours")
):
    """
    Clean up old profiling sessions.

    Removes sessions older than the specified age.
    """
    removed = cleanup_old_sessions(max_age_hours)

    return {
        "success": True,
        "removed_count": removed,
        "remaining_count": len(PROFILING_SESSIONS),
        "message": f"Removed {removed} sessions older than {max_age_hours} hours"
    }

# ============================================================================
# Statistics Endpoints
# ============================================================================

@router.get(
    "/stats",
    response_model=CrawlerStats,
    summary="Get crawler statistics",
    description="Get current crawler statistics and system metrics"
)
async def get_crawler_stats():
    """
    Get current crawler statistics.

    Returns real-time metrics about:
    - Active and total crawls
    - Success/failure rates
    - Response times
    - System resource usage
    """
    system_stats = get_system_stats()

    total = CRAWLER_STATS["successful_crawls"] + CRAWLER_STATS["failed_crawls"]
    success_rate = (
        (CRAWLER_STATS["successful_crawls"] / total * 100)
        if total > 0 else 0.0
    )

    # Uptime measured from module import; see SERVER_START_TIME above
    uptime_seconds = time.time() - SERVER_START_TIME

    stats = CrawlerStats(
        active_crawls=CRAWLER_STATS["active_crawls"],
        total_crawls=CRAWLER_STATS["total_crawls"],
        successful_crawls=CRAWLER_STATS["successful_crawls"],
        failed_crawls=CRAWLER_STATS["failed_crawls"],
        success_rate=success_rate,
        total_bytes_processed=CRAWLER_STATS["total_bytes_processed"],
        average_response_time_ms=CRAWLER_STATS["average_response_time_ms"],
        uptime_seconds=uptime_seconds,
        memory_usage_mb=system_stats["memory_usage_mb"],
        cpu_percent=system_stats["cpu_percent"],
        last_updated=datetime.now().isoformat()
    )

    return stats

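# ----------------------------------------------------------------------------
# Quick manual checks (illustrative; assumes a local server on port 8000).
# The -N flag disables curl's output buffering, which matters for SSE:
#
#     curl http://localhost:8000/monitoring/stats
#     curl -N "http://localhost:8000/monitoring/stats/stream?interval=2"
# ----------------------------------------------------------------------------
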
@router.get(
    "/stats/stream",
    summary="Stream crawler statistics",
    description="Server-Sent Events stream of real-time crawler statistics"
)
async def stream_crawler_stats(
    interval: int = Query(2, ge=1, le=60, description="Update interval in seconds")
):
    """
    Stream real-time crawler statistics.

    Returns an SSE (Server-Sent Events) stream that pushes
    statistics updates at the specified interval.

    Example:
    ```javascript
    const eventSource = new EventSource('/monitoring/stats/stream?interval=2');
    eventSource.onmessage = (event) => {
        const stats = JSON.parse(event.data);
        console.log('Stats:', stats);
    };
    ```
    """

    async def generate_stats() -> AsyncGenerator[str, None]:
        """Generate stats stream."""
        try:
            while True:
                # Get current stats
                stats = await get_crawler_stats()

                # Format as SSE
                data = json.dumps(stats.dict())
                yield f"data: {data}\n\n"

                # Wait for next interval
                await asyncio.sleep(interval)

        except asyncio.CancelledError:
            logger.info("Stats stream cancelled by client")
        except Exception as e:
            logger.error(f"Error in stats stream: {e}")
            yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"

    return StreamingResponse(
        generate_stats(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        }
    )

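# ----------------------------------------------------------------------------
# Illustrative Python client for the SSE stream above, using httpx (an
# assumption; any HTTP client that exposes the response line-by-line works):
#
#     import json
#     import httpx
#
#     with httpx.stream(
#         "GET", "http://localhost:8000/monitoring/stats/stream?interval=2",
#         timeout=None,
#     ) as response:
#         for line in response.iter_lines():
#             if line.startswith("data: "):
#                 stats = json.loads(line[len("data: "):])
#                 print(stats["active_crawls"], stats["memory_usage_mb"])
# ----------------------------------------------------------------------------
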
@router.get(
    "/stats/urls",
    response_model=List[URLStatistics],
    summary="Get URL statistics",
    description="Get statistics for crawled URLs"
)
async def get_url_statistics(
    limit: int = Query(100, ge=1, le=1000, description="Maximum number of URLs to return"),
    sort_by: str = Query("total_requests", description="Sort field: total_requests, success_rate, average_time_ms")
):
    """
    Get statistics for crawled URLs.

    Returns metrics for each URL that has been crawled,
    including request counts, success rates, and timing.
    """
    stats_list = []

    for url, stats in URL_STATS.items():
        total = stats["total_requests"]
        success_rate = (stats["success_count"] / total * 100) if total > 0 else 0.0

        stats_list.append(URLStatistics(
            url_pattern=url,
            total_requests=stats["total_requests"],
            success_count=stats["success_count"],
            failure_count=stats["failure_count"],
            success_rate=success_rate,
            average_time_ms=stats["average_time_ms"],
            last_accessed=stats["last_accessed"]
        ))

    # Sort by the requested field
    if sort_by == "success_rate":
        stats_list.sort(key=lambda x: x.success_rate, reverse=True)
    elif sort_by == "average_time_ms":
        stats_list.sort(key=lambda x: x.average_time_ms)
    else:  # total_requests (default)
        stats_list.sort(key=lambda x: x.total_requests, reverse=True)

    return stats_list[:limit]

@router.post(
    "/stats/reset",
    summary="Reset statistics",
    description="Reset all crawler statistics to zero"
)
async def reset_statistics():
    """
    Reset all statistics.

    Clears all accumulated statistics but keeps the server running.
    Useful for testing or starting fresh measurements.
    """
    global CRAWLER_STATS

    CRAWLER_STATS = {
        "active_crawls": 0,
        "total_crawls": 0,
        "successful_crawls": 0,
        "failed_crawls": 0,
        "total_bytes_processed": 0,
        "average_response_time_ms": 0.0,
        "last_updated": datetime.now().isoformat(),
    }

    URL_STATS.clear()

    logger.info("All statistics reset")

    return {
        "success": True,
        "message": "All statistics have been reset",
        "timestamp": datetime.now().isoformat()
    }

# ============================================================================
# Background Tasks
# ============================================================================

async def run_profiling_session(session_id: str, request: ProfilingStartRequest):
    """
    Background task to run a profiling session.

    This performs the actual profiling work:
    1. Creates a crawler with profiling enabled
    2. Crawls the target URL
    3. Collects performance metrics
    4. Stores results in the session
    """
    start_time = time.time()

    try:
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
        from crawl4ai.browser_profiler import BrowserProfiler

        logger.info(f"Starting profiling for session {session_id}")

        # Create profiler
        profiler = BrowserProfiler()

        # Configure browser and crawler
        browser_config = BrowserConfig.load(request.browser_config)
        crawler_config = CrawlerRunConfig.load(request.crawler_config)

        # Enable profiling options
        browser_config.profiling_enabled = True

        results = {}

        async with AsyncWebCrawler(config=browser_config) as crawler:
            # Start profiling
            profiler.start()

            # Collect system stats before
            stats_before = get_system_stats()

            # Crawl with timeout
            try:
                result = await asyncio.wait_for(
                    crawler.arun(request.url, config=crawler_config),
                    timeout=request.profile_duration
                )

                crawl_success = result.success

            except asyncio.TimeoutError:
                logger.warning(f"Profiling session {session_id} timed out")
                crawl_success = False
                result = None

            # Stop profiling
            profiler_results = profiler.stop()

            # Collect system stats after
            stats_after = get_system_stats()

            # Build results
            results = {
                "crawl_success": crawl_success,
                "url": request.url,
                "performance": profiler_results if profiler_results else {},
                "system": {
                    "before": stats_before,
                    "after": stats_after,
                    "delta": {
                        "memory_mb": stats_after["memory_usage_mb"] - stats_before["memory_usage_mb"],
                        "cpu_percent": stats_after["cpu_percent"] - stats_before["cpu_percent"],
                    }
                }
            }

            if result:
                results["content"] = {
                    "markdown_length": len(result.markdown) if result.markdown else 0,
                    "html_length": len(result.html) if result.html else 0,
                    "links_count": len(result.links["internal"]) + len(result.links["external"]),
                    "media_count": len(result.media["images"]) + len(result.media["videos"]),
                }

        # Update session with results
        end_time = time.time()
        duration = end_time - start_time

        PROFILING_SESSIONS[session_id].update({
            "status": "completed",
            "end_time": datetime.now().isoformat(),
            "duration_seconds": duration,
            "results": results
        })

        logger.info(f"Profiling session {session_id} completed in {duration:.2f}s")

    except Exception as e:
        logger.error(f"Profiling session {session_id} failed: {str(e)}")

        PROFILING_SESSIONS[session_id].update({
            "status": "failed",
            "end_time": datetime.now().isoformat(),
            "duration_seconds": time.time() - start_time,
            "error": str(e)
        })

# ============================================================================
# Middleware Integration Points
# ============================================================================

def track_crawl_start():
    """Call this when a crawl starts."""
    CRAWLER_STATS["active_crawls"] += 1
    CRAWLER_STATS["total_crawls"] += 1
    CRAWLER_STATS["last_updated"] = datetime.now().isoformat()


def track_crawl_end(url: str, success: bool, duration_ms: float, bytes_processed: int = 0):
    """Call this when a crawl ends."""
    CRAWLER_STATS["active_crawls"] = max(0, CRAWLER_STATS["active_crawls"] - 1)

    if success:
        CRAWLER_STATS["successful_crawls"] += 1
    else:
        CRAWLER_STATS["failed_crawls"] += 1

    CRAWLER_STATS["total_bytes_processed"] += bytes_processed

    # Update average response time as an incremental (running) mean:
    # new_avg = (old_avg * (n - 1) + new_value) / n
    total = CRAWLER_STATS["successful_crawls"] + CRAWLER_STATS["failed_crawls"]
    current_avg = CRAWLER_STATS["average_response_time_ms"]
    CRAWLER_STATS["average_response_time_ms"] = (
        (current_avg * (total - 1) + duration_ms) / total
    )

    # Update URL stats
    url_stat = URL_STATS[url]
    url_stat["total_requests"] += 1

    if success:
        url_stat["success_count"] += 1
    else:
        url_stat["failure_count"] += 1

    # Update the running average time for this URL
    total_url = url_stat["total_requests"]
    current_avg_url = url_stat["average_time_ms"]
    url_stat["average_time_ms"] = (
        (current_avg_url * (total_url - 1) + duration_ms) / total_url
    )
    url_stat["last_accessed"] = datetime.now().isoformat()

    CRAWLER_STATS["last_updated"] = datetime.now().isoformat()

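# Illustrative glue (an assumption, not used elsewhere in this module): wrap
# any crawl coroutine so the two hooks above are always called in pairs. The
# result object is only inspected via getattr, so any result shape works.
async def tracked_crawl(url: str, crawl_coro):
    """Await a crawl coroutine while updating the statistics above."""
    track_crawl_start()
    start = time.time()
    success = False
    bytes_processed = 0
    try:
        result = await crawl_coro
        success = bool(getattr(result, "success", False))
        bytes_processed = len(getattr(result, "html", "") or "")
        return result
    finally:
        track_crawl_end(url, success, (time.time() - start) * 1000.0, bytes_processed)
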
# ============================================================================
# Health Check
# ============================================================================

@router.get(
    "/health",
    summary="Health check",
    description="Check if monitoring system is operational"
)
async def health_check():
    """
    Health check endpoint.

    Returns status of the monitoring system.
    """
    system_stats = get_system_stats()

    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "active_sessions": len([s for s in PROFILING_SESSIONS.values() if s["status"] == "running"]),
        "total_sessions": len(PROFILING_SESSIONS),
        "system": system_stats
    }