Profiling/monitoring: Add interactive monitoring dashboard and integration tests for monitoring endpoints
- Implemented an interactive monitoring dashboard in `demo_monitoring_dashboard.py` for real-time statistics, profiling session management, and system resource monitoring.
- Created a quick test script, `test_monitoring_quick.py`, to verify that the monitoring endpoints are working.
- Developed comprehensive integration tests in `test_monitoring_endpoints.py` covering health checks, statistics, profiling sessions, and real-time streaming.
- Added error handling and user-friendly output to the dashboard for better usability.
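For reference, the new scripts can be run as follows; the paths, default ports, and pytest flags are taken from the files added in this commit, so adjust `--url`/`BASE_URL` if your server listens elsewhere:

    # Interactive dashboard (defaults to http://localhost:11234)
    python tests/docker/extended_features/demo_monitoring_dashboard.py --url http://localhost:11234

    # Quick smoke test of the monitoring endpoints (expects the server on http://localhost:11234)
    python tests/docker/extended_features/test_monitoring_quick.py

    # Full integration suite (expects the server on http://localhost:11235)
    pytest tests/docker/test_monitoring_endpoints.py -v -s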
tests/docker/extended_features/demo_monitoring_dashboard.py (new file)
@@ -0,0 +1,479 @@
"""
Interactive Monitoring Dashboard Demo

This demo showcases the monitoring and profiling capabilities of Crawl4AI's Docker server.
It provides:
- Real-time statistics dashboard with auto-refresh
- Profiling session management
- System resource monitoring
- URL-specific statistics
- Interactive terminal UI

Usage:
    python demo_monitoring_dashboard.py [--url BASE_URL]
"""

import argparse
import asyncio
import json
import sys
import time
from datetime import datetime
from typing import Dict, List, Optional

import httpx


class Colors:
    """ANSI color codes for terminal output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


class MonitoringDashboard:
    """Interactive monitoring dashboard for Crawl4AI."""

    def __init__(self, base_url: str = "http://localhost:11234"):
        self.base_url = base_url
        self.client = httpx.AsyncClient(base_url=base_url, timeout=60.0)
        self.running = True
        self.current_view = "dashboard"  # dashboard, sessions, urls
        self.profiling_sessions: List[Dict] = []

    async def close(self):
        """Close the HTTP client."""
        await self.client.aclose()

    def clear_screen(self):
        """Clear the terminal screen."""
        print("\033[2J\033[H", end="")

    def print_header(self, title: str):
        """Print a formatted header."""
        width = 80
        print(f"\n{Colors.HEADER}{Colors.BOLD}")
        print("=" * width)
        print(f"{title.center(width)}")
        print("=" * width)
        print(f"{Colors.ENDC}")

    def print_section(self, title: str):
        """Print a section header."""
        print(f"\n{Colors.OKBLUE}{Colors.BOLD}▶ {title}{Colors.ENDC}")
        print("-" * 80)

    async def check_health(self) -> Dict:
        """Check server health."""
        try:
            response = await self.client.get("/monitoring/health")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"status": "error", "error": str(e)}

    async def get_stats(self) -> Dict:
        """Get current statistics."""
        try:
            response = await self.client.get("/monitoring/stats")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    async def get_url_stats(self) -> List[Dict]:
        """Get URL-specific statistics."""
        try:
            response = await self.client.get("/monitoring/stats/urls")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return []

    async def list_profiling_sessions(self) -> List[Dict]:
        """List all profiling sessions."""
        try:
            response = await self.client.get("/monitoring/profile")
            response.raise_for_status()
            data = response.json()
            return data.get("sessions", [])
        except Exception as e:
            return []

    async def start_profiling_session(self, urls: List[str], duration: int = 30) -> Dict:
        """Start a new profiling session."""
        try:
            request_data = {
                "urls": urls,
                "duration_seconds": duration,
                "crawler_config": {
                    "word_count_threshold": 10
                }
            }
            response = await self.client.post("/monitoring/profile/start", json=request_data)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    async def get_profiling_session(self, session_id: str) -> Dict:
        """Get profiling session details."""
        try:
            response = await self.client.get(f"/monitoring/profile/{session_id}")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    async def delete_profiling_session(self, session_id: str) -> Dict:
        """Delete a profiling session."""
        try:
            response = await self.client.delete(f"/monitoring/profile/{session_id}")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    async def reset_stats(self) -> Dict:
        """Reset all statistics."""
        try:
            response = await self.client.post("/monitoring/stats/reset")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    def display_dashboard(self, stats: Dict):
        """Display the main statistics dashboard."""
        self.clear_screen()
        self.print_header("Crawl4AI Monitoring Dashboard")

        # Health Status
        print(f"\n{Colors.OKGREEN}● Server Status: ONLINE{Colors.ENDC}")
        print(f"Base URL: {self.base_url}")
        print(f"Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        # Crawler Statistics
        self.print_section("Crawler Statistics")
        if "error" in stats:
            print(f"{Colors.FAIL}Error fetching stats: {stats['error']}{Colors.ENDC}")
        else:
            print(f"Active Crawls: {Colors.BOLD}{stats.get('active_crawls', 0)}{Colors.ENDC}")
            print(f"Total Crawls: {stats.get('total_crawls', 0)}")
            print(f"Successful: {Colors.OKGREEN}{stats.get('successful_crawls', 0)}{Colors.ENDC}")
            print(f"Failed: {Colors.FAIL}{stats.get('failed_crawls', 0)}{Colors.ENDC}")
            print(f"Success Rate: {stats.get('success_rate', 0):.2f}%")
            print(f"Avg Duration: {stats.get('avg_duration_ms', 0):.2f} ms")

            # Format bytes
            total_bytes = stats.get('total_bytes_processed', 0)
            if total_bytes > 1024 * 1024:
                bytes_str = f"{total_bytes / (1024 * 1024):.2f} MB"
            elif total_bytes > 1024:
                bytes_str = f"{total_bytes / 1024:.2f} KB"
            else:
                bytes_str = f"{total_bytes} bytes"
            print(f"Total Data Processed: {bytes_str}")

        # System Statistics
        if "system_stats" in stats:
            self.print_section("System Resources")
            sys_stats = stats["system_stats"]

            cpu = sys_stats.get("cpu_percent", 0)
            cpu_color = Colors.OKGREEN if cpu < 50 else Colors.WARNING if cpu < 80 else Colors.FAIL
            print(f"CPU Usage: {cpu_color}{cpu:.1f}%{Colors.ENDC}")

            mem = sys_stats.get("memory_percent", 0)
            mem_color = Colors.OKGREEN if mem < 50 else Colors.WARNING if mem < 80 else Colors.FAIL
            print(f"Memory Usage: {mem_color}{mem:.1f}%{Colors.ENDC}")

            mem_used = sys_stats.get("memory_used_mb", 0)
            mem_available = sys_stats.get("memory_available_mb", 0)
            print(f"Memory Used: {mem_used:.0f} MB / {mem_available:.0f} MB")

            disk = sys_stats.get("disk_usage_percent", 0)
            disk_color = Colors.OKGREEN if disk < 70 else Colors.WARNING if disk < 90 else Colors.FAIL
            print(f"Disk Usage: {disk_color}{disk:.1f}%{Colors.ENDC}")

            print(f"Active Processes: {sys_stats.get('active_processes', 0)}")

        # Navigation
        self.print_section("Navigation")
        print(f"[D] Dashboard [S] Profiling Sessions [U] URL Stats [R] Reset Stats [Q] Quit")

    def display_url_stats(self, url_stats: List[Dict]):
        """Display URL-specific statistics."""
        self.clear_screen()
        self.print_header("URL Statistics")

        if not url_stats:
            print(f"\n{Colors.WARNING}No URL statistics available yet.{Colors.ENDC}")
        else:
            print(f"\nTotal URLs tracked: {len(url_stats)}")
            print()

            # Table header
            print(f"{Colors.BOLD}{'URL':<50} {'Requests':<10} {'Success':<10} {'Avg Time':<12} {'Data':<12}{Colors.ENDC}")
            print("-" * 94)

            # Sort by total requests
            sorted_stats = sorted(url_stats, key=lambda x: x.get('total_requests', 0), reverse=True)

            for stat in sorted_stats[:20]:  # Show top 20
                url = stat.get('url', 'unknown')
                if len(url) > 47:
                    url = url[:44] + "..."

                total = stat.get('total_requests', 0)
                success = stat.get('successful_requests', 0)
                success_pct = f"{(success/total*100):.0f}%" if total > 0 else "N/A"

                avg_time = stat.get('avg_duration_ms', 0)
                time_str = f"{avg_time:.0f} ms"

                bytes_processed = stat.get('total_bytes_processed', 0)
                if bytes_processed > 1024 * 1024:
                    data_str = f"{bytes_processed / (1024 * 1024):.2f} MB"
                elif bytes_processed > 1024:
                    data_str = f"{bytes_processed / 1024:.2f} KB"
                else:
                    data_str = f"{bytes_processed} B"

                print(f"{url:<50} {total:<10} {success_pct:<10} {time_str:<12} {data_str:<12}")

        # Navigation
        self.print_section("Navigation")
        print(f"[D] Dashboard [S] Profiling Sessions [U] URL Stats [R] Reset Stats [Q] Quit")

    def display_profiling_sessions(self, sessions: List[Dict]):
        """Display profiling sessions."""
        self.clear_screen()
        self.print_header("Profiling Sessions")

        if not sessions:
            print(f"\n{Colors.WARNING}No profiling sessions found.{Colors.ENDC}")
        else:
            print(f"\nTotal sessions: {len(sessions)}")
            print()

            # Table header
            print(f"{Colors.BOLD}{'ID':<25} {'Status':<12} {'URLs':<6} {'Duration':<12} {'Started':<20}{Colors.ENDC}")
            print("-" * 85)

            # Sort by started time (newest first)
            sorted_sessions = sorted(sessions, key=lambda x: x.get('started_at', ''), reverse=True)

            for session in sorted_sessions[:15]:  # Show top 15
                session_id = session.get('session_id', 'unknown')
                if len(session_id) > 22:
                    session_id = session_id[:19] + "..."

                status = session.get('status', 'unknown')
                status_color = Colors.OKGREEN if status == 'completed' else Colors.WARNING if status == 'running' else Colors.FAIL

                url_count = len(session.get('urls', []))

                duration = session.get('duration_seconds', 0)
                duration_str = f"{duration}s" if duration else "N/A"

                started = session.get('started_at', 'N/A')
                if started != 'N/A':
                    try:
                        dt = datetime.fromisoformat(started.replace('Z', '+00:00'))
                        started = dt.strftime('%Y-%m-%d %H:%M:%S')
                    except:
                        pass

                print(f"{session_id:<25} {status_color}{status:<12}{Colors.ENDC} {url_count:<6} {duration_str:<12} {started:<20}")

        # Navigation
        self.print_section("Navigation & Actions")
        print(f"[D] Dashboard [S] Profiling Sessions [U] URL Stats")
        print(f"[N] New Session [V] View Session [X] Delete Session")
        print(f"[R] Reset Stats [Q] Quit")

    async def interactive_session_view(self, session_id: str):
        """Display detailed view of a profiling session."""
        session = await self.get_profiling_session(session_id)

        self.clear_screen()
        self.print_header(f"Profiling Session: {session_id}")

        if "error" in session:
            print(f"\n{Colors.FAIL}Error: {session['error']}{Colors.ENDC}")
        else:
            print(f"\n{Colors.BOLD}Session ID:{Colors.ENDC} {session.get('session_id', 'N/A')}")

            status = session.get('status', 'unknown')
            status_color = Colors.OKGREEN if status == 'completed' else Colors.WARNING
            print(f"{Colors.BOLD}Status:{Colors.ENDC} {status_color}{status}{Colors.ENDC}")

            print(f"{Colors.BOLD}URLs:{Colors.ENDC}")
            for url in session.get('urls', []):
                print(f"  - {url}")

            started = session.get('started_at', 'N/A')
            print(f"{Colors.BOLD}Started:{Colors.ENDC} {started}")

            if 'completed_at' in session:
                print(f"{Colors.BOLD}Completed:{Colors.ENDC} {session['completed_at']}")

            if 'results' in session:
                self.print_section("Profiling Results")
                results = session['results']

                print(f"Total Requests: {results.get('total_requests', 0)}")
                print(f"Successful: {Colors.OKGREEN}{results.get('successful_requests', 0)}{Colors.ENDC}")
                print(f"Failed: {Colors.FAIL}{results.get('failed_requests', 0)}{Colors.ENDC}")
                print(f"Avg Response Time: {results.get('avg_response_time_ms', 0):.2f} ms")

                if 'system_metrics' in results:
                    self.print_section("System Metrics During Profiling")
                    metrics = results['system_metrics']
                    print(f"Avg CPU: {metrics.get('avg_cpu_percent', 0):.1f}%")
                    print(f"Peak CPU: {metrics.get('peak_cpu_percent', 0):.1f}%")
                    print(f"Avg Memory: {metrics.get('avg_memory_percent', 0):.1f}%")
                    print(f"Peak Memory: {metrics.get('peak_memory_percent', 0):.1f}%")

        print(f"\n{Colors.OKCYAN}Press any key to return...{Colors.ENDC}")
        input()

    async def create_new_session(self):
        """Interactive session creation."""
        self.clear_screen()
        self.print_header("Create New Profiling Session")

        print(f"\n{Colors.BOLD}Enter URLs to profile (one per line, empty line to finish):{Colors.ENDC}")
        urls = []
        while True:
            url = input(f"{Colors.OKCYAN}URL {len(urls) + 1}:{Colors.ENDC} ").strip()
            if not url:
                break
            urls.append(url)

        if not urls:
            print(f"{Colors.FAIL}No URLs provided. Cancelled.{Colors.ENDC}")
            time.sleep(2)
            return

        duration = input(f"{Colors.OKCYAN}Duration (seconds, default 30):{Colors.ENDC} ").strip()
        try:
            duration = int(duration) if duration else 30
        except:
            duration = 30

        print(f"\n{Colors.WARNING}Starting profiling session for {len(urls)} URL(s), {duration}s...{Colors.ENDC}")
        result = await self.start_profiling_session(urls, duration)

        if "error" in result:
            print(f"{Colors.FAIL}Error: {result['error']}{Colors.ENDC}")
        else:
            print(f"{Colors.OKGREEN}✓ Session started successfully!{Colors.ENDC}")
            print(f"Session ID: {result.get('session_id', 'N/A')}")

        time.sleep(3)

    async def run_dashboard(self):
        """Run the interactive dashboard."""
        print(f"{Colors.OKGREEN}Starting Crawl4AI Monitoring Dashboard...{Colors.ENDC}")
        print(f"Connecting to {self.base_url}...")

        # Check health
        health = await self.check_health()
        if health.get("status") != "healthy":
            print(f"{Colors.FAIL}Error: Server not responding or unhealthy{Colors.ENDC}")
            print(f"Health check result: {health}")
            return

        print(f"{Colors.OKGREEN}✓ Connected successfully!{Colors.ENDC}")
        time.sleep(1)

        # Main loop
        while self.running:
            if self.current_view == "dashboard":
                stats = await self.get_stats()
                self.display_dashboard(stats)
            elif self.current_view == "urls":
                url_stats = await self.get_url_stats()
                self.display_url_stats(url_stats)
            elif self.current_view == "sessions":
                sessions = await self.list_profiling_sessions()
                self.display_profiling_sessions(sessions)

            # Get user input (non-blocking with timeout)
            print(f"\n{Colors.OKCYAN}Enter command (or wait 5s for auto-refresh):{Colors.ENDC} ", end="", flush=True)

            try:
                # Simple input with timeout simulation
                import select
                if sys.platform != 'win32':
                    i, _, _ = select.select([sys.stdin], [], [], 5.0)
                    if i:
                        command = sys.stdin.readline().strip().lower()
                    else:
                        command = ""
                else:
                    # Windows doesn't support select on stdin
                    command = input()
            except:
                command = ""

            # Process command
            if command == 'q':
                self.running = False
            elif command == 'd':
                self.current_view = "dashboard"
            elif command == 's':
                self.current_view = "sessions"
            elif command == 'u':
                self.current_view = "urls"
            elif command == 'r':
                print(f"\n{Colors.WARNING}Resetting statistics...{Colors.ENDC}")
                await self.reset_stats()
                time.sleep(1)
            elif command == 'n' and self.current_view == "sessions":
                await self.create_new_session()
            elif command == 'v' and self.current_view == "sessions":
                session_id = input(f"{Colors.OKCYAN}Enter session ID:{Colors.ENDC} ").strip()
                if session_id:
                    await self.interactive_session_view(session_id)
            elif command == 'x' and self.current_view == "sessions":
                session_id = input(f"{Colors.OKCYAN}Enter session ID to delete:{Colors.ENDC} ").strip()
                if session_id:
                    result = await self.delete_profiling_session(session_id)
                    if "error" in result:
                        print(f"{Colors.FAIL}Error: {result['error']}{Colors.ENDC}")
                    else:
                        print(f"{Colors.OKGREEN}✓ Session deleted{Colors.ENDC}")
                    time.sleep(2)

        self.clear_screen()
        print(f"\n{Colors.OKGREEN}Dashboard closed. Goodbye!{Colors.ENDC}\n")


async def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(description="Crawl4AI Monitoring Dashboard")
    parser.add_argument(
        "--url",
        default="http://localhost:11234",
        help="Base URL of the Crawl4AI Docker server (default: http://localhost:11234)"
    )
    args = parser.parse_args()

    dashboard = MonitoringDashboard(base_url=args.url)
    try:
        await dashboard.run_dashboard()
    finally:
        await dashboard.close()


if __name__ == "__main__":
    asyncio.run(main())
tests/docker/extended_features/test_monitoring_quick.py (new file)
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""
Quick test to verify monitoring endpoints are working
"""
import requests
import sys

BASE_URL = "http://localhost:11234"


def test_health():
    """Test health endpoint"""
    try:
        response = requests.get(f"{BASE_URL}/monitoring/health", timeout=5)
        if response.status_code == 200:
            print("✅ Health check: PASSED")
            print(f"   Response: {response.json()}")
            return True
        else:
            print(f"❌ Health check: FAILED (status {response.status_code})")
            return False
    except Exception as e:
        print(f"❌ Health check: ERROR - {e}")
        return False


def test_stats():
    """Test stats endpoint"""
    try:
        response = requests.get(f"{BASE_URL}/monitoring/stats", timeout=5)
        if response.status_code == 200:
            stats = response.json()
            print("✅ Stats endpoint: PASSED")
            print(f"   Active crawls: {stats.get('active_crawls', 'N/A')}")
            print(f"   Total crawls: {stats.get('total_crawls', 'N/A')}")
            return True
        else:
            print(f"❌ Stats endpoint: FAILED (status {response.status_code})")
            return False
    except Exception as e:
        print(f"❌ Stats endpoint: ERROR - {e}")
        return False


def test_url_stats():
    """Test URL stats endpoint"""
    try:
        response = requests.get(f"{BASE_URL}/monitoring/stats/urls", timeout=5)
        if response.status_code == 200:
            print("✅ URL stats endpoint: PASSED")
            url_stats = response.json()
            print(f"   URLs tracked: {len(url_stats)}")
            return True
        else:
            print(f"❌ URL stats endpoint: FAILED (status {response.status_code})")
            return False
    except Exception as e:
        print(f"❌ URL stats endpoint: ERROR - {e}")
        return False


def main():
    print("=" * 60)
    print("Monitoring Endpoints Quick Test")
    print("=" * 60)
    print(f"\nTesting server at: {BASE_URL}")
    print("\nMake sure the server is running:")
    print("  cd deploy/docker && python server.py")
    print("\n" + "-" * 60 + "\n")

    results = []
    results.append(test_health())
    print()
    results.append(test_stats())
    print()
    results.append(test_url_stats())

    print("\n" + "=" * 60)
    passed = sum(results)
    total = len(results)

    if passed == total:
        print(f"✅ All tests passed! ({passed}/{total})")
        print("\nMonitoring endpoints are working correctly! 🎉")
        return 0
    else:
        print(f"❌ Some tests failed ({passed}/{total} passed)")
        print("\nPlease check the server logs for errors.")
        return 1


if __name__ == "__main__":
    sys.exit(main())
tests/docker/test_monitoring_endpoints.py (new file)
@@ -0,0 +1,522 @@
"""
Integration tests for monitoring and profiling endpoints.

Tests all monitoring endpoints including profiling sessions, statistics,
health checks, and real-time streaming.
"""

import asyncio
import json
import time
from typing import Dict, List

import pytest
from httpx import AsyncClient

# Base URL for the Docker API server
BASE_URL = "http://localhost:11235"


@pytest.fixture(scope="module")
def event_loop():
    """Create event loop for async tests."""
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()


@pytest.fixture(scope="module")
async def client():
    """Create HTTP client for tests."""
    async with AsyncClient(base_url=BASE_URL, timeout=60.0) as client:
        yield client


class TestHealthEndpoint:
    """Tests for /monitoring/health endpoint."""

    @pytest.mark.asyncio
    async def test_health_check(self, client: AsyncClient):
        """Test basic health check returns OK."""
        response = await client.get("/monitoring/health")
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "healthy"
        assert "uptime_seconds" in data
        assert data["uptime_seconds"] >= 0


class TestStatsEndpoints:
    """Tests for /monitoring/stats/* endpoints."""

    @pytest.mark.asyncio
    async def test_get_stats_empty(self, client: AsyncClient):
        """Test getting stats when no crawls have been performed."""
        # Reset stats first
        await client.post("/monitoring/stats/reset")

        response = await client.get("/monitoring/stats")
        assert response.status_code == 200
        data = response.json()

        # Verify all expected fields
        assert "active_crawls" in data
        assert "total_crawls" in data
        assert "successful_crawls" in data
        assert "failed_crawls" in data
        assert "success_rate" in data
        assert "avg_duration_ms" in data
        assert "total_bytes_processed" in data
        assert "system_stats" in data

        # Verify system stats
        system = data["system_stats"]
        assert "cpu_percent" in system
        assert "memory_percent" in system
        assert "memory_used_mb" in system
        assert "memory_available_mb" in system
        assert "disk_usage_percent" in system
        assert "active_processes" in system

    @pytest.mark.asyncio
    async def test_stats_after_crawl(self, client: AsyncClient):
        """Test stats are updated after performing a crawl."""
        # Reset stats
        await client.post("/monitoring/stats/reset")

        # Perform a simple crawl
        crawl_request = {
            "urls": ["https://www.example.com"],
            "crawler_config": {
                "word_count_threshold": 10
            }
        }
        crawl_response = await client.post("/crawl", json=crawl_request)
        assert crawl_response.status_code == 200

        # Get stats
        response = await client.get("/monitoring/stats")
        assert response.status_code == 200
        data = response.json()

        # Verify stats are updated
        assert data["total_crawls"] >= 1
        assert data["successful_crawls"] >= 0
        assert data["failed_crawls"] >= 0
        assert data["total_crawls"] == data["successful_crawls"] + data["failed_crawls"]

        # Verify success rate calculation
        if data["total_crawls"] > 0:
            expected_rate = (data["successful_crawls"] / data["total_crawls"]) * 100
            assert abs(data["success_rate"] - expected_rate) < 0.01

    @pytest.mark.asyncio
    async def test_stats_reset(self, client: AsyncClient):
        """Test resetting stats clears all counters."""
        # Ensure we have some stats
        crawl_request = {
            "urls": ["https://www.example.com"],
            "crawler_config": {"word_count_threshold": 10}
        }
        await client.post("/crawl", json=crawl_request)

        # Reset stats
        reset_response = await client.post("/monitoring/stats/reset")
        assert reset_response.status_code == 200
        data = reset_response.json()
        assert data["status"] == "reset"
        assert "previous_stats" in data

        # Verify stats are cleared
        stats_response = await client.get("/monitoring/stats")
        stats = stats_response.json()
        assert stats["total_crawls"] == 0
        assert stats["successful_crawls"] == 0
        assert stats["failed_crawls"] == 0
        assert stats["active_crawls"] == 0

    @pytest.mark.asyncio
    async def test_url_specific_stats(self, client: AsyncClient):
        """Test getting URL-specific statistics."""
        # Reset and crawl
        await client.post("/monitoring/stats/reset")
        crawl_request = {
            "urls": ["https://www.example.com"],
            "crawler_config": {"word_count_threshold": 10}
        }
        await client.post("/crawl", json=crawl_request)

        # Get URL stats
        response = await client.get("/monitoring/stats/urls")
        assert response.status_code == 200
        data = response.json()

        assert isinstance(data, list)
        if len(data) > 0:
            url_stat = data[0]
            assert "url" in url_stat
            assert "total_requests" in url_stat
            assert "successful_requests" in url_stat
            assert "failed_requests" in url_stat
            assert "avg_duration_ms" in url_stat
            assert "total_bytes_processed" in url_stat
            assert "last_request_time" in url_stat


class TestStatsStreaming:
    """Tests for /monitoring/stats/stream SSE endpoint."""

    @pytest.mark.asyncio
    async def test_stats_stream_basic(self, client: AsyncClient):
        """Test SSE streaming of statistics."""
        # Start streaming (collect a few events then stop)
        events = []
        async with client.stream("GET", "/monitoring/stats/stream") as response:
            assert response.status_code == 200
            assert "text/event-stream" in response.headers.get("content-type", "")

            # Collect first 3 events
            count = 0
            async for line in response.aiter_lines():
                if line.startswith("data: "):
                    data_str = line[6:]  # Remove "data: " prefix
                    data = json.loads(data_str)
                    events.append(data)
                    count += 1
                    if count >= 3:
                        break

        # Verify we got events
        assert len(events) >= 3

        # Verify event structure
        for event in events:
            assert "active_crawls" in event
            assert "total_crawls" in event
            assert "successful_crawls" in event
            assert "system_stats" in event

    @pytest.mark.asyncio
    async def test_stats_stream_during_crawl(self, client: AsyncClient):
        """Test streaming updates during active crawl."""
        # Start streaming in background
        stream_task = None
        events = []

        async def collect_stream():
            async with client.stream("GET", "/monitoring/stats/stream") as response:
                async for line in response.aiter_lines():
                    if line.startswith("data: "):
                        data_str = line[6:]
                        data = json.loads(data_str)
                        events.append(data)
                        if len(events) >= 5:
                            break

        # Start stream collection
        stream_task = asyncio.create_task(collect_stream())

        # Wait a bit then start crawl
        await asyncio.sleep(1)
        crawl_request = {
            "urls": ["https://www.example.com"],
            "crawler_config": {"word_count_threshold": 10}
        }
        asyncio.create_task(client.post("/crawl", json=crawl_request))

        # Wait for events
        try:
            await asyncio.wait_for(stream_task, timeout=15.0)
        except asyncio.TimeoutError:
            stream_task.cancel()

        # Should have collected some events
        assert len(events) > 0


class TestProfilingEndpoints:
    """Tests for /monitoring/profile/* endpoints."""

    @pytest.mark.asyncio
    async def test_list_profiling_sessions_empty(self, client: AsyncClient):
        """Test listing profiling sessions when none exist."""
        response = await client.get("/monitoring/profile")
        assert response.status_code == 200
        data = response.json()
        assert "sessions" in data
        assert isinstance(data["sessions"], list)

    @pytest.mark.asyncio
    async def test_start_profiling_session(self, client: AsyncClient):
        """Test starting a new profiling session."""
        request_data = {
            "urls": ["https://www.example.com", "https://www.python.org"],
            "duration_seconds": 2,
            "crawler_config": {
                "word_count_threshold": 10
            }
        }

        response = await client.post("/monitoring/profile/start", json=request_data)
        assert response.status_code == 200
        data = response.json()

        assert "session_id" in data
        assert "status" in data
        assert data["status"] == "running"
        assert "started_at" in data
        assert "urls" in data
        assert len(data["urls"]) == 2

        return data["session_id"]

    @pytest.mark.asyncio
    async def test_get_profiling_session(self, client: AsyncClient):
        """Test retrieving a profiling session by ID."""
        # Start a session
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 2,
            "crawler_config": {"word_count_threshold": 10}
        }
        start_response = await client.post("/monitoring/profile/start", json=request_data)
        session_id = start_response.json()["session_id"]

        # Get session immediately (should be running)
        response = await client.get(f"/monitoring/profile/{session_id}")
        assert response.status_code == 200
        data = response.json()

        assert data["session_id"] == session_id
        assert data["status"] in ["running", "completed"]
        assert "started_at" in data
        assert "urls" in data

    @pytest.mark.asyncio
    async def test_profiling_session_completion(self, client: AsyncClient):
        """Test profiling session completes and produces results."""
        # Start a short session
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 3,
            "crawler_config": {"word_count_threshold": 10}
        }
        start_response = await client.post("/monitoring/profile/start", json=request_data)
        session_id = start_response.json()["session_id"]

        # Wait for completion
        await asyncio.sleep(5)

        # Get completed session
        response = await client.get(f"/monitoring/profile/{session_id}")
        assert response.status_code == 200
        data = response.json()

        assert data["status"] == "completed"
        assert "completed_at" in data
        assert "duration_seconds" in data
        assert "results" in data

        # Verify results structure
        results = data["results"]
        assert "total_requests" in results
        assert "successful_requests" in results
        assert "failed_requests" in results
        assert "avg_response_time_ms" in results
        assert "system_metrics" in results

    @pytest.mark.asyncio
    async def test_profiling_session_not_found(self, client: AsyncClient):
        """Test retrieving non-existent session returns 404."""
        response = await client.get("/monitoring/profile/nonexistent-id-12345")
        assert response.status_code == 404
        data = response.json()
        assert "detail" in data

    @pytest.mark.asyncio
    async def test_delete_profiling_session(self, client: AsyncClient):
        """Test deleting a profiling session."""
        # Start a session
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 1,
            "crawler_config": {"word_count_threshold": 10}
        }
        start_response = await client.post("/monitoring/profile/start", json=request_data)
        session_id = start_response.json()["session_id"]

        # Wait for completion
        await asyncio.sleep(2)

        # Delete session
        delete_response = await client.delete(f"/monitoring/profile/{session_id}")
        assert delete_response.status_code == 200
        data = delete_response.json()
        assert data["status"] == "deleted"
        assert data["session_id"] == session_id

        # Verify it's gone
        get_response = await client.get(f"/monitoring/profile/{session_id}")
        assert get_response.status_code == 404

    @pytest.mark.asyncio
    async def test_cleanup_old_sessions(self, client: AsyncClient):
        """Test cleaning up old profiling sessions."""
        # Start a few sessions
        for i in range(3):
            request_data = {
                "urls": ["https://www.example.com"],
                "duration_seconds": 1,
                "crawler_config": {"word_count_threshold": 10}
            }
            await client.post("/monitoring/profile/start", json=request_data)

        # Wait for completion
        await asyncio.sleep(2)

        # Cleanup sessions older than 0 seconds (all completed ones)
        cleanup_response = await client.post(
            "/monitoring/profile/cleanup",
            json={"max_age_seconds": 0}
        )
        assert cleanup_response.status_code == 200
        data = cleanup_response.json()
        assert "deleted_count" in data
        assert data["deleted_count"] >= 0

    @pytest.mark.asyncio
    async def test_list_sessions_after_operations(self, client: AsyncClient):
        """Test listing sessions shows correct state after various operations."""
        # Start a session
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 5,
            "crawler_config": {"word_count_threshold": 10}
        }
        start_response = await client.post("/monitoring/profile/start", json=request_data)
        session_id = start_response.json()["session_id"]

        # List sessions
        list_response = await client.get("/monitoring/profile")
        assert list_response.status_code == 200
        data = list_response.json()

        # Should have at least one session
        sessions = data["sessions"]
        assert len(sessions) >= 1

        # Find our session
        our_session = next((s for s in sessions if s["session_id"] == session_id), None)
        assert our_session is not None
        assert our_session["status"] in ["running", "completed"]


class TestProfilingWithCrawlConfig:
    """Tests for profiling with various crawler configurations."""

    @pytest.mark.asyncio
    async def test_profiling_with_extraction_strategy(self, client: AsyncClient):
        """Test profiling with extraction strategy configured."""
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 2,
            "crawler_config": {
                "word_count_threshold": 10,
                "extraction_strategy": "NoExtractionStrategy"
            }
        }

        response = await client.post("/monitoring/profile/start", json=request_data)
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "running"

    @pytest.mark.asyncio
    async def test_profiling_with_browser_config(self, client: AsyncClient):
        """Test profiling with custom browser configuration."""
        request_data = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 2,
            "browser_config": {
                "headless": True,
                "verbose": False
            },
            "crawler_config": {
                "word_count_threshold": 10
            }
        }

        response = await client.post("/monitoring/profile/start", json=request_data)
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "running"


class TestIntegrationScenarios:
    """Integration tests for real-world monitoring scenarios."""

    @pytest.mark.asyncio
    async def test_concurrent_crawls_and_monitoring(self, client: AsyncClient):
        """Test monitoring multiple concurrent crawls."""
        # Reset stats
        await client.post("/monitoring/stats/reset")

        # Start multiple crawls concurrently
        crawl_tasks = []
        urls = [
            "https://www.example.com",
            "https://www.python.org",
            "https://www.github.com"
        ]

        for url in urls:
            crawl_request = {
                "urls": [url],
                "crawler_config": {"word_count_threshold": 10}
            }
            task = client.post("/crawl", json=crawl_request)
            crawl_tasks.append(task)

        # Execute concurrently
        responses = await asyncio.gather(*crawl_tasks, return_exceptions=True)

        # Get stats
        await asyncio.sleep(1)  # Give tracking time to update
        stats_response = await client.get("/monitoring/stats")
        stats = stats_response.json()

        # Should have tracked multiple crawls
        assert stats["total_crawls"] >= len(urls)

    @pytest.mark.asyncio
    async def test_profiling_and_stats_correlation(self, client: AsyncClient):
        """Test that profiling data correlates with statistics."""
        # Reset stats
        await client.post("/monitoring/stats/reset")

        # Start profiling session
        profile_request = {
            "urls": ["https://www.example.com"],
            "duration_seconds": 3,
            "crawler_config": {"word_count_threshold": 10}
        }
        profile_response = await client.post("/monitoring/profile/start", json=profile_request)
        session_id = profile_response.json()["session_id"]

        # Wait for completion
        await asyncio.sleep(5)

        # Get profiling results
        profile_data_response = await client.get(f"/monitoring/profile/{session_id}")
        profile_data = profile_data_response.json()

        # Get stats
        stats_response = await client.get("/monitoring/stats")
        stats = stats_response.json()

        # Stats should reflect profiling activity
        assert stats["total_crawls"] >= profile_data["results"]["total_requests"]


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])