#!/usr/bin/env python3
"""
Benchmark reporting tool for Crawl4AI stress tests.

Generates visual reports and comparisons between test runs.
"""

import os
import json
import glob
import argparse
import sys
from datetime import datetime
from pathlib import Path

from rich.console import Console
from rich.table import Table
from rich.panel import Panel

# Initialize rich console
console = Console()

# Try to import optional visualization dependencies
VISUALIZATION_AVAILABLE = True
try:
    import pandas as pd
    import matplotlib as mpl
    # Bug fix: select the non-interactive Agg backend BEFORE pyplot is
    # imported so the choice reliably takes effect in headless environments.
    # (Previously `mpl.use('Agg')` ran inside __init__, after pyplot had
    # already been imported and the default backend initialized.)
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    import numpy as np
    import seaborn as sns
except ImportError:
    VISUALIZATION_AVAILABLE = False
    console.print("[yellow]Warning: Visualization dependencies not found. Install with:[/yellow]")
    console.print("[yellow]pip install pandas matplotlib seaborn[/yellow]")
    console.print("[yellow]Only text-based reports will be generated.[/yellow]")

# Configure plotting if available
if VISUALIZATION_AVAILABLE:
    # Set plot style for dark theme
    plt.style.use('dark_background')
    sns.set_theme(style="darkgrid")

    # Custom color palette based on Nord theme
    nord_palette = ["#88c0d0", "#81a1c1", "#a3be8c", "#ebcb8b", "#bf616a", "#b48ead", "#5e81ac"]
    sns.set_palette(nord_palette)


class BenchmarkReporter:
    """Generates visual reports and comparisons for Crawl4AI stress tests."""

    def __init__(self, reports_dir="reports", output_dir="benchmark_reports"):
        """Initialize the benchmark reporter.

        Args:
            reports_dir: Directory containing test result files
            output_dir: Directory to save generated reports (created if missing)
        """
        self.reports_dir = Path(reports_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Configure matplotlib if available (backend was already selected at
        # import time above; here we only set per-reporter styling).
        if VISUALIZATION_AVAILABLE:
            # Set up styling for plots with dark theme
            mpl.rcParams['figure.figsize'] = (12, 8)
            mpl.rcParams['font.size'] = 12
            mpl.rcParams['axes.labelsize'] = 14
            mpl.rcParams['axes.titlesize'] = 16
            mpl.rcParams['xtick.labelsize'] = 12
            mpl.rcParams['ytick.labelsize'] = 12
            mpl.rcParams['legend.fontsize'] = 12
            mpl.rcParams['figure.facecolor'] = '#1e1e1e'
            mpl.rcParams['axes.facecolor'] = '#2e3440'
            mpl.rcParams['savefig.facecolor'] = '#1e1e1e'
            mpl.rcParams['text.color'] = '#e0e0e0'
            mpl.rcParams['axes.labelcolor'] = '#e0e0e0'
            mpl.rcParams['xtick.color'] = '#e0e0e0'
            mpl.rcParams['ytick.color'] = '#e0e0e0'
            mpl.rcParams['grid.color'] = '#444444'
            mpl.rcParams['figure.edgecolor'] = '#444444'
Args: limit: Optional limit on number of most recent tests to load Returns: Dictionary mapping test IDs to result data """ result_files = glob.glob(str(self.reports_dir / "test_results_*.json")) # Sort files by modification time (newest first) result_files.sort(key=os.path.getmtime, reverse=True) if limit: result_files = result_files[:limit] results = {} for file_path in result_files: try: with open(file_path, 'r') as f: data = json.load(f) test_id = data.get('test_id') if test_id: results[test_id] = data # Try to load the corresponding memory samples csv_path = self.reports_dir / f"memory_samples_{test_id}.csv" if csv_path.exists(): try: memory_df = pd.read_csv(csv_path) results[test_id]['memory_samples'] = memory_df except Exception as e: console.print(f"[yellow]Warning: Could not load memory samples for {test_id}: {e}[/yellow]") except Exception as e: console.print(f"[red]Error loading {file_path}: {e}[/red]") console.print(f"Loaded {len(results)} test results") return results def generate_summary_table(self, results): """Generate a summary table of test results. 
Args: results: Dictionary mapping test IDs to result data Returns: Rich Table object """ table = Table(title="Crawl4AI Stress Test Summary", show_header=True) # Define columns table.add_column("Test ID", style="cyan") table.add_column("Date", style="bright_green") table.add_column("URLs", justify="right") table.add_column("Workers", justify="right") table.add_column("Success %", justify="right") table.add_column("Time (s)", justify="right") table.add_column("Mem Growth", justify="right") table.add_column("URLs/sec", justify="right") # Add rows for test_id, data in sorted(results.items(), key=lambda x: x[0], reverse=True): # Parse timestamp from test_id try: date_str = datetime.strptime(test_id, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M") except: date_str = "Unknown" # Calculate success percentage total_urls = data.get('url_count', 0) successful = data.get('successful_urls', 0) success_pct = (successful / total_urls * 100) if total_urls > 0 else 0 # Calculate memory growth if available mem_growth = "N/A" if 'memory_samples' in data: samples = data['memory_samples'] if len(samples) >= 2: # Try to extract numeric values from memory_info strings try: first_mem = float(samples.iloc[0]['memory_info'].split()[0]) last_mem = float(samples.iloc[-1]['memory_info'].split()[0]) mem_growth = f"{last_mem - first_mem:.1f} MB" except: pass # Calculate URLs per second time_taken = data.get('total_time_seconds', 0) urls_per_sec = total_urls / time_taken if time_taken > 0 else 0 table.add_row( test_id, date_str, str(total_urls), str(data.get('workers', 'N/A')), f"{success_pct:.1f}%", f"{data.get('total_time_seconds', 0):.2f}", mem_growth, f"{urls_per_sec:.1f}" ) return table def generate_performance_chart(self, results, output_file=None): """Generate a performance comparison chart. 
Args: results: Dictionary mapping test IDs to result data output_file: File path to save the chart Returns: Path to the saved chart file or None if visualization is not available """ if not VISUALIZATION_AVAILABLE: console.print("[yellow]Skipping performance chart - visualization dependencies not available[/yellow]") return None # Extract relevant data data = [] for test_id, result in results.items(): urls = result.get('url_count', 0) workers = result.get('workers', 0) time_taken = result.get('total_time_seconds', 0) urls_per_sec = urls / time_taken if time_taken > 0 else 0 # Parse timestamp from test_id for sorting try: timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S") data.append({ 'test_id': test_id, 'timestamp': timestamp, 'urls': urls, 'workers': workers, 'time_seconds': time_taken, 'urls_per_sec': urls_per_sec }) except: console.print(f"[yellow]Warning: Could not parse timestamp from {test_id}[/yellow]") if not data: console.print("[yellow]No valid data for performance chart[/yellow]") return None # Convert to DataFrame and sort by timestamp df = pd.DataFrame(data) df = df.sort_values('timestamp') # Create the plot fig, ax1 = plt.subplots(figsize=(12, 6)) # Plot URLs per second as bars with properly set x-axis x_pos = range(len(df['test_id'])) bars = ax1.bar(x_pos, df['urls_per_sec'], color='#88c0d0', alpha=0.8) ax1.set_ylabel('URLs per Second', color='#88c0d0') ax1.tick_params(axis='y', labelcolor='#88c0d0') # Properly set x-axis labels ax1.set_xticks(x_pos) ax1.set_xticklabels(df['test_id'].tolist(), rotation=45, ha='right') # Add worker count as text on each bar for i, bar in enumerate(bars): height = bar.get_height() workers = df.iloc[i]['workers'] ax1.text(i, height + 0.1, f'W: {workers}', ha='center', va='bottom', fontsize=9, color='#e0e0e0') # Add a second y-axis for total URLs ax2 = ax1.twinx() ax2.plot(x_pos, df['urls'], '-', color='#bf616a', alpha=0.8, markersize=6, marker='o') ax2.set_ylabel('Total URLs', color='#bf616a') 
ax2.tick_params(axis='y', labelcolor='#bf616a') # Set title and layout plt.title('Crawl4AI Performance Benchmarks') plt.tight_layout() # Save the figure if output_file is None: output_file = self.output_dir / "performance_comparison.png" plt.savefig(output_file, dpi=100, bbox_inches='tight') plt.close() return output_file def generate_memory_charts(self, results, output_prefix=None): """Generate memory usage charts for each test. Args: results: Dictionary mapping test IDs to result data output_prefix: Prefix for output file names Returns: List of paths to the saved chart files """ if not VISUALIZATION_AVAILABLE: console.print("[yellow]Skipping memory charts - visualization dependencies not available[/yellow]") return [] output_files = [] for test_id, result in results.items(): if 'memory_samples' not in result: continue memory_df = result['memory_samples'] # Check if we have enough data points if len(memory_df) < 2: continue # Try to extract numeric values from memory_info strings try: memory_values = [] for mem_str in memory_df['memory_info']: # Extract the number from strings like "142.8 MB" value = float(mem_str.split()[0]) memory_values.append(value) memory_df['memory_mb'] = memory_values except Exception as e: console.print(f"[yellow]Could not parse memory values for {test_id}: {e}[/yellow]") continue # Create the plot plt.figure(figsize=(10, 6)) # Plot memory usage over time plt.plot(memory_df['elapsed_seconds'], memory_df['memory_mb'], color='#88c0d0', marker='o', linewidth=2, markersize=4) # Add annotations for chunk processing chunk_size = result.get('chunk_size', 0) url_count = result.get('url_count', 0) if chunk_size > 0 and url_count > 0: # Estimate chunk processing times num_chunks = (url_count + chunk_size - 1) // chunk_size # Ceiling division total_time = result.get('total_time_seconds', memory_df['elapsed_seconds'].max()) chunk_times = np.linspace(0, total_time, num_chunks + 1)[1:] for i, time_point in enumerate(chunk_times): if time_point <= 
memory_df['elapsed_seconds'].max(): plt.axvline(x=time_point, color='#4c566a', linestyle='--', alpha=0.6) plt.text(time_point, memory_df['memory_mb'].min(), f'Chunk {i+1}', rotation=90, verticalalignment='bottom', fontsize=8, color='#e0e0e0') # Set labels and title plt.xlabel('Elapsed Time (seconds)', color='#e0e0e0') plt.ylabel('Memory Usage (MB)', color='#e0e0e0') plt.title(f'Memory Usage During Test {test_id}\n({url_count} URLs, {result.get("workers", "?")} Workers)', color='#e0e0e0') # Add grid and set y-axis to start from zero plt.grid(True, alpha=0.3, color='#4c566a') # Add test metadata as text info_text = ( f"URLs: {url_count}\n" f"Workers: {result.get('workers', 'N/A')}\n" f"Chunk Size: {result.get('chunk_size', 'N/A')}\n" f"Total Time: {result.get('total_time_seconds', 0):.2f}s\n" ) # Calculate memory growth if len(memory_df) >= 2: first_mem = memory_df.iloc[0]['memory_mb'] last_mem = memory_df.iloc[-1]['memory_mb'] growth = last_mem - first_mem growth_rate = growth / result.get('total_time_seconds', 1) info_text += f"Memory Growth: {growth:.1f} MB\n" info_text += f"Growth Rate: {growth_rate:.2f} MB/s" plt.figtext(0.02, 0.02, info_text, fontsize=9, color='#e0e0e0', bbox=dict(facecolor='#3b4252', alpha=0.8, edgecolor='#4c566a')) # Save the figure if output_prefix is None: output_file = self.output_dir / f"memory_chart_{test_id}.png" else: output_file = Path(f"{output_prefix}_memory_{test_id}.png") plt.tight_layout() plt.savefig(output_file, dpi=100, bbox_inches='tight') plt.close() output_files.append(output_file) return output_files def generate_comparison_report(self, results, title=None, output_file=None): """Generate a comprehensive comparison report of multiple test runs. 
Args: results: Dictionary mapping test IDs to result data title: Optional title for the report output_file: File path to save the report Returns: Path to the saved report file """ if not results: console.print("[yellow]No results to generate comparison report[/yellow]") return None if output_file is None: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_file = self.output_dir / f"comparison_report_{timestamp}.html" # Create data for the report rows = [] for test_id, data in results.items(): # Calculate metrics urls = data.get('url_count', 0) workers = data.get('workers', 0) successful = data.get('successful_urls', 0) failed = data.get('failed_urls', 0) time_seconds = data.get('total_time_seconds', 0) # Calculate additional metrics success_rate = (successful / urls) * 100 if urls > 0 else 0 urls_per_second = urls / time_seconds if time_seconds > 0 else 0 urls_per_worker = urls / workers if workers > 0 else 0 # Calculate memory growth if available mem_start = None mem_end = None mem_growth = None if 'memory_samples' in data: samples = data['memory_samples'] if len(samples) >= 2: try: first_mem = float(samples.iloc[0]['memory_info'].split()[0]) last_mem = float(samples.iloc[-1]['memory_info'].split()[0]) mem_start = first_mem mem_end = last_mem mem_growth = last_mem - first_mem except: pass # Parse timestamp from test_id try: timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S") except: timestamp = None rows.append({ 'test_id': test_id, 'timestamp': timestamp, 'date': timestamp.strftime("%Y-%m-%d %H:%M:%S") if timestamp else "Unknown", 'urls': urls, 'workers': workers, 'chunk_size': data.get('chunk_size', 0), 'successful': successful, 'failed': failed, 'success_rate': success_rate, 'time_seconds': time_seconds, 'urls_per_second': urls_per_second, 'urls_per_worker': urls_per_worker, 'memory_start': mem_start, 'memory_end': mem_end, 'memory_growth': mem_growth }) # Sort data by timestamp if possible if VISUALIZATION_AVAILABLE: # Convert to DataFrame and 
sort by timestamp df = pd.DataFrame(rows) if 'timestamp' in df.columns and not df['timestamp'].isna().all(): df = df.sort_values('timestamp', ascending=False) else: # Simple sorting without pandas rows.sort(key=lambda x: x.get('timestamp', datetime.now()), reverse=True) df = None # Generate HTML report html = [] html.append('') html.append('') html.append('
') html.append('') html.append('') html.append(f'Report generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
') # Summary section html.append('This report compares the performance of Crawl4AI across multiple test runs.
') # Summary metrics data_available = (VISUALIZATION_AVAILABLE and df is not None and not df.empty) or (not VISUALIZATION_AVAILABLE and len(rows) > 0) if data_available: # Get the latest test data if VISUALIZATION_AVAILABLE and df is not None and not df.empty: latest_test = df.iloc[0] latest_id = latest_test['test_id'] else: latest_test = rows[0] # First row (already sorted by timestamp) latest_id = latest_test['test_id'] html.append('Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.
') html.append('Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.
') html.append('| {col} | ') html.append('||||||||
|---|---|---|---|---|---|---|---|---|
| {row["test_id"]} | ') html.append(f'{row["date"]} | ') html.append(f'{row["urls"]} | ') html.append(f'{row["workers"]} | ') html.append(f'{row["success_rate"]:.1f}% | ') html.append(f'{row["time_seconds"]:.2f} | ') html.append(f'{row["urls_per_second"]:.1f} | ') # Memory growth cell if pd.notna(row["memory_growth"]): html.append(f'{row["memory_growth"]:.1f} | ') else: html.append('N/A | ') html.append('
| {row["test_id"]} | ') html.append(f'{row["date"]} | ') html.append(f'{row["urls"]} | ') html.append(f'{row["workers"]} | ') html.append(f'{row["success_rate"]:.1f}% | ') html.append(f'{row["time_seconds"]:.2f} | ') html.append(f'{row["urls_per_second"]:.1f} | ') # Memory growth cell if row["memory_growth"] is not None: html.append(f'{row["memory_growth"]:.1f} | ') else: html.append('N/A | ') html.append('
Overall performance has {trend_desc} over the test period.
') html.append(f'Average throughput: {avg_urls_per_sec:.1f} URLs/second
') html.append(f'Maximum throughput: {max_urls_per_sec:.1f} URLs/second
') # Memory leak assessment if 'memory_growth' in df.columns and not df['memory_growth'].isna().all(): avg_growth = df['memory_growth'].mean() max_growth = df['memory_growth'].max() if avg_growth < 5: leak_assessment = "No significant memory leaks detected" leak_class = "status-good" elif avg_growth < 10: leak_assessment = "Minor memory growth observed" leak_class = "status-warning" else: leak_assessment = "Potential memory leak detected" leak_class = "status-bad" html.append(f'{leak_assessment}. Average memory growth: {avg_growth:.1f} MB per test.
') else: # Manual calculations without pandas if rows: # Calculate average and max throughput total_urls_per_sec = sum(row['urls_per_second'] for row in rows) avg_urls_per_sec = total_urls_per_sec / len(rows) max_urls_per_sec = max(row['urls_per_second'] for row in rows) html.append(f'Average throughput: {avg_urls_per_sec:.1f} URLs/second
') html.append(f'Maximum throughput: {max_urls_per_sec:.1f} URLs/second
') # Memory assessment (simplified without pandas) growth_values = [row['memory_growth'] for row in rows if row['memory_growth'] is not None] if growth_values: avg_growth = sum(growth_values) / len(growth_values) if avg_growth < 5: leak_assessment = "No significant memory leaks detected" leak_class = "status-good" elif avg_growth < 10: leak_assessment = "Minor memory growth observed" leak_class = "status-warning" else: leak_assessment = "Potential memory leak detected" leak_class = "status-bad" html.append(f'{leak_assessment}. Average memory growth: {avg_growth:.1f} MB per test.
') else: html.append('No test data available for analysis.
') html.append('Generated by Crawl4AI Benchmark Reporter
') html.append('