feat(tests): implement high volume stress testing framework
Add comprehensive stress testing solution for SDK using arun_many and dispatcher system: - Create test_stress_sdk.py for running high volume crawl tests - Add run_benchmark.py for orchestrating tests with predefined configs - Implement benchmark_report.py for generating performance reports - Add memory tracking and local test site generation - Support both streaming and batch processing modes - Add detailed documentation in README.md The framework enables testing SDK performance, concurrency handling, and memory behavior under high-volume scenarios.
This commit is contained in:
887
tests/memory/benchmark_report.py
Executable file
887
tests/memory/benchmark_report.py
Executable file
@@ -0,0 +1,887 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Benchmark reporting tool for Crawl4AI stress tests.
|
||||
Generates visual reports and comparisons between test runs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import glob
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.panel import Panel
|
||||
|
||||
# Initialize rich console -- single shared console for all terminal output
# in this module (summary tables, warnings, report links).
console = Console()

# Try to import optional visualization dependencies.
# The reporter degrades gracefully: without pandas/matplotlib/seaborn it
# still produces the text summary and an HTML report without charts.
VISUALIZATION_AVAILABLE = True
try:
    import pandas as pd
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    import numpy as np
    import seaborn as sns
except ImportError:
    VISUALIZATION_AVAILABLE = False
    console.print("[yellow]Warning: Visualization dependencies not found. Install with:[/yellow]")
    console.print("[yellow]pip install pandas matplotlib seaborn[/yellow]")
    console.print("[yellow]Only text-based reports will be generated.[/yellow]")

# Configure plotting if available
if VISUALIZATION_AVAILABLE:
    # Set plot style for dark theme (matches the dark HTML report)
    plt.style.use('dark_background')
    sns.set_theme(style="darkgrid")

    # Custom color palette based on Nord theme
    nord_palette = ["#88c0d0", "#81a1c1", "#a3be8c", "#ebcb8b", "#bf616a", "#b48ead", "#5e81ac"]
    sns.set_palette(nord_palette)
||||
class BenchmarkReporter:
    """Generates visual reports and comparisons for Crawl4AI stress tests."""

    def __init__(self, reports_dir="reports", output_dir="benchmark_reports"):
        """Initialize the benchmark reporter.

        Args:
            reports_dir: Directory containing test result files
            output_dir: Directory to save generated reports
        """
        self.reports_dir = Path(reports_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Without the visualization stack there is nothing to configure.
        if not VISUALIZATION_AVAILABLE:
            return

        # Ensure the matplotlib backend works in headless environments (CI).
        mpl.use('Agg')

        # Dark-theme styling shared by every chart this reporter produces.
        mpl.rcParams.update({
            'figure.figsize': (12, 8),
            'font.size': 12,
            'axes.labelsize': 14,
            'axes.titlesize': 16,
            'xtick.labelsize': 12,
            'ytick.labelsize': 12,
            'legend.fontsize': 12,
            'figure.facecolor': '#1e1e1e',
            'axes.facecolor': '#2e3440',
            'savefig.facecolor': '#1e1e1e',
            'text.color': '#e0e0e0',
            'axes.labelcolor': '#e0e0e0',
            'xtick.color': '#e0e0e0',
            'ytick.color': '#e0e0e0',
            'grid.color': '#444444',
            'figure.edgecolor': '#444444',
        })
||||
def load_test_results(self, limit=None):
|
||||
"""Load all test results from the reports directory.
|
||||
|
||||
Args:
|
||||
limit: Optional limit on number of most recent tests to load
|
||||
|
||||
Returns:
|
||||
Dictionary mapping test IDs to result data
|
||||
"""
|
||||
result_files = glob.glob(str(self.reports_dir / "test_results_*.json"))
|
||||
|
||||
# Sort files by modification time (newest first)
|
||||
result_files.sort(key=os.path.getmtime, reverse=True)
|
||||
|
||||
if limit:
|
||||
result_files = result_files[:limit]
|
||||
|
||||
results = {}
|
||||
for file_path in result_files:
|
||||
try:
|
||||
with open(file_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
test_id = data.get('test_id')
|
||||
if test_id:
|
||||
results[test_id] = data
|
||||
|
||||
# Try to load the corresponding memory samples
|
||||
csv_path = self.reports_dir / f"memory_samples_{test_id}.csv"
|
||||
if csv_path.exists():
|
||||
try:
|
||||
memory_df = pd.read_csv(csv_path)
|
||||
results[test_id]['memory_samples'] = memory_df
|
||||
except Exception as e:
|
||||
console.print(f"[yellow]Warning: Could not load memory samples for {test_id}: {e}[/yellow]")
|
||||
except Exception as e:
|
||||
console.print(f"[red]Error loading {file_path}: {e}[/red]")
|
||||
|
||||
console.print(f"Loaded {len(results)} test results")
|
||||
return results
|
||||
|
||||
def generate_summary_table(self, results):
|
||||
"""Generate a summary table of test results.
|
||||
|
||||
Args:
|
||||
results: Dictionary mapping test IDs to result data
|
||||
|
||||
Returns:
|
||||
Rich Table object
|
||||
"""
|
||||
table = Table(title="Crawl4AI Stress Test Summary", show_header=True)
|
||||
|
||||
# Define columns
|
||||
table.add_column("Test ID", style="cyan")
|
||||
table.add_column("Date", style="bright_green")
|
||||
table.add_column("URLs", justify="right")
|
||||
table.add_column("Workers", justify="right")
|
||||
table.add_column("Success %", justify="right")
|
||||
table.add_column("Time (s)", justify="right")
|
||||
table.add_column("Mem Growth", justify="right")
|
||||
table.add_column("URLs/sec", justify="right")
|
||||
|
||||
# Add rows
|
||||
for test_id, data in sorted(results.items(), key=lambda x: x[0], reverse=True):
|
||||
# Parse timestamp from test_id
|
||||
try:
|
||||
date_str = datetime.strptime(test_id, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M")
|
||||
except:
|
||||
date_str = "Unknown"
|
||||
|
||||
# Calculate success percentage
|
||||
total_urls = data.get('url_count', 0)
|
||||
successful = data.get('successful_urls', 0)
|
||||
success_pct = (successful / total_urls * 100) if total_urls > 0 else 0
|
||||
|
||||
# Calculate memory growth if available
|
||||
mem_growth = "N/A"
|
||||
if 'memory_samples' in data:
|
||||
samples = data['memory_samples']
|
||||
if len(samples) >= 2:
|
||||
# Try to extract numeric values from memory_info strings
|
||||
try:
|
||||
first_mem = float(samples.iloc[0]['memory_info'].split()[0])
|
||||
last_mem = float(samples.iloc[-1]['memory_info'].split()[0])
|
||||
mem_growth = f"{last_mem - first_mem:.1f} MB"
|
||||
except:
|
||||
pass
|
||||
|
||||
# Calculate URLs per second
|
||||
time_taken = data.get('total_time_seconds', 0)
|
||||
urls_per_sec = total_urls / time_taken if time_taken > 0 else 0
|
||||
|
||||
table.add_row(
|
||||
test_id,
|
||||
date_str,
|
||||
str(total_urls),
|
||||
str(data.get('workers', 'N/A')),
|
||||
f"{success_pct:.1f}%",
|
||||
f"{data.get('total_time_seconds', 0):.2f}",
|
||||
mem_growth,
|
||||
f"{urls_per_sec:.1f}"
|
||||
)
|
||||
|
||||
return table
|
||||
|
||||
def generate_performance_chart(self, results, output_file=None):
|
||||
"""Generate a performance comparison chart.
|
||||
|
||||
Args:
|
||||
results: Dictionary mapping test IDs to result data
|
||||
output_file: File path to save the chart
|
||||
|
||||
Returns:
|
||||
Path to the saved chart file or None if visualization is not available
|
||||
"""
|
||||
if not VISUALIZATION_AVAILABLE:
|
||||
console.print("[yellow]Skipping performance chart - visualization dependencies not available[/yellow]")
|
||||
return None
|
||||
|
||||
# Extract relevant data
|
||||
data = []
|
||||
for test_id, result in results.items():
|
||||
urls = result.get('url_count', 0)
|
||||
workers = result.get('workers', 0)
|
||||
time_taken = result.get('total_time_seconds', 0)
|
||||
urls_per_sec = urls / time_taken if time_taken > 0 else 0
|
||||
|
||||
# Parse timestamp from test_id for sorting
|
||||
try:
|
||||
timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S")
|
||||
data.append({
|
||||
'test_id': test_id,
|
||||
'timestamp': timestamp,
|
||||
'urls': urls,
|
||||
'workers': workers,
|
||||
'time_seconds': time_taken,
|
||||
'urls_per_sec': urls_per_sec
|
||||
})
|
||||
except:
|
||||
console.print(f"[yellow]Warning: Could not parse timestamp from {test_id}[/yellow]")
|
||||
|
||||
if not data:
|
||||
console.print("[yellow]No valid data for performance chart[/yellow]")
|
||||
return None
|
||||
|
||||
# Convert to DataFrame and sort by timestamp
|
||||
df = pd.DataFrame(data)
|
||||
df = df.sort_values('timestamp')
|
||||
|
||||
# Create the plot
|
||||
fig, ax1 = plt.subplots(figsize=(12, 6))
|
||||
|
||||
# Plot URLs per second as bars with properly set x-axis
|
||||
x_pos = range(len(df['test_id']))
|
||||
bars = ax1.bar(x_pos, df['urls_per_sec'], color='#88c0d0', alpha=0.8)
|
||||
ax1.set_ylabel('URLs per Second', color='#88c0d0')
|
||||
ax1.tick_params(axis='y', labelcolor='#88c0d0')
|
||||
|
||||
# Properly set x-axis labels
|
||||
ax1.set_xticks(x_pos)
|
||||
ax1.set_xticklabels(df['test_id'].tolist(), rotation=45, ha='right')
|
||||
|
||||
# Add worker count as text on each bar
|
||||
for i, bar in enumerate(bars):
|
||||
height = bar.get_height()
|
||||
workers = df.iloc[i]['workers']
|
||||
ax1.text(i, height + 0.1,
|
||||
f'W: {workers}', ha='center', va='bottom', fontsize=9, color='#e0e0e0')
|
||||
|
||||
# Add a second y-axis for total URLs
|
||||
ax2 = ax1.twinx()
|
||||
ax2.plot(x_pos, df['urls'], '-', color='#bf616a', alpha=0.8, markersize=6, marker='o')
|
||||
ax2.set_ylabel('Total URLs', color='#bf616a')
|
||||
ax2.tick_params(axis='y', labelcolor='#bf616a')
|
||||
|
||||
# Set title and layout
|
||||
plt.title('Crawl4AI Performance Benchmarks')
|
||||
plt.tight_layout()
|
||||
|
||||
# Save the figure
|
||||
if output_file is None:
|
||||
output_file = self.output_dir / "performance_comparison.png"
|
||||
plt.savefig(output_file, dpi=100, bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
return output_file
|
||||
|
||||
def generate_memory_charts(self, results, output_prefix=None):
|
||||
"""Generate memory usage charts for each test.
|
||||
|
||||
Args:
|
||||
results: Dictionary mapping test IDs to result data
|
||||
output_prefix: Prefix for output file names
|
||||
|
||||
Returns:
|
||||
List of paths to the saved chart files
|
||||
"""
|
||||
if not VISUALIZATION_AVAILABLE:
|
||||
console.print("[yellow]Skipping memory charts - visualization dependencies not available[/yellow]")
|
||||
return []
|
||||
|
||||
output_files = []
|
||||
|
||||
for test_id, result in results.items():
|
||||
if 'memory_samples' not in result:
|
||||
continue
|
||||
|
||||
memory_df = result['memory_samples']
|
||||
|
||||
# Check if we have enough data points
|
||||
if len(memory_df) < 2:
|
||||
continue
|
||||
|
||||
# Try to extract numeric values from memory_info strings
|
||||
try:
|
||||
memory_values = []
|
||||
for mem_str in memory_df['memory_info']:
|
||||
# Extract the number from strings like "142.8 MB"
|
||||
value = float(mem_str.split()[0])
|
||||
memory_values.append(value)
|
||||
|
||||
memory_df['memory_mb'] = memory_values
|
||||
except Exception as e:
|
||||
console.print(f"[yellow]Could not parse memory values for {test_id}: {e}[/yellow]")
|
||||
continue
|
||||
|
||||
# Create the plot
|
||||
plt.figure(figsize=(10, 6))
|
||||
|
||||
# Plot memory usage over time
|
||||
plt.plot(memory_df['elapsed_seconds'], memory_df['memory_mb'],
|
||||
color='#88c0d0', marker='o', linewidth=2, markersize=4)
|
||||
|
||||
# Add annotations for chunk processing
|
||||
chunk_size = result.get('chunk_size', 0)
|
||||
url_count = result.get('url_count', 0)
|
||||
if chunk_size > 0 and url_count > 0:
|
||||
# Estimate chunk processing times
|
||||
num_chunks = (url_count + chunk_size - 1) // chunk_size # Ceiling division
|
||||
total_time = result.get('total_time_seconds', memory_df['elapsed_seconds'].max())
|
||||
chunk_times = np.linspace(0, total_time, num_chunks + 1)[1:]
|
||||
|
||||
for i, time_point in enumerate(chunk_times):
|
||||
if time_point <= memory_df['elapsed_seconds'].max():
|
||||
plt.axvline(x=time_point, color='#4c566a', linestyle='--', alpha=0.6)
|
||||
plt.text(time_point, memory_df['memory_mb'].min(), f'Chunk {i+1}',
|
||||
rotation=90, verticalalignment='bottom', fontsize=8, color='#e0e0e0')
|
||||
|
||||
# Set labels and title
|
||||
plt.xlabel('Elapsed Time (seconds)', color='#e0e0e0')
|
||||
plt.ylabel('Memory Usage (MB)', color='#e0e0e0')
|
||||
plt.title(f'Memory Usage During Test {test_id}\n({url_count} URLs, {result.get("workers", "?")} Workers)',
|
||||
color='#e0e0e0')
|
||||
|
||||
# Add grid and set y-axis to start from zero
|
||||
plt.grid(True, alpha=0.3, color='#4c566a')
|
||||
|
||||
# Add test metadata as text
|
||||
info_text = (
|
||||
f"URLs: {url_count}\n"
|
||||
f"Workers: {result.get('workers', 'N/A')}\n"
|
||||
f"Chunk Size: {result.get('chunk_size', 'N/A')}\n"
|
||||
f"Total Time: {result.get('total_time_seconds', 0):.2f}s\n"
|
||||
)
|
||||
|
||||
# Calculate memory growth
|
||||
if len(memory_df) >= 2:
|
||||
first_mem = memory_df.iloc[0]['memory_mb']
|
||||
last_mem = memory_df.iloc[-1]['memory_mb']
|
||||
growth = last_mem - first_mem
|
||||
growth_rate = growth / result.get('total_time_seconds', 1)
|
||||
|
||||
info_text += f"Memory Growth: {growth:.1f} MB\n"
|
||||
info_text += f"Growth Rate: {growth_rate:.2f} MB/s"
|
||||
|
||||
plt.figtext(0.02, 0.02, info_text, fontsize=9, color='#e0e0e0',
|
||||
bbox=dict(facecolor='#3b4252', alpha=0.8, edgecolor='#4c566a'))
|
||||
|
||||
# Save the figure
|
||||
if output_prefix is None:
|
||||
output_file = self.output_dir / f"memory_chart_{test_id}.png"
|
||||
else:
|
||||
output_file = Path(f"{output_prefix}_memory_{test_id}.png")
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_file, dpi=100, bbox_inches='tight')
|
||||
plt.close()
|
||||
|
||||
output_files.append(output_file)
|
||||
|
||||
return output_files
|
||||
|
||||
def generate_comparison_report(self, results, title=None, output_file=None):
|
||||
"""Generate a comprehensive comparison report of multiple test runs.
|
||||
|
||||
Args:
|
||||
results: Dictionary mapping test IDs to result data
|
||||
title: Optional title for the report
|
||||
output_file: File path to save the report
|
||||
|
||||
Returns:
|
||||
Path to the saved report file
|
||||
"""
|
||||
if not results:
|
||||
console.print("[yellow]No results to generate comparison report[/yellow]")
|
||||
return None
|
||||
|
||||
if output_file is None:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
output_file = self.output_dir / f"comparison_report_{timestamp}.html"
|
||||
|
||||
# Create data for the report
|
||||
rows = []
|
||||
for test_id, data in results.items():
|
||||
# Calculate metrics
|
||||
urls = data.get('url_count', 0)
|
||||
workers = data.get('workers', 0)
|
||||
successful = data.get('successful_urls', 0)
|
||||
failed = data.get('failed_urls', 0)
|
||||
time_seconds = data.get('total_time_seconds', 0)
|
||||
|
||||
# Calculate additional metrics
|
||||
success_rate = (successful / urls) * 100 if urls > 0 else 0
|
||||
urls_per_second = urls / time_seconds if time_seconds > 0 else 0
|
||||
urls_per_worker = urls / workers if workers > 0 else 0
|
||||
|
||||
# Calculate memory growth if available
|
||||
mem_start = None
|
||||
mem_end = None
|
||||
mem_growth = None
|
||||
if 'memory_samples' in data:
|
||||
samples = data['memory_samples']
|
||||
if len(samples) >= 2:
|
||||
try:
|
||||
first_mem = float(samples.iloc[0]['memory_info'].split()[0])
|
||||
last_mem = float(samples.iloc[-1]['memory_info'].split()[0])
|
||||
mem_start = first_mem
|
||||
mem_end = last_mem
|
||||
mem_growth = last_mem - first_mem
|
||||
except:
|
||||
pass
|
||||
|
||||
# Parse timestamp from test_id
|
||||
try:
|
||||
timestamp = datetime.strptime(test_id, "%Y%m%d_%H%M%S")
|
||||
except:
|
||||
timestamp = None
|
||||
|
||||
rows.append({
|
||||
'test_id': test_id,
|
||||
'timestamp': timestamp,
|
||||
'date': timestamp.strftime("%Y-%m-%d %H:%M:%S") if timestamp else "Unknown",
|
||||
'urls': urls,
|
||||
'workers': workers,
|
||||
'chunk_size': data.get('chunk_size', 0),
|
||||
'successful': successful,
|
||||
'failed': failed,
|
||||
'success_rate': success_rate,
|
||||
'time_seconds': time_seconds,
|
||||
'urls_per_second': urls_per_second,
|
||||
'urls_per_worker': urls_per_worker,
|
||||
'memory_start': mem_start,
|
||||
'memory_end': mem_end,
|
||||
'memory_growth': mem_growth
|
||||
})
|
||||
|
||||
# Sort data by timestamp if possible
|
||||
if VISUALIZATION_AVAILABLE:
|
||||
# Convert to DataFrame and sort by timestamp
|
||||
df = pd.DataFrame(rows)
|
||||
if 'timestamp' in df.columns and not df['timestamp'].isna().all():
|
||||
df = df.sort_values('timestamp', ascending=False)
|
||||
else:
|
||||
# Simple sorting without pandas
|
||||
rows.sort(key=lambda x: x.get('timestamp', datetime.now()), reverse=True)
|
||||
df = None
|
||||
|
||||
# Generate HTML report
|
||||
html = []
|
||||
html.append('<!DOCTYPE html>')
|
||||
html.append('<html lang="en">')
|
||||
html.append('<head>')
|
||||
html.append('<meta charset="UTF-8">')
|
||||
html.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
|
||||
html.append(f'<title>{title or "Crawl4AI Benchmark Comparison"}</title>')
|
||||
html.append('<style>')
|
||||
html.append('''
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
color: #e0e0e0;
|
||||
background-color: #1e1e1e;
|
||||
}
|
||||
h1, h2, h3 {
|
||||
color: #81a1c1;
|
||||
}
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
th, td {
|
||||
text-align: left;
|
||||
padding: 12px;
|
||||
border-bottom: 1px solid #444;
|
||||
}
|
||||
th {
|
||||
background-color: #2e3440;
|
||||
font-weight: bold;
|
||||
}
|
||||
tr:hover {
|
||||
background-color: #2e3440;
|
||||
}
|
||||
a {
|
||||
color: #88c0d0;
|
||||
text-decoration: none;
|
||||
}
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
.chart-container {
|
||||
margin: 30px 0;
|
||||
text-align: center;
|
||||
background-color: #2e3440;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
}
|
||||
.chart-container img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
border: 1px solid #444;
|
||||
box-shadow: 0 0 10px rgba(0,0,0,0.3);
|
||||
}
|
||||
.card {
|
||||
border: 1px solid #444;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
margin-bottom: 20px;
|
||||
background-color: #2e3440;
|
||||
box-shadow: 0 0 10px rgba(0,0,0,0.2);
|
||||
}
|
||||
.highlight {
|
||||
background-color: #3b4252;
|
||||
font-weight: bold;
|
||||
}
|
||||
.status-good {
|
||||
color: #a3be8c;
|
||||
}
|
||||
.status-warning {
|
||||
color: #ebcb8b;
|
||||
}
|
||||
.status-bad {
|
||||
color: #bf616a;
|
||||
}
|
||||
''')
|
||||
html.append('</style>')
|
||||
html.append('</head>')
|
||||
html.append('<body>')
|
||||
|
||||
# Header
|
||||
html.append(f'<h1>{title or "Crawl4AI Benchmark Comparison"}</h1>')
|
||||
html.append(f'<p>Report generated on {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>')
|
||||
|
||||
# Summary section
|
||||
html.append('<div class="card">')
|
||||
html.append('<h2>Summary</h2>')
|
||||
html.append('<p>This report compares the performance of Crawl4AI across multiple test runs.</p>')
|
||||
|
||||
# Summary metrics
|
||||
data_available = (VISUALIZATION_AVAILABLE and df is not None and not df.empty) or (not VISUALIZATION_AVAILABLE and len(rows) > 0)
|
||||
if data_available:
|
||||
# Get the latest test data
|
||||
if VISUALIZATION_AVAILABLE and df is not None and not df.empty:
|
||||
latest_test = df.iloc[0]
|
||||
latest_id = latest_test['test_id']
|
||||
else:
|
||||
latest_test = rows[0] # First row (already sorted by timestamp)
|
||||
latest_id = latest_test['test_id']
|
||||
|
||||
html.append('<h3>Latest Test Results</h3>')
|
||||
html.append('<ul>')
|
||||
html.append(f'<li><strong>Test ID:</strong> {latest_id}</li>')
|
||||
html.append(f'<li><strong>Date:</strong> {latest_test["date"]}</li>')
|
||||
html.append(f'<li><strong>URLs:</strong> {latest_test["urls"]}</li>')
|
||||
html.append(f'<li><strong>Workers:</strong> {latest_test["workers"]}</li>')
|
||||
html.append(f'<li><strong>Success Rate:</strong> {latest_test["success_rate"]:.1f}%</li>')
|
||||
html.append(f'<li><strong>Time:</strong> {latest_test["time_seconds"]:.2f} seconds</li>')
|
||||
html.append(f'<li><strong>Performance:</strong> {latest_test["urls_per_second"]:.1f} URLs/second</li>')
|
||||
|
||||
# Check memory growth (handle both pandas and dict mode)
|
||||
memory_growth_available = False
|
||||
if VISUALIZATION_AVAILABLE and df is not None:
|
||||
if pd.notna(latest_test["memory_growth"]):
|
||||
html.append(f'<li><strong>Memory Growth:</strong> {latest_test["memory_growth"]:.1f} MB</li>')
|
||||
memory_growth_available = True
|
||||
else:
|
||||
if latest_test["memory_growth"] is not None:
|
||||
html.append(f'<li><strong>Memory Growth:</strong> {latest_test["memory_growth"]:.1f} MB</li>')
|
||||
memory_growth_available = True
|
||||
|
||||
html.append('</ul>')
|
||||
|
||||
# If we have more than one test, show trend
|
||||
if (VISUALIZATION_AVAILABLE and df is not None and len(df) > 1) or (not VISUALIZATION_AVAILABLE and len(rows) > 1):
|
||||
if VISUALIZATION_AVAILABLE and df is not None:
|
||||
prev_test = df.iloc[1]
|
||||
else:
|
||||
prev_test = rows[1]
|
||||
|
||||
# Calculate performance change
|
||||
perf_change = ((latest_test["urls_per_second"] / prev_test["urls_per_second"]) - 1) * 100 if prev_test["urls_per_second"] > 0 else 0
|
||||
|
||||
status_class = ""
|
||||
if perf_change > 5:
|
||||
status_class = "status-good"
|
||||
elif perf_change < -5:
|
||||
status_class = "status-bad"
|
||||
|
||||
html.append('<h3>Performance Trend</h3>')
|
||||
html.append('<ul>')
|
||||
html.append(f'<li><strong>Performance Change:</strong> <span class="{status_class}">{perf_change:+.1f}%</span> compared to previous test</li>')
|
||||
|
||||
# Memory trend if available
|
||||
memory_trend_available = False
|
||||
if VISUALIZATION_AVAILABLE and df is not None:
|
||||
if pd.notna(latest_test["memory_growth"]) and pd.notna(prev_test["memory_growth"]):
|
||||
mem_change = latest_test["memory_growth"] - prev_test["memory_growth"]
|
||||
memory_trend_available = True
|
||||
else:
|
||||
if latest_test["memory_growth"] is not None and prev_test["memory_growth"] is not None:
|
||||
mem_change = latest_test["memory_growth"] - prev_test["memory_growth"]
|
||||
memory_trend_available = True
|
||||
|
||||
if memory_trend_available:
|
||||
mem_status = ""
|
||||
if mem_change < -1: # Improved (less growth)
|
||||
mem_status = "status-good"
|
||||
elif mem_change > 1: # Worse (more growth)
|
||||
mem_status = "status-bad"
|
||||
|
||||
html.append(f'<li><strong>Memory Trend:</strong> <span class="{mem_status}">{mem_change:+.1f} MB</span> change in memory growth</li>')
|
||||
|
||||
html.append('</ul>')
|
||||
|
||||
html.append('</div>')
|
||||
|
||||
# Generate performance chart if visualization is available
|
||||
if VISUALIZATION_AVAILABLE:
|
||||
perf_chart = self.generate_performance_chart(results)
|
||||
if perf_chart:
|
||||
html.append('<div class="chart-container">')
|
||||
html.append('<h2>Performance Comparison</h2>')
|
||||
html.append(f'<img src="{os.path.relpath(perf_chart, os.path.dirname(output_file))}" alt="Performance Comparison Chart">')
|
||||
html.append('</div>')
|
||||
else:
|
||||
html.append('<div class="chart-container">')
|
||||
html.append('<h2>Performance Comparison</h2>')
|
||||
html.append('<p>Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.</p>')
|
||||
html.append('</div>')
|
||||
|
||||
# Generate memory charts if visualization is available
|
||||
if VISUALIZATION_AVAILABLE:
|
||||
memory_charts = self.generate_memory_charts(results)
|
||||
if memory_charts:
|
||||
html.append('<div class="chart-container">')
|
||||
html.append('<h2>Memory Usage</h2>')
|
||||
|
||||
for chart in memory_charts:
|
||||
test_id = chart.stem.split('_')[-1]
|
||||
html.append(f'<h3>Test {test_id}</h3>')
|
||||
html.append(f'<img src="{os.path.relpath(chart, os.path.dirname(output_file))}" alt="Memory Chart for {test_id}">')
|
||||
|
||||
html.append('</div>')
|
||||
else:
|
||||
html.append('<div class="chart-container">')
|
||||
html.append('<h2>Memory Usage</h2>')
|
||||
html.append('<p>Charts not available - install visualization dependencies (pandas, matplotlib, seaborn) to enable.</p>')
|
||||
html.append('</div>')
|
||||
|
||||
# Detailed results table
|
||||
html.append('<h2>Detailed Results</h2>')
|
||||
|
||||
# Add the results as an HTML table
|
||||
html.append('<table>')
|
||||
|
||||
# Table headers
|
||||
html.append('<tr>')
|
||||
for col in ['Test ID', 'Date', 'URLs', 'Workers', 'Success %', 'Time (s)', 'URLs/sec', 'Mem Growth (MB)']:
|
||||
html.append(f'<th>{col}</th>')
|
||||
html.append('</tr>')
|
||||
|
||||
# Table rows - handle both pandas DataFrame and list of dicts
|
||||
if VISUALIZATION_AVAILABLE and df is not None:
|
||||
# Using pandas DataFrame
|
||||
for _, row in df.iterrows():
|
||||
html.append('<tr>')
|
||||
html.append(f'<td>{row["test_id"]}</td>')
|
||||
html.append(f'<td>{row["date"]}</td>')
|
||||
html.append(f'<td>{row["urls"]}</td>')
|
||||
html.append(f'<td>{row["workers"]}</td>')
|
||||
html.append(f'<td>{row["success_rate"]:.1f}%</td>')
|
||||
html.append(f'<td>{row["time_seconds"]:.2f}</td>')
|
||||
html.append(f'<td>{row["urls_per_second"]:.1f}</td>')
|
||||
|
||||
# Memory growth cell
|
||||
if pd.notna(row["memory_growth"]):
|
||||
html.append(f'<td>{row["memory_growth"]:.1f}</td>')
|
||||
else:
|
||||
html.append('<td>N/A</td>')
|
||||
|
||||
html.append('</tr>')
|
||||
else:
|
||||
# Using list of dicts (when pandas is not available)
|
||||
for row in rows:
|
||||
html.append('<tr>')
|
||||
html.append(f'<td>{row["test_id"]}</td>')
|
||||
html.append(f'<td>{row["date"]}</td>')
|
||||
html.append(f'<td>{row["urls"]}</td>')
|
||||
html.append(f'<td>{row["workers"]}</td>')
|
||||
html.append(f'<td>{row["success_rate"]:.1f}%</td>')
|
||||
html.append(f'<td>{row["time_seconds"]:.2f}</td>')
|
||||
html.append(f'<td>{row["urls_per_second"]:.1f}</td>')
|
||||
|
||||
# Memory growth cell
|
||||
if row["memory_growth"] is not None:
|
||||
html.append(f'<td>{row["memory_growth"]:.1f}</td>')
|
||||
else:
|
||||
html.append('<td>N/A</td>')
|
||||
|
||||
html.append('</tr>')
|
||||
|
||||
html.append('</table>')
|
||||
|
||||
# Conclusion section
|
||||
html.append('<div class="card">')
|
||||
html.append('<h2>Conclusion</h2>')
|
||||
|
||||
if VISUALIZATION_AVAILABLE and df is not None and not df.empty:
|
||||
# Using pandas for statistics (when available)
|
||||
# Calculate some overall statistics
|
||||
avg_urls_per_sec = df['urls_per_second'].mean()
|
||||
max_urls_per_sec = df['urls_per_second'].max()
|
||||
|
||||
# Determine if we have a trend
|
||||
if len(df) > 1:
|
||||
trend_data = df.sort_values('timestamp')
|
||||
first_perf = trend_data.iloc[0]['urls_per_second']
|
||||
last_perf = trend_data.iloc[-1]['urls_per_second']
|
||||
|
||||
perf_change = ((last_perf / first_perf) - 1) * 100 if first_perf > 0 else 0
|
||||
|
||||
if perf_change > 10:
|
||||
trend_desc = "significantly improved"
|
||||
trend_class = "status-good"
|
||||
elif perf_change > 5:
|
||||
trend_desc = "improved"
|
||||
trend_class = "status-good"
|
||||
elif perf_change < -10:
|
||||
trend_desc = "significantly decreased"
|
||||
trend_class = "status-bad"
|
||||
elif perf_change < -5:
|
||||
trend_desc = "decreased"
|
||||
trend_class = "status-bad"
|
||||
else:
|
||||
trend_desc = "remained stable"
|
||||
trend_class = ""
|
||||
|
||||
html.append(f'<p>Overall performance has <span class="{trend_class}">{trend_desc}</span> over the test period.</p>')
|
||||
|
||||
html.append(f'<p>Average throughput: <strong>{avg_urls_per_sec:.1f}</strong> URLs/second</p>')
|
||||
html.append(f'<p>Maximum throughput: <strong>{max_urls_per_sec:.1f}</strong> URLs/second</p>')
|
||||
|
||||
# Memory leak assessment
|
||||
if 'memory_growth' in df.columns and not df['memory_growth'].isna().all():
|
||||
avg_growth = df['memory_growth'].mean()
|
||||
max_growth = df['memory_growth'].max()
|
||||
|
||||
if avg_growth < 5:
|
||||
leak_assessment = "No significant memory leaks detected"
|
||||
leak_class = "status-good"
|
||||
elif avg_growth < 10:
|
||||
leak_assessment = "Minor memory growth observed"
|
||||
leak_class = "status-warning"
|
||||
else:
|
||||
leak_assessment = "Potential memory leak detected"
|
||||
leak_class = "status-bad"
|
||||
|
||||
html.append(f'<p><span class="{leak_class}">{leak_assessment}</span>. Average memory growth: <strong>{avg_growth:.1f} MB</strong> per test.</p>')
|
||||
else:
|
||||
# Manual calculations without pandas
|
||||
if rows:
|
||||
# Calculate average and max throughput
|
||||
total_urls_per_sec = sum(row['urls_per_second'] for row in rows)
|
||||
avg_urls_per_sec = total_urls_per_sec / len(rows)
|
||||
max_urls_per_sec = max(row['urls_per_second'] for row in rows)
|
||||
|
||||
html.append(f'<p>Average throughput: <strong>{avg_urls_per_sec:.1f}</strong> URLs/second</p>')
|
||||
html.append(f'<p>Maximum throughput: <strong>{max_urls_per_sec:.1f}</strong> URLs/second</p>')
|
||||
|
||||
# Memory assessment (simplified without pandas)
|
||||
growth_values = [row['memory_growth'] for row in rows if row['memory_growth'] is not None]
|
||||
if growth_values:
|
||||
avg_growth = sum(growth_values) / len(growth_values)
|
||||
|
||||
if avg_growth < 5:
|
||||
leak_assessment = "No significant memory leaks detected"
|
||||
leak_class = "status-good"
|
||||
elif avg_growth < 10:
|
||||
leak_assessment = "Minor memory growth observed"
|
||||
leak_class = "status-warning"
|
||||
else:
|
||||
leak_assessment = "Potential memory leak detected"
|
||||
leak_class = "status-bad"
|
||||
|
||||
html.append(f'<p><span class="{leak_class}">{leak_assessment}</span>. Average memory growth: <strong>{avg_growth:.1f} MB</strong> per test.</p>')
|
||||
else:
|
||||
html.append('<p>No test data available for analysis.</p>')
|
||||
|
||||
html.append('</div>')
|
||||
|
||||
# Footer
|
||||
html.append('<div style="margin-top: 30px; text-align: center; color: #777; font-size: 0.9em;">')
|
||||
html.append('<p>Generated by Crawl4AI Benchmark Reporter</p>')
|
||||
html.append('</div>')
|
||||
|
||||
html.append('</body>')
|
||||
html.append('</html>')
|
||||
|
||||
# Write the HTML file
|
||||
with open(output_file, 'w') as f:
|
||||
f.write('\n'.join(html))
|
||||
|
||||
# Print a clickable link for terminals that support it (iTerm, VS Code, etc.)
|
||||
file_url = f"file://{os.path.abspath(output_file)}"
|
||||
console.print(f"[green]Comparison report saved to: {output_file}[/green]")
|
||||
console.print(f"[blue underline]Click to open report: {file_url}[/blue underline]")
|
||||
return output_file
|
||||
|
||||
def run(self, limit=None, output_file=None):
|
||||
"""Generate a full benchmark report.
|
||||
|
||||
Args:
|
||||
limit: Optional limit on number of most recent tests to include
|
||||
output_file: Optional output file path
|
||||
|
||||
Returns:
|
||||
Path to the generated report file
|
||||
"""
|
||||
# Load test results
|
||||
results = self.load_test_results(limit=limit)
|
||||
|
||||
if not results:
|
||||
console.print("[yellow]No test results found. Run some tests first.[/yellow]")
|
||||
return None
|
||||
|
||||
# Generate and display summary table
|
||||
summary_table = self.generate_summary_table(results)
|
||||
console.print(summary_table)
|
||||
|
||||
# Generate comparison report
|
||||
title = f"Crawl4AI Benchmark Report ({len(results)} test runs)"
|
||||
report_file = self.generate_comparison_report(results, title=title, output_file=output_file)
|
||||
|
||||
if report_file:
|
||||
console.print(f"[bold green]Report generated successfully: {report_file}[/bold green]")
|
||||
return report_file
|
||||
else:
|
||||
console.print("[bold red]Failed to generate report[/bold red]")
|
||||
return None
|
||||
|
||||
|
||||
def main():
    """Main entry point for the benchmark reporter.

    Returns 0 on success and 1 on failure, for use as a process exit code.
    """
    parser = argparse.ArgumentParser(description="Generate benchmark reports for Crawl4AI stress tests")
    parser.add_argument("--reports-dir", type=str, default="reports",
                        help="Directory containing test result files")
    parser.add_argument("--output-dir", type=str, default="benchmark_reports",
                        help="Directory to save generated reports")
    parser.add_argument("--limit", type=int, default=None,
                        help="Limit to most recent N test results")
    parser.add_argument("--output-file", type=str, default=None,
                        help="Custom output file path for the report")
    args = parser.parse_args()

    # Build the reporter and produce the report.
    reporter = BenchmarkReporter(reports_dir=args.reports_dir, output_dir=args.output_dir)
    report_file = reporter.run(limit=args.limit, output_file=args.output_file)

    if not report_file:
        print("Failed to generate report")
        return 1

    print(f"Report generated at: {report_file}")
    return 0
|
||||
if __name__ == "__main__":
    # sys is already imported at module level; the local re-import was redundant.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user