feat(tests): implement high volume stress testing framework

Add comprehensive stress testing solution for SDK using arun_many and dispatcher system:
- Create test_stress_sdk.py for running high volume crawl tests
- Add run_benchmark.py for orchestrating tests with predefined configs
- Implement benchmark_report.py for generating performance reports
- Add memory tracking and local test site generation
- Support both streaming and batch processing modes
- Add detailed documentation in README.md

The framework enables testing SDK performance, concurrency handling,
and memory behavior under high-volume scenarios.
This commit is contained in:
UncleCode
2025-04-17 22:31:51 +08:00
parent 94d486579c
commit 921e0c46b6
7 changed files with 2161 additions and 1 deletions

259
tests/memory/run_benchmark.py Executable file
View File

@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""
Run a complete Crawl4AI benchmark test using test_stress_sdk.py and generate a report.
"""
import argparse
import glob
import os
import pathlib  # required by the report-link code (Path.as_uri)
import subprocess
import sys
import time
from datetime import datetime

from rich.console import Console
from rich.text import Text
# Shared Rich console used for all terminal output in this script.
console = Console()
# Updated TEST_CONFIGS to use max_sessions
# Preset benchmark sizes. Each entry defines the URL count, the maximum
# number of concurrent sessions, and the batch chunk size forwarded to
# test_stress_sdk.py, plus a human-readable description for the banner.
TEST_CONFIGS = {
    "quick": {"urls": 50, "max_sessions": 4, "chunk_size": 10, "description": "Quick test (50 URLs, 4 sessions)"},
    "small": {"urls": 100, "max_sessions": 8, "chunk_size": 20, "description": "Small test (100 URLs, 8 sessions)"},
    "medium": {"urls": 500, "max_sessions": 16, "chunk_size": 50, "description": "Medium test (500 URLs, 16 sessions)"},
    "large": {"urls": 1000, "max_sessions": 32, "chunk_size": 100,"description": "Large test (1000 URLs, 32 sessions)"},
    "extreme": {"urls": 2000, "max_sessions": 64, "chunk_size": 200,"description": "Extreme test (2000 URLs, 64 sessions)"},
}
# Arguments to forward directly if present in custom_args
# Maps internal config-dict keys to the value-carrying CLI options of
# test_stress_sdk.py.
FORWARD_ARGS = {
    "urls": "--urls",
    "max_sessions": "--max-sessions",
    "chunk_size": "--chunk-size",
    "port": "--port",
    "monitor_mode": "--monitor-mode",
}
# Boolean flags to forward if True
# Maps internal config-dict keys to boolean CLI flags of test_stress_sdk.py.
FORWARD_FLAGS = {
    "stream": "--stream",
    "use_rate_limiter": "--use-rate-limiter",
    "keep_server_alive": "--keep-server-alive",
    "use_existing_site": "--use-existing-site",
    "skip_generation": "--skip-generation",
    "keep_site": "--keep-site",
    "clean_reports": "--clean-reports", # Note: clean behavior is handled here, but pass flag if needed
    "clean_site": "--clean-site", # Note: clean behavior is handled here, but pass flag if needed
}
def _print_header(config_name, custom_args):
    """Print the benchmark title banner for the chosen configuration."""
    title = "Crawl4AI SDK Benchmark Test"
    if config_name != "custom":
        title += f" - {TEST_CONFIGS[config_name]['description']}"
    else:
        # Safely get custom args for title
        urls = custom_args.get('urls', '?') if custom_args else '?'
        sessions = custom_args.get('max_sessions', '?') if custom_args else '?'
        title += f" - Custom ({urls} URLs, {sessions} sessions)"
    console.print(f"\n[bold blue]{title}[/bold blue]")
    console.print("=" * (len(title) + 4))  # Adjust underline length


def _build_command(config_name, custom_args, clean):
    """Build the test_stress_sdk.py command line for the chosen configuration."""
    cmd = ["python", "test_stress_sdk.py"]
    # Resolve the effective argument set: a preset (optionally overlaid with
    # explicit command-line overrides), or a fully custom dict.
    args_to_use = {}
    if config_name != "custom":
        args_to_use = TEST_CONFIGS[config_name].copy()
        if custom_args:
            args_to_use.update(custom_args)
    elif custom_args:
        args_to_use = custom_args.copy()
    # Value-carrying options.
    for key, arg_name in FORWARD_ARGS.items():
        if key in args_to_use:
            cmd.extend([arg_name, str(args_to_use[key])])
    # Boolean flags. The clean_* flags are excluded here because cleaning is
    # driven by run_benchmark's own top-level `clean` argument below.
    for key, flag_name in FORWARD_FLAGS.items():
        if args_to_use.get(key, False) and key not in ("clean_reports", "clean_site"):
            cmd.append(flag_name)
    if clean:
        # Forward clean flags to the stress test script (assumes it accepts
        # --clean-reports and --clean-site).
        cmd.append("--clean-reports")
        cmd.append("--clean-site")
        console.print("[yellow]Applying --clean: Cleaning reports and site before test.[/yellow]")
    return cmd


def _run_stress_test(cmd):
    """Launch the stress-test subprocess, streaming its output live.

    Returns:
        True when the subprocess exits with code 0, False otherwise.
    """
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                text=True, encoding='utf-8', errors='replace')
        for line in proc.stdout:  # stream output line by line
            console.print(line.rstrip())
        proc.wait()  # Wait for the process to complete
    except FileNotFoundError:
        console.print(f"[bold red]Error: Script 'test_stress_sdk.py' not found. Make sure it's in the correct directory.[/bold red]")
        return False
    except Exception as e:
        console.print(f"[bold red]Error running stress test subprocess: {e}[/bold red]")
        return False
    if proc.returncode != 0:
        console.print(f"[bold red]Stress test failed with exit code {proc.returncode}[/bold red]")
        return False
    return True


def _link_latest(report_dir, pattern, markup, label):
    """Print a clickable file:// link to the newest file matching *pattern*.

    *markup* is a rich markup template containing a ``{url}`` placeholder;
    *label* names the artifact kind for the warning message.
    """
    files = glob.glob(os.path.join(report_dir, pattern))
    if not files:
        return
    try:
        newest = max(files, key=os.path.getctime)
        # pathlib gives a correctly escaped file URI (BUG FIX: pathlib was
        # previously referenced without being imported, so this branch always
        # raised NameError and the links were silently never printed).
        url = pathlib.Path(os.path.abspath(newest)).as_uri()
        console.print(markup.format(url=url))
    except Exception as e:
        console.print(f"[yellow]Could not determine latest {label}: {e}[/yellow]")


def _generate_report():
    """Run benchmark_report.py and print links to the newest report artifacts."""
    report_script = "benchmark_report.py"  # Keep configurable if needed
    report_cmd = ["python", report_script]
    console.print(f"\n[bold white]Generating benchmark report: {' '.join(report_cmd)}[/bold white]")
    try:
        # check=False: a failed report must not abort the benchmark run.
        report_proc = subprocess.run(report_cmd, capture_output=True, text=True,
                                     check=False, encoding='utf-8', errors='replace')
        if report_proc.stdout:
            console.print("\n" + report_proc.stdout)
        if report_proc.stderr:
            console.print("[yellow]Report generator stderr:[/yellow]\n" + report_proc.stderr)
        if report_proc.returncode != 0:
            console.print(f"[bold yellow]Benchmark report generation script '{report_script}' failed with exit code {report_proc.returncode}[/bold yellow]")
        else:
            console.print(f"[bold green]Benchmark report script '{report_script}' completed.[/bold green]")
        # Link the newest artifacts produced by benchmark_report.py.
        report_dir = "benchmark_reports"
        if os.path.isdir(report_dir):
            _link_latest(report_dir, "comparison_report_*.html",
                         "[bold cyan]Click to open report: [link={url}]{url}[/link][/bold cyan]",
                         "report")
            _link_latest(report_dir, "memory_chart_*.png",
                         "[cyan]Memory chart: [link={url}]{url}[/link][/cyan]",
                         "chart")
        else:
            console.print(f"[yellow]Benchmark report directory '{report_dir}' not found. Cannot link reports.[/yellow]")
    except FileNotFoundError:
        console.print(f"[bold red]Error: Report script '{report_script}' not found.[/bold red]")
    except Exception as e:
        console.print(f"[bold red]Error running report generation subprocess: {e}[/bold red]")


def run_benchmark(config_name, custom_args=None, compare=True, clean=False):
    """Run the stress test for *config_name* and optionally generate a report.

    Args:
        config_name: A key of TEST_CONFIGS, or "custom".
        custom_args: Optional dict of overrides / custom settings (values for
            FORWARD_ARGS keys, True for FORWARD_FLAGS keys).
        compare: When True, run benchmark_report.py after the test.
        clean: When True, forward --clean-reports/--clean-site to the test.

    Returns:
        True on success, False for an unknown config or a failed test run.
    """
    if config_name not in TEST_CONFIGS and config_name != "custom":
        console.print(f"[bold red]Unknown configuration: {config_name}[/bold red]")
        return False
    _print_header(config_name, custom_args)
    console.print("\n[bold white]Preparing test...[/bold white]")
    cmd = _build_command(config_name, custom_args, clean)
    console.print(f"\n[bold white]Running stress test:[/bold white] {' '.join(cmd)}")
    start = time.time()
    if not _run_stress_test(cmd):
        return False
    duration = time.time() - start
    console.print(f"[bold green]Stress test completed in {duration:.1f} seconds[/bold green]")
    if compare:
        _generate_report()
    # Prompt to exit so a double-clicked run keeps its window open.
    console.print("\n[bold green]Benchmark run finished. Press Enter to exit.[/bold green]")
    try:
        input()
    except EOFError:
        pass  # Handle case where input is piped or unavailable
    return True
def main():
    """Parse CLI arguments and launch the benchmark run.

    Returns:
        Process exit code: 0 on success, 1 on failure or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Run a Crawl4AI SDK benchmark test and generate a report")
    parser.add_argument("config", choices=list(TEST_CONFIGS) + ["custom"],
                        help="Test configuration: quick, small, medium, large, extreme, or custom")
    # Arguments for 'custom' config or to override presets
    parser.add_argument("--urls", type=int, help="Number of URLs")
    parser.add_argument("--max-sessions", type=int, help="Max concurrent sessions (replaces --workers)")
    parser.add_argument("--chunk-size", type=int, help="URLs per batch (for non-stream logging)")
    parser.add_argument("--port", type=int, help="HTTP server port")
    parser.add_argument("--monitor-mode", type=str, choices=["DETAILED", "AGGREGATED"], help="Monitor display mode")
    # Boolean flags / options
    parser.add_argument("--stream", action="store_true", help="Enable streaming results (disables batch logging)")
    parser.add_argument("--use-rate-limiter", action="store_true", help="Enable basic rate limiter")
    parser.add_argument("--no-report", action="store_true", help="Skip generating comparison report")
    parser.add_argument("--clean", action="store_true", help="Clean up reports and site before running")
    parser.add_argument("--keep-server-alive", action="store_true", help="Keep HTTP server running after test")
    parser.add_argument("--use-existing-site", action="store_true", help="Use existing site on specified port")
    parser.add_argument("--skip-generation", action="store_true", help="Use existing site files without regenerating")
    parser.add_argument("--keep-site", action="store_true", help="Keep generated site files after test")
    args = parser.parse_args()

    # Collect only explicitly provided values so preset configs are not
    # clobbered by argparse defaults.
    custom_args = {}
    for key in ("urls", "max_sessions", "chunk_size", "port", "monitor_mode"):
        value = getattr(args, key)
        if value is not None:
            custom_args[key] = value
    for key in ("stream", "use_rate_limiter", "keep_server_alive",
                "use_existing_site", "skip_generation", "keep_site"):
        if getattr(args, key):
            custom_args[key] = True
    # Clean flags are handled by the 'clean' argument passed to run_benchmark.

    # The 'custom' config must specify its sizing explicitly.
    if args.config == "custom":
        required_custom = ["urls", "max_sessions", "chunk_size"]
        # BUG FIX: report the CLI spelling (--max-sessions) rather than the
        # attribute name (--max_sessions), which argparse would reject.
        missing = [f"--{arg.replace('_', '-')}" for arg in required_custom if arg not in custom_args]
        if missing:
            console.print(f"[bold red]Error: 'custom' config requires: {', '.join(missing)}[/bold red]")
            return 1

    success = run_benchmark(
        config_name=args.config,
        custom_args=custom_args,  # Pass all collected custom args
        compare=not args.no_report,
        clean=args.clean,
    )
    return 0 if success else 1
if __name__ == "__main__":
    # Propagate main()'s return code so shell scripts can detect failures.
    sys.exit(main())