From 52da8d72bc6c4179ddae29dbb8b72e00235f0562 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 22 Oct 2025 00:35:07 +0000 Subject: [PATCH] test: add comprehensive webhook feature test script Added end-to-end test script that automates webhook feature testing: Script Features (test_webhook_feature.sh): - Automatic branch switching and dependency installation - Redis and server startup/shutdown management - Webhook receiver implementation - Integration test for webhook notifications - Comprehensive cleanup and error handling - Returns to original branch after completion Test Flow: 1. Fetch and checkout webhook feature branch 2. Activate venv and install dependencies 3. Start Redis and Crawl4AI server 4. Submit crawl job with webhook config 5. Verify webhook delivery and payload 6. Clean up all processes and return to original branch Documentation: - WEBHOOK_TEST_README.md with usage instructions - Troubleshooting guide - Exit codes and safety features Usage: ./tests/test_webhook_feature.sh Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude --- tests/WEBHOOK_TEST_README.md | 251 ++++++++++++++++++++++++++++ tests/test_webhook_feature.sh | 305 ++++++++++++++++++++++++++++++++++ 2 files changed, 556 insertions(+) create mode 100644 tests/WEBHOOK_TEST_README.md create mode 100755 tests/test_webhook_feature.sh diff --git a/tests/WEBHOOK_TEST_README.md b/tests/WEBHOOK_TEST_README.md new file mode 100644 index 00000000..4f3c68a0 --- /dev/null +++ b/tests/WEBHOOK_TEST_README.md @@ -0,0 +1,251 @@ +# Webhook Feature Test Script + +This directory contains a comprehensive test script for the webhook feature implementation. + +## Overview + +The `test_webhook_feature.sh` script automates the entire process of testing the webhook feature: + +1. ✅ Fetches and switches to the webhook feature branch +2. ✅ Activates the virtual environment +3. ✅ Installs all required dependencies +4. ✅ Starts Redis server in background +5. 
✅ Starts Crawl4AI server in background +6. ✅ Runs webhook integration test +7. ✅ Verifies job completion via webhook +8. ✅ Cleans up and returns to original branch + +## Prerequisites + +- Python 3.10+ +- Virtual environment at `venv/` in the project root (the script creates it with `python3 -m venv` if it is missing) +- Git repository with the webhook feature branch +- `redis-server` (script will attempt to install if missing) +- `curl` and `lsof` commands available + +## Usage + +### Quick Start + +From the project root: + +```bash +./tests/test_webhook_feature.sh +``` + +Or from the tests directory: + +```bash +cd tests +./test_webhook_feature.sh +``` + +### What the Script Does + +#### Step 1: Branch Management +- Saves your current branch +- Fetches the webhook feature branch from remote +- Switches to the webhook feature branch + +#### Step 2: Environment Setup +- Activates the `venv/` virtual environment, creating it first if it does not exist +- Installs dependencies from `deploy/docker/requirements.txt` +- Installs Flask for the webhook receiver + +#### Step 3: Service Startup +- Starts Redis server on port 6379 +- Starts Crawl4AI server on port 11235 +- Waits for server health check to pass + +#### Step 4: Webhook Test +- Creates a webhook receiver on port 8080 +- Submits a crawl job for `https://example.com` with webhook config +- Waits for webhook notification (60s timeout) +- Verifies webhook payload contains expected data + +#### Step 5: Cleanup +- Stops webhook receiver +- Stops Crawl4AI server +- Stops Redis server +- Returns to your original branch + +## Expected Output + +``` +[INFO] Starting webhook feature test script +[INFO] Project root: /path/to/crawl4ai +[INFO] Step 1: Fetching PR branch... +[INFO] Current branch: develop +[SUCCESS] Branch fetched +[INFO] Step 2: Switching to branch: claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp +[SUCCESS] Switched to webhook feature branch +[INFO] Step 3: Activating virtual environment... +[SUCCESS] Virtual environment activated: /path/to/crawl4ai/venv/bin/python +[INFO] Step 4: Installing server dependencies... 
+[SUCCESS] Dependencies installed +[INFO] Step 5a: Starting Redis... +[SUCCESS] Redis started (PID: 12345) +[INFO] Step 5b: Starting server on port 11235... +[INFO] Server started (PID: 12346) +[INFO] Waiting for server to be ready... +[SUCCESS] Server is ready! +[INFO] Step 6: Creating webhook test script... +[INFO] Running webhook test... + +🚀 Submitting crawl job with webhook... +✅ Job submitted successfully, task_id: crawl_abc123 +⏳ Waiting for webhook notification... + +✅ Webhook received: { + "task_id": "crawl_abc123", + "task_type": "crawl", + "status": "completed", + "timestamp": "2025-10-22T00:00:00.000000+00:00", + "urls": ["https://example.com"], + "data": { ... } +} + +✅ Webhook received! + Task ID: crawl_abc123 + Status: completed + URLs: ['https://example.com'] + ✅ Data included in webhook payload + 📄 Crawled 1 URL(s) + - https://example.com: 1234 chars + +🎉 Webhook test PASSED! + +[INFO] Step 7: Verifying test results... +[SUCCESS] ✅ Webhook test PASSED! +[SUCCESS] All tests completed successfully! 🎉 +[INFO] Cleanup will happen automatically... +[INFO] Starting cleanup... +[INFO] Stopping webhook receiver... +[INFO] Stopping server... +[INFO] Stopping Redis... +[INFO] Switching back to branch: develop +[SUCCESS] Cleanup complete +``` + +## Troubleshooting + +### Server Failed to Start + +If the server fails to start, check the logs: + +```bash +tail -100 /tmp/crawl4ai_server.log +``` + +Common issues: +- Port 11235 already in use: `lsof -ti:11235 | xargs kill -9` +- Missing dependencies: Check that all packages are installed + +### Redis Connection Failed + +Check if Redis is running: + +```bash +redis-cli ping +# Should return: PONG +``` + +If not running: + +```bash +redis-server --port 6379 --daemonize yes +``` + +### Webhook Not Received + +The script has a 60-second timeout for webhook delivery. If the webhook isn't received: + +1. Check server logs: `/tmp/crawl4ai_server.log` +2. Verify webhook receiver is running on port 8080 +3. 
Check network connectivity between components + +### Script Interruption + +If the script is interrupted (Ctrl+C), cleanup happens automatically via trap. The script will: +- Kill all background processes +- Stop Redis +- Return to your original branch + +To manually cleanup if needed: + +```bash +# Kill processes by port +lsof -ti:11235 | xargs kill -9 # Server +lsof -ti:8080 | xargs kill -9 # Webhook receiver +lsof -ti:6379 | xargs kill -9 # Redis + +# Return to your branch +git checkout develop # or your branch name +``` + +## Testing Different URLs + +To test with a different URL, modify the script or create a custom test: + +```python +payload = { + "urls": ["https://your-url-here.com"], + "browser_config": {"headless": True}, + "crawler_config": {"cache_mode": "bypass"}, + "webhook_config": { + "webhook_url": "http://localhost:8080/webhook", + "webhook_data_in_payload": True + } +} +``` + +## Files Generated + +The script creates temporary files: + +- `/tmp/crawl4ai_server.log` - Server output logs +- `/tmp/test_webhook.py` - Webhook test Python script + +These are not cleaned up automatically so you can review them after the test. 
+ +## Exit Codes + +- `0` - All tests passed successfully +- `1` - Test failed (check output for details) + +## Safety Features + +- ✅ Automatic cleanup on exit, interrupt, or error +- ✅ Returns to original branch on completion +- ✅ Kills all background processes +- ✅ Comprehensive error handling +- ✅ Colored output for easy reading +- ✅ Detailed logging at each step + +## Notes + +- The script uses `set -e` to exit on any command failure +- All background processes are tracked and cleaned up +- If the `venv/` virtual environment does not exist, the script creates it with `python3 -m venv` before activating it +- Redis must be available (installed or installable via apt-get/brew) + +## Integration with CI/CD + +This script can be integrated into CI/CD pipelines: + +```yaml +# Example GitHub Actions +- name: Test Webhook Feature + run: | + chmod +x tests/test_webhook_feature.sh + ./tests/test_webhook_feature.sh +``` + +## Support + +If you encounter issues: + +1. Check the troubleshooting section above +2. Review server logs at `/tmp/crawl4ai_server.log` +3. Ensure all prerequisites are met +4. Open an issue with the full output of the script diff --git a/tests/test_webhook_feature.sh b/tests/test_webhook_feature.sh new file mode 100755 index 00000000..20eab2a1 --- /dev/null +++ b/tests/test_webhook_feature.sh @@ -0,0 +1,305 @@ +#!/bin/bash + +############################################################################# +# Webhook Feature Test Script +# +# This script tests the webhook feature implementation by: +# 1. Switching to the webhook feature branch +# 2. Installing dependencies +# 3. Starting the server +# 4. Running webhook tests +# 5. 
Cleaning up and returning to original branch +# +# Usage: ./test_webhook_feature.sh +############################################################################# + +set -e # Exit on error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +BRANCH_NAME="claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp" +VENV_PATH="venv" +SERVER_PORT=11235 +WEBHOOK_PORT=8080 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" + +# PID files for cleanup +REDIS_PID="" +SERVER_PID="" +WEBHOOK_PID="" + +############################################################################# +# Utility Functions +############################################################################# + +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +cleanup() { + log_info "Starting cleanup..." + + # Kill webhook receiver if running + if [ ! -z "$WEBHOOK_PID" ] && kill -0 $WEBHOOK_PID 2>/dev/null; then + log_info "Stopping webhook receiver (PID: $WEBHOOK_PID)..." + kill $WEBHOOK_PID 2>/dev/null || true + fi + + # Kill server if running + if [ ! -z "$SERVER_PID" ] && kill -0 $SERVER_PID 2>/dev/null; then + log_info "Stopping server (PID: $SERVER_PID)..." + kill $SERVER_PID 2>/dev/null || true + fi + + # Kill Redis if running + if [ ! -z "$REDIS_PID" ] && kill -0 $REDIS_PID 2>/dev/null; then + log_info "Stopping Redis (PID: $REDIS_PID)..." + kill $REDIS_PID 2>/dev/null || true + fi + + # Also kill by port if PIDs didn't work + lsof -ti:$SERVER_PORT | xargs kill -9 2>/dev/null || true + lsof -ti:$WEBHOOK_PORT | xargs kill -9 2>/dev/null || true + lsof -ti:6379 | xargs kill -9 2>/dev/null || true + + # Return to original branch + if [ ! 
-z "$ORIGINAL_BRANCH" ]; then + log_info "Switching back to branch: $ORIGINAL_BRANCH" + git checkout $ORIGINAL_BRANCH 2>/dev/null || true + fi + + log_success "Cleanup complete" +} + +# Set trap to cleanup on exit +trap cleanup EXIT INT TERM + +############################################################################# +# Main Script +############################################################################# + +log_info "Starting webhook feature test script" +log_info "Project root: $PROJECT_ROOT" + +cd "$PROJECT_ROOT" + +# Step 1: Save current branch and fetch PR +log_info "Step 1: Fetching PR branch..." +ORIGINAL_BRANCH=$(git rev-parse --abbrev-ref HEAD) +log_info "Current branch: $ORIGINAL_BRANCH" + +git fetch origin $BRANCH_NAME +log_success "Branch fetched" + +# Step 2: Switch to new branch +log_info "Step 2: Switching to branch: $BRANCH_NAME" +git checkout $BRANCH_NAME +log_success "Switched to webhook feature branch" + +# Step 3: Activate virtual environment +log_info "Step 3: Activating virtual environment..." +if [ ! -d "$VENV_PATH" ]; then + log_error "Virtual environment not found at $VENV_PATH" + log_info "Creating virtual environment..." + python3 -m venv $VENV_PATH +fi + +source $VENV_PATH/bin/activate +log_success "Virtual environment activated: $(which python)" + +# Step 4: Install server dependencies +log_info "Step 4: Installing server dependencies..." +pip install -q -r deploy/docker/requirements.txt +log_success "Dependencies installed" + +# Check if Redis is available +log_info "Checking Redis availability..." +if ! command -v redis-server &> /dev/null; then + log_warning "Redis not found, attempting to install..." + if command -v apt-get &> /dev/null; then + sudo apt-get update && sudo apt-get install -y redis-server + elif command -v brew &> /dev/null; then + brew install redis + else + log_error "Cannot install Redis automatically. Please install Redis manually." 
+ exit 1 + fi +fi + +# Step 5: Start Redis in background +log_info "Step 5a: Starting Redis..." +redis-server --port 6379 --daemonize yes +sleep 2 +REDIS_PID=$(pgrep redis-server) +log_success "Redis started (PID: $REDIS_PID)" + +# Step 5b: Start server in background +log_info "Step 5b: Starting server on port $SERVER_PORT..." +cd deploy/docker + +# Start server in background +python3 -m uvicorn server:app --host 0.0.0.0 --port $SERVER_PORT > /tmp/crawl4ai_server.log 2>&1 & +SERVER_PID=$! +cd "$PROJECT_ROOT" + +log_info "Server started (PID: $SERVER_PID)" + +# Wait for server to be ready +log_info "Waiting for server to be ready..." +for i in {1..30}; do + if curl -s http://localhost:$SERVER_PORT/health > /dev/null 2>&1; then + log_success "Server is ready!" + break + fi + if [ $i -eq 30 ]; then + log_error "Server failed to start within 30 seconds" + log_info "Server logs:" + tail -50 /tmp/crawl4ai_server.log + exit 1 + fi + echo -n "." + sleep 1 +done +echo "" + +# Step 6: Create and run webhook test +log_info "Step 6: Creating webhook test script..." 
+ +cat > /tmp/test_webhook.py << 'PYTHON_SCRIPT' +import requests +import json +import time +from flask import Flask, request, jsonify +from threading import Thread, Event + +# Configuration +CRAWL4AI_BASE_URL = "http://localhost:11235" +WEBHOOK_BASE_URL = "http://localhost:8080" + +# Flask app for webhook receiver +app = Flask(__name__) +webhook_received = Event() +webhook_data = {} + +@app.route('/webhook', methods=['POST']) +def handle_webhook(): + global webhook_data + webhook_data = request.json + webhook_received.set() + print(f"\n✅ Webhook received: {json.dumps(webhook_data, indent=2)}") + return jsonify({"status": "received"}), 200 + +def start_webhook_server(): + app.run(host='0.0.0.0', port=8080, debug=False, use_reloader=False) + +# Start webhook server in background +webhook_thread = Thread(target=start_webhook_server, daemon=True) +webhook_thread.start() +time.sleep(2) + +print("🚀 Submitting crawl job with webhook...") + +# Submit job with webhook +payload = { + "urls": ["https://example.com"], + "browser_config": {"headless": True}, + "crawler_config": {"cache_mode": "bypass"}, + "webhook_config": { + "webhook_url": f"{WEBHOOK_BASE_URL}/webhook", + "webhook_data_in_payload": True + } +} + +response = requests.post( + f"{CRAWL4AI_BASE_URL}/crawl/job", + json=payload, + headers={"Content-Type": "application/json"} +) + +if not response.ok: + print(f"❌ Failed to submit job: {response.text}") + exit(1) + +task_id = response.json()['task_id'] +print(f"✅ Job submitted successfully, task_id: {task_id}") + +# Wait for webhook (with timeout) +print("⏳ Waiting for webhook notification...") +if webhook_received.wait(timeout=60): + print(f"✅ Webhook received!") + print(f" Task ID: {webhook_data.get('task_id')}") + print(f" Status: {webhook_data.get('status')}") + print(f" URLs: {webhook_data.get('urls')}") + + if webhook_data.get('status') == 'completed': + if 'data' in webhook_data: + print(f" ✅ Data included in webhook payload") + results = 
webhook_data['data'].get('results', []) + if results: + print(f" 📄 Crawled {len(results)} URL(s)") + for result in results: + print(f" - {result.get('url')}: {len(result.get('markdown', ''))} chars") + print("\n🎉 Webhook test PASSED!") + exit(0) + else: + print(f" ❌ Job failed: {webhook_data.get('error')}") + exit(1) +else: + print("❌ Webhook not received within 60 seconds") + # Try polling as fallback + print("⏳ Trying to poll job status...") + for i in range(10): + status_response = requests.get(f"{CRAWL4AI_BASE_URL}/crawl/job/{task_id}") + if status_response.ok: + status = status_response.json() + print(f" Status: {status.get('status')}") + if status.get('status') in ['completed', 'failed']: + break + time.sleep(2) + exit(1) +PYTHON_SCRIPT + +# Install Flask for webhook receiver +pip install -q flask + +# Run the webhook test +log_info "Running webhook test..." +python3 /tmp/test_webhook.py & +WEBHOOK_PID=$! + +# Wait for test to complete +wait $WEBHOOK_PID +TEST_EXIT_CODE=$? + +# Step 7: Verify results +log_info "Step 7: Verifying test results..." +if [ $TEST_EXIT_CODE -eq 0 ]; then + log_success "✅ Webhook test PASSED!" +else + log_error "❌ Webhook test FAILED (exit code: $TEST_EXIT_CODE)" + log_info "Server logs:" + tail -100 /tmp/crawl4ai_server.log + exit 1 +fi + +# Step 8: Cleanup happens automatically via trap +log_success "All tests completed successfully! 🎉" +log_info "Cleanup will happen automatically..."