diff --git a/tests/WEBHOOK_TEST_README.md b/tests/WEBHOOK_TEST_README.md new file mode 100644 index 00000000..4f3c68a0 --- /dev/null +++ b/tests/WEBHOOK_TEST_README.md @@ -0,0 +1,251 @@ +# Webhook Feature Test Script + +This directory contains a comprehensive test script for the webhook feature implementation. + +## Overview + +The `test_webhook_feature.sh` script automates the entire process of testing the webhook feature: + +1. ✅ Fetches and switches to the webhook feature branch +2. ✅ Activates the virtual environment +3. ✅ Installs all required dependencies +4. ✅ Starts Redis server in background +5. ✅ Starts Crawl4AI server in background +6. ✅ Runs webhook integration test +7. ✅ Verifies job completion via webhook +8. ✅ Cleans up and returns to original branch + +## Prerequisites + +- Python 3.10+ +- Virtual environment already created (`venv/` in project root) +- Git repository with the webhook feature branch +- `redis-server` (script will attempt to install if missing) +- `curl` and `lsof` commands available + +## Usage + +### Quick Start + +From the project root: + +```bash +./tests/test_webhook_feature.sh +``` + +Or from the tests directory: + +```bash +cd tests +./test_webhook_feature.sh +``` + +### What the Script Does + +#### Step 1: Branch Management +- Saves your current branch +- Fetches the webhook feature branch from remote +- Switches to the webhook feature branch + +#### Step 2: Environment Setup +- Activates your existing virtual environment +- Installs dependencies from `deploy/docker/requirements.txt` +- Installs Flask for the webhook receiver + +#### Step 3: Service Startup +- Starts Redis server on port 6379 +- Starts Crawl4AI server on port 11235 +- Waits for server health check to pass + +#### Step 4: Webhook Test +- Creates a webhook receiver on port 8080 +- Submits a crawl job for `https://example.com` with webhook config +- Waits for webhook notification (60s timeout) +- Verifies webhook payload contains expected data + +#### Step 5: 
Cleanup +- Stops webhook receiver +- Stops Crawl4AI server +- Stops Redis server +- Returns to your original branch + +## Expected Output + +``` +[INFO] Starting webhook feature test script +[INFO] Project root: /path/to/crawl4ai +[INFO] Step 1: Fetching PR branch... +[INFO] Current branch: develop +[SUCCESS] Branch fetched +[INFO] Step 2: Switching to branch: claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp +[SUCCESS] Switched to webhook feature branch +[INFO] Step 3: Activating virtual environment... +[SUCCESS] Virtual environment activated +[INFO] Step 4: Installing server dependencies... +[SUCCESS] Dependencies installed +[INFO] Step 5a: Starting Redis... +[SUCCESS] Redis started (PID: 12345) +[INFO] Step 5b: Starting server on port 11235... +[INFO] Server started (PID: 12346) +[INFO] Waiting for server to be ready... +[SUCCESS] Server is ready! +[INFO] Step 6: Creating webhook test script... +[INFO] Running webhook test... + +🚀 Submitting crawl job with webhook... +✅ Job submitted successfully, task_id: crawl_abc123 +⏳ Waiting for webhook notification... + +✅ Webhook received: { + "task_id": "crawl_abc123", + "task_type": "crawl", + "status": "completed", + "timestamp": "2025-10-22T00:00:00.000000+00:00", + "urls": ["https://example.com"], + "data": { ... } +} + +✅ Webhook received! + Task ID: crawl_abc123 + Status: completed + URLs: ['https://example.com'] + ✅ Data included in webhook payload + 📄 Crawled 1 URL(s) + - https://example.com: 1234 chars + +🎉 Webhook test PASSED! + +[INFO] Step 7: Verifying test results... +[SUCCESS] ✅ Webhook test PASSED! +[SUCCESS] All tests completed successfully! 🎉 +[INFO] Cleanup will happen automatically... +[INFO] Starting cleanup... +[INFO] Stopping webhook receiver... +[INFO] Stopping server... +[INFO] Stopping Redis... 
+[INFO] Switching back to branch: develop +[SUCCESS] Cleanup complete +``` + +## Troubleshooting + +### Server Failed to Start + +If the server fails to start, check the logs: + +```bash +tail -100 /tmp/crawl4ai_server.log +``` + +Common issues: +- Port 11235 already in use: `lsof -ti:11235 | xargs kill -9` +- Missing dependencies: Check that all packages are installed + +### Redis Connection Failed + +Check if Redis is running: + +```bash +redis-cli ping +# Should return: PONG +``` + +If not running: + +```bash +redis-server --port 6379 --daemonize yes +``` + +### Webhook Not Received + +The script has a 60-second timeout for webhook delivery. If the webhook isn't received: + +1. Check server logs: `/tmp/crawl4ai_server.log` +2. Verify webhook receiver is running on port 8080 +3. Check network connectivity between components + +### Script Interruption + +If the script is interrupted (Ctrl+C), cleanup happens automatically via trap. The script will: +- Kill all background processes +- Stop Redis +- Return to your original branch + +To manually cleanup if needed: + +```bash +# Kill processes by port +lsof -ti:11235 | xargs kill -9 # Server +lsof -ti:8080 | xargs kill -9 # Webhook receiver +lsof -ti:6379 | xargs kill -9 # Redis + +# Return to your branch +git checkout develop # or your branch name +``` + +## Testing Different URLs + +To test with a different URL, modify the script or create a custom test: + +```python +payload = { + "urls": ["https://your-url-here.com"], + "browser_config": {"headless": True}, + "crawler_config": {"cache_mode": "bypass"}, + "webhook_config": { + "webhook_url": "http://localhost:8080/webhook", + "webhook_data_in_payload": True + } +} +``` + +## Files Generated + +The script creates temporary files: + +- `/tmp/crawl4ai_server.log` - Server output logs +- `/tmp/test_webhook.py` - Webhook test Python script + +These are not cleaned up automatically so you can review them after the test. 
+ +## Exit Codes + +- `0` - All tests passed successfully +- `1` - Test failed (check output for details) + +## Safety Features + +- ✅ Automatic cleanup on exit, interrupt, or error +- ✅ Returns to original branch on completion +- ✅ Kills all background processes +- ✅ Comprehensive error handling +- ✅ Colored output for easy reading +- ✅ Detailed logging at each step + +## Notes + +- The script uses `set -e` to exit on any command failure +- All background processes are tracked and cleaned up +- The virtual environment must exist before running +- Redis must be available (installed or installable via apt-get/brew) + +## Integration with CI/CD + +This script can be integrated into CI/CD pipelines: + +```yaml +# Example GitHub Actions +- name: Test Webhook Feature + run: | + chmod +x tests/test_webhook_feature.sh + ./tests/test_webhook_feature.sh +``` + +## Support + +If you encounter issues: + +1. Check the troubleshooting section above +2. Review server logs at `/tmp/crawl4ai_server.log` +3. Ensure all prerequisites are met +4. Open an issue with the full output of the script diff --git a/tests/test_webhook_feature.sh b/tests/test_webhook_feature.sh new file mode 100755 index 00000000..20eab2a1 --- /dev/null +++ b/tests/test_webhook_feature.sh @@ -0,0 +1,305 @@ +#!/bin/bash + +############################################################################# +# Webhook Feature Test Script +# +# This script tests the webhook feature implementation by: +# 1. Switching to the webhook feature branch +# 2. Installing dependencies +# 3. Starting the server +# 4. Running webhook tests +# 5. 
Cleaning up and returning to original branch +# +# Usage: ./test_webhook_feature.sh +############################################################################# + +set -e # Exit on error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +BRANCH_NAME="claude/implement-webhook-crawl-feature-011CULZY1Jy8N5MUkZqXkRVp" +VENV_PATH="venv" +SERVER_PORT=11235 +WEBHOOK_PORT=8080 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" + +# PID files for cleanup +REDIS_PID="" +SERVER_PID="" +WEBHOOK_PID="" + +############################################################################# +# Utility Functions +############################################################################# + +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +cleanup() { + log_info "Starting cleanup..." + + # Kill webhook receiver if running + if [ ! -z "$WEBHOOK_PID" ] && kill -0 $WEBHOOK_PID 2>/dev/null; then + log_info "Stopping webhook receiver (PID: $WEBHOOK_PID)..." + kill $WEBHOOK_PID 2>/dev/null || true + fi + + # Kill server if running + if [ ! -z "$SERVER_PID" ] && kill -0 $SERVER_PID 2>/dev/null; then + log_info "Stopping server (PID: $SERVER_PID)..." + kill $SERVER_PID 2>/dev/null || true + fi + + # Kill Redis if running + if [ ! -z "$REDIS_PID" ] && kill -0 $REDIS_PID 2>/dev/null; then + log_info "Stopping Redis (PID: $REDIS_PID)..." + kill $REDIS_PID 2>/dev/null || true + fi + + # Also kill by port if PIDs didn't work + lsof -ti:$SERVER_PORT | xargs kill -9 2>/dev/null || true + lsof -ti:$WEBHOOK_PORT | xargs kill -9 2>/dev/null || true + lsof -ti:6379 | xargs kill -9 2>/dev/null || true + + # Return to original branch + if [ ! 
-z "$ORIGINAL_BRANCH" ]; then + log_info "Switching back to branch: $ORIGINAL_BRANCH" + git checkout $ORIGINAL_BRANCH 2>/dev/null || true + fi + + log_success "Cleanup complete" +} + +# Set trap to cleanup on exit +trap cleanup EXIT INT TERM + +############################################################################# +# Main Script +############################################################################# + +log_info "Starting webhook feature test script" +log_info "Project root: $PROJECT_ROOT" + +cd "$PROJECT_ROOT" + +# Step 1: Save current branch and fetch PR +log_info "Step 1: Fetching PR branch..." +ORIGINAL_BRANCH=$(git rev-parse --abbrev-ref HEAD) +log_info "Current branch: $ORIGINAL_BRANCH" + +git fetch origin $BRANCH_NAME +log_success "Branch fetched" + +# Step 2: Switch to new branch +log_info "Step 2: Switching to branch: $BRANCH_NAME" +git checkout $BRANCH_NAME +log_success "Switched to webhook feature branch" + +# Step 3: Activate virtual environment +log_info "Step 3: Activating virtual environment..." +if [ ! -d "$VENV_PATH" ]; then + log_error "Virtual environment not found at $VENV_PATH" + log_info "Creating virtual environment..." + python3 -m venv $VENV_PATH +fi + +source $VENV_PATH/bin/activate +log_success "Virtual environment activated: $(which python)" + +# Step 4: Install server dependencies +log_info "Step 4: Installing server dependencies..." +pip install -q -r deploy/docker/requirements.txt +log_success "Dependencies installed" + +# Check if Redis is available +log_info "Checking Redis availability..." +if ! command -v redis-server &> /dev/null; then + log_warning "Redis not found, attempting to install..." + if command -v apt-get &> /dev/null; then + sudo apt-get update && sudo apt-get install -y redis-server + elif command -v brew &> /dev/null; then + brew install redis + else + log_error "Cannot install Redis automatically. Please install Redis manually." 
+ exit 1 + fi +fi + +# Step 5: Start Redis in background +log_info "Step 5a: Starting Redis..." +redis-server --port 6379 --daemonize yes +sleep 2 +REDIS_PID=$(pgrep redis-server) +log_success "Redis started (PID: $REDIS_PID)" + +# Step 5b: Start server in background +log_info "Step 5b: Starting server on port $SERVER_PORT..." +cd deploy/docker + +# Start server in background +python3 -m uvicorn server:app --host 0.0.0.0 --port $SERVER_PORT > /tmp/crawl4ai_server.log 2>&1 & +SERVER_PID=$! +cd "$PROJECT_ROOT" + +log_info "Server started (PID: $SERVER_PID)" + +# Wait for server to be ready +log_info "Waiting for server to be ready..." +for i in {1..30}; do + if curl -s http://localhost:$SERVER_PORT/health > /dev/null 2>&1; then + log_success "Server is ready!" + break + fi + if [ $i -eq 30 ]; then + log_error "Server failed to start within 30 seconds" + log_info "Server logs:" + tail -50 /tmp/crawl4ai_server.log + exit 1 + fi + echo -n "." + sleep 1 +done +echo "" + +# Step 6: Create and run webhook test +log_info "Step 6: Creating webhook test script..." 

cat > /tmp/test_webhook.py << 'PYTHON_SCRIPT'
"""Integration test: submit a crawl job and verify the webhook callback.

Starts a local Flask receiver on port 8080, submits a crawl job to the
Crawl4AI server with a webhook_config pointing at the receiver, then waits
up to 60s for the completion notification. Exits 0 on success, 1 on failure.
"""
import requests
import json
import time
from flask import Flask, request, jsonify
from threading import Thread, Event

# Configuration
CRAWL4AI_BASE_URL = "http://localhost:11235"
WEBHOOK_BASE_URL = "http://localhost:8080"

# Flask app for webhook receiver
app = Flask(__name__)
webhook_received = Event()  # set once the webhook POST arrives
webhook_data = {}           # payload of the received webhook

@app.route('/webhook', methods=['POST'])
def handle_webhook():
    global webhook_data
    webhook_data = request.json
    webhook_received.set()
    print(f"\n✅ Webhook received: {json.dumps(webhook_data, indent=2)}")
    return jsonify({"status": "received"}), 200

def start_webhook_server():
    app.run(host='0.0.0.0', port=8080, debug=False, use_reloader=False)

# Start webhook server in background (daemon thread dies with the process)
webhook_thread = Thread(target=start_webhook_server, daemon=True)
webhook_thread.start()
time.sleep(2)

print("🚀 Submitting crawl job with webhook...")

# Submit job with webhook
payload = {
    "urls": ["https://example.com"],
    "browser_config": {"headless": True},
    "crawler_config": {"cache_mode": "bypass"},
    "webhook_config": {
        "webhook_url": f"{WEBHOOK_BASE_URL}/webhook",
        "webhook_data_in_payload": True
    }
}

response = requests.post(
    f"{CRAWL4AI_BASE_URL}/crawl/job",
    json=payload,
    headers={"Content-Type": "application/json"}
)

if not response.ok:
    print(f"❌ Failed to submit job: {response.text}")
    exit(1)

task_id = response.json()['task_id']
print(f"✅ Job submitted successfully, task_id: {task_id}")

# Wait for webhook (with timeout)
print("⏳ Waiting for webhook notification...")
if webhook_received.wait(timeout=60):
    print(f"✅ Webhook received!")
    print(f"   Task ID: {webhook_data.get('task_id')}")
    print(f"   Status: {webhook_data.get('status')}")
    print(f"   URLs: {webhook_data.get('urls')}")

    if webhook_data.get('status') == 'completed':
        if 'data' in webhook_data:
            print(f"   ✅ Data included in webhook payload")
            results = webhook_data['data'].get('results', [])
            if results:
                print(f"   📄 Crawled {len(results)} URL(s)")
                for result in results:
                    print(f"      - {result.get('url')}: {len(result.get('markdown', ''))} chars")
        print("\n🎉 Webhook test PASSED!")
        exit(0)
    else:
        print(f"   ❌ Job failed: {webhook_data.get('error')}")
        exit(1)
else:
    print("❌ Webhook not received within 60 seconds")
    # Try polling as fallback
    print("⏳ Trying to poll job status...")
    for i in range(10):
        status_response = requests.get(f"{CRAWL4AI_BASE_URL}/crawl/job/{task_id}")
        if status_response.ok:
            status = status_response.json()
            print(f"   Status: {status.get('status')}")
            if status.get('status') in ['completed', 'failed']:
                break
        time.sleep(2)
    exit(1)
PYTHON_SCRIPT

# Install Flask for webhook receiver
pip install -q flask

# Run the webhook test
log_info "Running webhook test..."
python3 /tmp/test_webhook.py &
WEBHOOK_PID=$!

# Wait for test to complete.
# BUGFIX: under `set -e`, a bare `wait $WEBHOOK_PID; TEST_EXIT_CODE=$?`
# aborts the script before the exit code is captured when the test fails,
# making the FAILED branch (with its server-log dump) unreachable. The
# `|| TEST_EXIT_CODE=$?` guard keeps the script alive and records the code.
TEST_EXIT_CODE=0
wait "$WEBHOOK_PID" || TEST_EXIT_CODE=$?

# Step 7: Verify results
log_info "Step 7: Verifying test results..."
if [ "$TEST_EXIT_CODE" -eq 0 ]; then
    log_success "✅ Webhook test PASSED!"
else
    log_error "❌ Webhook test FAILED (exit code: $TEST_EXIT_CODE)"
    log_info "Server logs:"
    tail -100 /tmp/crawl4ai_server.log
    exit 1
fi

# Step 8: Cleanup happens automatically via trap
log_success "All tests completed successfully! 🎉"
log_info "Cleanup will happen automatically..."