#!/usr/bin/env python3 """ Crawl4AI v0.7.6 Release Demo ============================ This demo showcases the major feature in v0.7.6: **Webhook Support for Docker Job Queue API** Features Demonstrated: 1. Asynchronous job processing with webhook notifications 2. Webhook support for /crawl/job endpoint 3. Webhook support for /llm/job endpoint 4. Notification-only vs data-in-payload modes 5. Custom webhook headers for authentication 6. Structured extraction with JSON schemas 7. Exponential backoff retry for reliable delivery Prerequisites: - Crawl4AI Docker container running on localhost:11235 - Flask installed: pip install flask requests - LLM API key configured (for LLM examples) Usage: python docs/releases_review/demo_v0.7.6.py """ import requests import json import time from flask import Flask, request, jsonify from threading import Thread # Configuration CRAWL4AI_BASE_URL = "http://localhost:11235" WEBHOOK_BASE_URL = "http://localhost:8080" # Flask app for webhook receiver app = Flask(__name__) received_webhooks = [] @app.route('/webhook', methods=['POST']) def webhook_handler(): """Universal webhook handler for both crawl and LLM extraction jobs.""" payload = request.json task_id = payload['task_id'] task_type = payload['task_type'] status = payload['status'] print(f"\n{'='*70}") print(f"šŸ“¬ Webhook Received!") print(f" Task ID: {task_id}") print(f" Task Type: {task_type}") print(f" Status: {status}") print(f" Timestamp: {payload['timestamp']}") if status == 'completed': if 'data' in payload: print(f" āœ… Data included in webhook") if task_type == 'crawl': results = payload['data'].get('results', []) print(f" šŸ“Š Crawled {len(results)} URL(s)") elif task_type == 'llm_extraction': extracted = payload['data'].get('extracted_content', {}) print(f" šŸ¤– Extracted: {json.dumps(extracted, indent=6)}") else: print(f" šŸ“„ Notification only (fetch data separately)") elif status == 'failed': print(f" āŒ Error: {payload.get('error', 'Unknown')}") print(f"{'='*70}\n") received_webhooks.append(payload) return jsonify({"status": "received"}), 200 def start_webhook_server(): """Start Flask webhook server in background.""" app.run(host='0.0.0.0', port=8080, debug=False, use_reloader=False) def demo_1_crawl_webhook_notification_only(): """Demo 1: Crawl job with webhook notification (data fetched separately).""" print("\n" + "="*70) print("DEMO 1: Crawl Job - Webhook Notification Only") print("="*70) print("Submitting crawl job with webhook notification...") payload = { "urls": ["https://example.com"], "browser_config": {"headless": True}, "crawler_config": {"cache_mode": "bypass"}, "webhook_config": { "webhook_url": f"{WEBHOOK_BASE_URL}/webhook", "webhook_data_in_payload": False, "webhook_headers": { "X-Demo": "v0.7.6", "X-Type": "crawl" } } } response = requests.post(f"{CRAWL4AI_BASE_URL}/crawl/job", json=payload) if response.ok: task_id = response.json()['task_id'] print(f"āœ… Job submitted: {task_id}") print("ā³ Webhook will notify when complete...") return task_id else: print(f"āŒ Failed: {response.text}") return None def demo_2_crawl_webhook_with_data(): """Demo 2: Crawl job with full data in webhook payload.""" print("\n" + "="*70) print("DEMO 2: Crawl Job - Webhook with Full Data") print("="*70) print("Submitting crawl job with data included in webhook...") payload = { "urls": ["https://www.python.org"], "browser_config": {"headless": True}, "crawler_config": {"cache_mode": "bypass"}, "webhook_config": { "webhook_url": f"{WEBHOOK_BASE_URL}/webhook", "webhook_data_in_payload": True, "webhook_headers": { "X-Demo": "v0.7.6", "X-Type": "crawl-with-data" } } } response = requests.post(f"{CRAWL4AI_BASE_URL}/crawl/job", json=payload) if response.ok: task_id = response.json()['task_id'] print(f"āœ… Job submitted: {task_id}") print("ā³ Webhook will include full results...") return task_id else: print(f"āŒ Failed: {response.text}") return None def demo_3_llm_webhook_notification_only(): """Demo 3: LLM extraction with webhook notification (NEW in v0.7.6!).""" print("\n" + "="*70) print("DEMO 3: LLM Extraction - Webhook Notification Only (NEW!)") print("="*70) print("Submitting LLM extraction job with webhook notification...") payload = { "url": "https://www.example.com", "q": "Extract the main heading and description from this page", "provider": "openai/gpt-4o-mini", "cache": False, "webhook_config": { "webhook_url": f"{WEBHOOK_BASE_URL}/webhook", "webhook_data_in_payload": False, "webhook_headers": { "X-Demo": "v0.7.6", "X-Type": "llm" } } } response = requests.post(f"{CRAWL4AI_BASE_URL}/llm/job", json=payload) if response.ok: task_id = response.json()['task_id'] print(f"āœ… Job submitted: {task_id}") print("ā³ Webhook will notify when LLM extraction completes...") return task_id else: print(f"āŒ Failed: {response.text}") return None def demo_4_llm_webhook_with_schema(): """Demo 4: LLM extraction with JSON schema and data in webhook (NEW in v0.7.6!).""" print("\n" + "="*70) print("DEMO 4: LLM Extraction - Schema + Full Data in Webhook (NEW!)") print("="*70) print("Submitting LLM extraction with JSON schema...") schema = { "type": "object", "properties": { "title": {"type": "string", "description": "Page title"}, "description": {"type": "string", "description": "Page description"}, "main_topics": { "type": "array", "items": {"type": "string"}, "description": "Main topics covered" } }, "required": ["title"] } payload = { "url": "https://www.python.org", "q": "Extract the title, description, and main topics from this website", "schema": json.dumps(schema), "provider": "openai/gpt-4o-mini", "cache": False, "webhook_config": { "webhook_url": f"{WEBHOOK_BASE_URL}/webhook", "webhook_data_in_payload": True, "webhook_headers": { "X-Demo": "v0.7.6", "X-Type": "llm-with-schema" } } } response = requests.post(f"{CRAWL4AI_BASE_URL}/llm/job", json=payload) if response.ok: task_id = response.json()['task_id'] print(f"āœ… Job submitted: {task_id}") print("ā³ Webhook will include structured extraction results...") return task_id else: print(f"āŒ Failed: {response.text}") return None def demo_5_global_webhook_config(): """Demo 5: Using global webhook configuration from config.yml.""" print("\n" + "="*70) print("DEMO 5: Global Webhook Configuration") print("="*70) print("šŸ’” You can configure a default webhook URL in config.yml:") print(""" webhooks: enabled: true default_url: "https://myapp.com/webhooks/default" data_in_payload: false retry: max_attempts: 5 initial_delay_ms: 1000 max_delay_ms: 32000 timeout_ms: 30000 """) print("Then submit jobs WITHOUT webhook_config - they'll use the default!") print("This is useful for consistent webhook handling across all jobs.") def demo_6_webhook_retry_logic(): """Demo 6: Webhook retry mechanism with exponential backoff.""" print("\n" + "="*70) print("DEMO 6: Webhook Retry Logic") print("="*70) print("šŸ”„ Webhook delivery uses exponential backoff retry:") print(" • Max attempts: 5") print(" • Delays: 1s → 2s → 4s → 8s → 16s") print(" • Timeout: 30s per attempt") print(" • Retries on: 5xx errors, network errors, timeouts") print(" • No retry on: 4xx client errors") print("\nThis ensures reliable webhook delivery even with temporary failures!") def print_summary(): """Print demo summary and results.""" print("\n" + "="*70) print("šŸ“Š DEMO SUMMARY") print("="*70) print(f"Total webhooks received: {len(received_webhooks)}") crawl_webhooks = [w for w in received_webhooks if w['task_type'] == 'crawl'] llm_webhooks = [w for w in received_webhooks if w['task_type'] == 'llm_extraction'] print(f"\nBreakdown:") print(f" šŸ•·ļø Crawl jobs: {len(crawl_webhooks)}") print(f" šŸ¤– LLM extraction jobs: {len(llm_webhooks)}") print(f"\nDetails:") for i, webhook in enumerate(received_webhooks, 1): icon = "šŸ•·ļø" if webhook['task_type'] == 'crawl' else "šŸ¤–" print(f" {i}. {icon} {webhook['task_id']}: {webhook['status']}") print("\n" + "="*70) print("✨ v0.7.6 KEY FEATURES DEMONSTRATED:") print("="*70) print("āœ… Webhook support for /crawl/job") print("āœ… Webhook support for /llm/job (NEW!)") print("āœ… Notification-only mode (fetch data separately)") print("āœ… Data-in-payload mode (get full results in webhook)") print("āœ… Custom headers for authentication") print("āœ… JSON schema for structured LLM extraction") print("āœ… Exponential backoff retry for reliable delivery") print("āœ… Global webhook configuration support") print("āœ… Universal webhook handler for both job types") print("\nšŸ’” Benefits:") print(" • No more polling - get instant notifications") print(" • Better resource utilization") print(" • Reliable delivery with automatic retries") print(" • Consistent API across crawl and LLM jobs") print(" • Production-ready webhook infrastructure") def main(): """Run all demos.""" print("\n" + "="*70) print("šŸš€ Crawl4AI v0.7.6 Release Demo") print("="*70) print("Feature: Webhook Support for Docker Job Queue API") print("="*70) # Check if server is running try: health = requests.get(f"{CRAWL4AI_BASE_URL}/health", timeout=5) print(f"āœ… Crawl4AI server is running") except: print(f"āŒ Cannot connect to Crawl4AI at {CRAWL4AI_BASE_URL}") print("Please start Docker container:") print(" docker run -d -p 11235:11235 --env-file .llm.env unclecode/crawl4ai:0.7.6") return # Start webhook server print(f"\n🌐 Starting webhook server at {WEBHOOK_BASE_URL}...") webhook_thread = Thread(target=start_webhook_server, daemon=True) webhook_thread.start() time.sleep(2) # Run demos demo_1_crawl_webhook_notification_only() time.sleep(5) demo_2_crawl_webhook_with_data() time.sleep(5) demo_3_llm_webhook_notification_only() time.sleep(5) demo_4_llm_webhook_with_schema() time.sleep(5) demo_5_global_webhook_config() demo_6_webhook_retry_logic() # Wait for webhooks print("\nā³ Waiting for all webhooks to arrive...") time.sleep(30) # Print summary print_summary() print("\n" + "="*70) print("āœ… Demo completed!") print("="*70) print("\nšŸ“š Documentation:") print(" • deploy/docker/WEBHOOK_EXAMPLES.md") print(" • docs/examples/docker_webhook_example.py") print("\nšŸ”— Upgrade:") print(" docker pull unclecode/crawl4ai:0.7.6") if __name__ == "__main__": main()