Files
crawl4ai/docs/releases_review/demo_v0.7.7.py
ntohidi 6244f56f36 Release v0.7.7
- Updated version to 0.7.7
- Added comprehensive demo and release notes
- Updated all documentation
2025-11-14 10:23:31 +01:00

629 lines
24 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Crawl4AI v0.7.7 Release Demo
============================
This demo showcases the major feature in v0.7.7:
**Self-Hosting with Real-time Monitoring Dashboard**
Features Demonstrated:
1. System health monitoring with live metrics
2. Real-time request tracking (active & completed)
3. Browser pool management (permanent/hot/cold pools)
4. Monitor API endpoints for programmatic access
5. WebSocket streaming for real-time updates
6. Control actions (kill browser, cleanup, restart)
7. Production metrics (efficiency, reuse rates, memory)
Prerequisites:
- Crawl4AI Docker container running on localhost:11235
- Python packages: pip install httpx websockets
Usage:
python docs/releases_review/demo_v0.7.7.py
"""
import asyncio
import httpx
import json
import time
from datetime import datetime
from typing import Dict, Any
# Configuration
# Base URL of the locally running Crawl4AI Docker container (see module docstring).
CRAWL4AI_BASE_URL: str = "http://localhost:11235"
# Interactive web dashboard served by the container for live monitoring.
MONITOR_DASHBOARD_URL: str = f"{CRAWL4AI_BASE_URL}/dashboard"
def print_section(title: str, description: str = ""):
    """Print a section banner: a rule, the emoji-prefixed title, an
    optional description line, and a closing rule."""
    rule = "=" * 70
    print(f"\n{rule}")
    print(f"📊 {title}")
    if description:
        print(description)
    print(f"{rule}\n")
def print_subsection(title: str):
    """Print a minor header: the title sandwiched between dashed rules."""
    divider = "-" * 70
    print(f"\n{divider}")
    print(title)
    print(divider)
async def check_server_health() -> bool:
    """Return True when the Crawl4AI server answers GET /health with 200.

    Any connection problem (server down, timeout, refused socket) is
    reported as False rather than raised, so callers can branch on a bool.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{CRAWL4AI_BASE_URL}/health")
            return response.status_code == 200
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
        # still propagate instead of being silently swallowed.
        return False
async def demo_1_system_health_overview():
    """Demo 1: System Health Overview - Live metrics and pool status.

    Fetches GET /monitor/health once and pretty-prints three groups:
    container metrics, the three browser pools, and aggregate stats.
    """
    print_section(
        "Demo 1: System Health Overview",
        "Real-time monitoring of system resources and browser pool"
    )
    async with httpx.AsyncClient(timeout=30.0) as client:
        print("🔍 Fetching system health metrics...")
        try:
            response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/health")
            # NOTE(review): the keys accessed below ('container', 'pool',
            # 'stats', ...) mirror the /monitor/health response schema --
            # confirm against the server docs if the endpoint changes.
            health = response.json()
            print("\n✅ System Health Report:")
            print(f"\n🖥️ Container Metrics:")
            print(f" • CPU Usage: {health['container']['cpu_percent']:.1f}%")
            print(f" • Memory Usage: {health['container']['memory_percent']:.1f}% "
                  f"({health['container']['memory_mb']:.0f} MB)")
            print(f" • Network RX: {health['container']['network_rx_mb']:.2f} MB")
            print(f" • Network TX: {health['container']['network_tx_mb']:.2f} MB")
            print(f" • Uptime: {health['container']['uptime_seconds']:.0f}s")
            print(f"\n🌐 Browser Pool Status:")
            print(f" Permanent Browser:")
            print(f" • Active: {health['pool']['permanent']['active']}")
            print(f" • Total Requests: {health['pool']['permanent']['total_requests']}")
            print(f" Hot Pool (Frequently Used Configs):")
            print(f" • Count: {health['pool']['hot']['count']}")
            print(f" • Total Requests: {health['pool']['hot']['total_requests']}")
            print(f" Cold Pool (On-Demand Configs):")
            print(f" • Count: {health['pool']['cold']['count']}")
            print(f" • Total Requests: {health['pool']['cold']['total_requests']}")
            print(f"\n📈 Overall Statistics:")
            print(f" • Total Requests: {health['stats']['total_requests']}")
            print(f" • Success Rate: {health['stats']['success_rate_percent']:.1f}%")
            print(f" • Avg Latency: {health['stats']['avg_latency_ms']:.0f}ms")
            print(f"\n💡 Dashboard URL: {MONITOR_DASHBOARD_URL}")
        except Exception as e:
            # Broad catch is deliberate in a demo: report and move on.
            print(f"❌ Error fetching health: {e}")
async def demo_2_request_tracking():
    """Demo 2: Real-time Request Tracking - Generate and monitor requests.

    Submits several crawl jobs in parallel, then queries
    GET /monitor/requests to show how active and completed requests are
    tracked by the server.
    """
    print_section(
        "Demo 2: Real-time Request Tracking",
        "Submit crawl jobs and watch them in real-time"
    )
    async with httpx.AsyncClient(timeout=60.0) as client:
        print("🚀 Submitting crawl requests...")
        # Submit multiple requests concurrently so some overlap while tracked.
        urls_to_crawl = [
            "https://httpbin.org/html",
            "https://httpbin.org/json",
            "https://example.com"
        ]
        tasks = []
        for url in urls_to_crawl:
            task = client.post(
                f"{CRAWL4AI_BASE_URL}/crawl",
                json={"urls": [url], "crawler_config": {}}
            )
            tasks.append(task)
        print(f" • Submitting {len(urls_to_crawl)} requests in parallel...")
        # return_exceptions=True keeps one failed request from cancelling the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        successful = sum(1 for r in results if not isinstance(r, Exception) and r.status_code == 200)
        print(f"{successful}/{len(urls_to_crawl)} requests submitted")
        # Check request tracking
        print("\n📊 Checking request tracking...")
        await asyncio.sleep(2)  # Wait for requests to process
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/requests")
        requests_data = response.json()
        print(f"\n📋 Request Status:")
        print(f" • Active Requests: {len(requests_data['active'])}")
        print(f" • Completed Requests: {len(requests_data['completed'])}")
        if requests_data['completed']:
            print(f"\n📝 Recent Completed Requests:")
            for req in requests_data['completed'][:3]:
                # Fix: both branches used to be the empty string (the icon
                # characters were lost), making the conditional a no-op --
                # restore the success/failure markers.
                status_icon = "✅" if req['success'] else "❌"
                print(f" {status_icon} {req['endpoint']} - {req['latency_ms']:.0f}ms")
async def demo_3_browser_pool_management():
    """Demo 3: Browser Pool Management - 3-tier architecture in action.

    Sends requests with a default config (served by the permanent browser)
    and with a custom viewport config (cold pool, promoted to hot after
    repeated use), then prints the report from GET /monitor/browsers.
    """
    print_section(
        "Demo 3: Browser Pool Management",
        "Understanding permanent, hot, and cold browser pools"
    )
    async with httpx.AsyncClient(timeout=60.0) as client:
        print("🌊 Testing browser pool with different configurations...")
        # Test 1: Default config (permanent browser)
        print("\n🔥 Test 1: Default Config → Permanent Browser")
        for i in range(3):
            await client.post(
                f"{CRAWL4AI_BASE_URL}/crawl",
                json={"urls": [f"https://httpbin.org/html?req={i}"], "crawler_config": {}}
            )
            print(f" • Request {i+1}/3 sent (should use permanent browser)")
        # Give the server a moment to register the burst before the next test.
        await asyncio.sleep(2)
        # Test 2: Custom viewport (cold → hot promotion after 3 uses)
        print("\n♨️ Test 2: Custom Viewport → Cold Pool (promoting to Hot)")
        viewport_config = {"viewport": {"width": 1280, "height": 720}}
        for i in range(4):
            await client.post(
                f"{CRAWL4AI_BASE_URL}/crawl",
                json={
                    "urls": [f"https://httpbin.org/json?viewport={i}"],
                    "browser_config": viewport_config,
                    "crawler_config": {}
                }
            )
            print(f" • Request {i+1}/4 sent (cold→hot promotion after 3rd use)")
        await asyncio.sleep(2)
        # Check browser pool status
        print("\n📊 Browser Pool Report:")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/browsers")
        browsers = response.json()
        print(f"\n🎯 Pool Summary:")
        print(f" • Total Browsers: {browsers['summary']['total_count']}")
        print(f" • Total Memory: {browsers['summary']['total_memory_mb']} MB")
        print(f" • Reuse Rate: {browsers['summary']['reuse_rate_percent']:.1f}%")
        print(f"\n📋 Browser Pool Details:")
        # Each pool entry is printed with a shortened browser id plus usage stats.
        if browsers['permanent']:
            for browser in browsers['permanent']:
                print(f" 🔥 Permanent: {browser['browser_id'][:8]}... | "
                      f"Requests: {browser['request_count']} | "
                      f"Memory: {browser['memory_mb']:.0f} MB")
        if browsers['hot']:
            for browser in browsers['hot']:
                print(f" ♨️ Hot: {browser['browser_id'][:8]}... | "
                      f"Requests: {browser['request_count']} | "
                      f"Memory: {browser['memory_mb']:.0f} MB")
        if browsers['cold']:
            for browser in browsers['cold']:
                print(f" ❄️ Cold: {browser['browser_id'][:8]}... | "
                      f"Requests: {browser['request_count']} | "
                      f"Memory: {browser['memory_mb']:.0f} MB")
async def demo_4_monitor_api_endpoints():
    """Demo 4: Monitor API Endpoints - Complete API surface.

    Walks the read-only monitor endpoints: per-endpoint statistics,
    timeline samples for charting, janitor cleanup events, and errors.
    """
    print_section(
        "Demo 4: Monitor API Endpoints",
        "Programmatic access to all monitoring data"
    )
    async with httpx.AsyncClient(timeout=30.0) as client:
        print("🔌 Testing Monitor API endpoints...")
        # Endpoint performance statistics
        print_subsection("Endpoint Performance Statistics")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/endpoints/stats")
        # Mapping of endpoint path -> stats dict (count / latency / success rate).
        endpoint_stats = response.json()
        print("\n📊 Per-Endpoint Analytics:")
        for endpoint, stats in endpoint_stats.items():
            print(f" {endpoint}:")
            print(f" • Requests: {stats['count']}")
            print(f" • Avg Latency: {stats['avg_latency_ms']:.0f}ms")
            print(f" • Success Rate: {stats['success_rate_percent']:.1f}%")
        # Timeline data for charts
        print_subsection("Timeline Data (for Charts)")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/timeline?minutes=5")
        timeline = response.json()
        print(f"\n📈 Timeline Metrics (last 5 minutes):")
        print(f" • Data Points: {len(timeline['memory'])}")
        if timeline['memory']:
            latest = timeline['memory'][-1]  # most recent sample
            print(f" • Latest Memory: {latest['value']:.1f}%")
            print(f" • Timestamp: {latest['timestamp']}")
        # Janitor logs
        print_subsection("Janitor Cleanup Events")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/logs/janitor?limit=3")
        janitor_logs = response.json()
        print(f"\n🧹 Recent Cleanup Activities:")
        if janitor_logs:
            for log in janitor_logs[:3]:
                print(f"{log['timestamp']}: {log['message']}")
        else:
            print(" (No cleanup events yet - janitor runs periodically)")
        # Error logs
        print_subsection("Error Monitoring")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/logs/errors?limit=3")
        error_logs = response.json()
        print(f"\n❌ Recent Errors:")
        if error_logs:
            for log in error_logs[:3]:
                print(f"{log['timestamp']}: {log['error_type']}")
                print(f" {log['message'][:100]}...")  # truncate long messages
        else:
            print(" ✅ No recent errors!")
async def demo_5_websocket_streaming():
    """Demo 5: WebSocket Streaming - Real-time updates.

    Purely informational: prints the WebSocket endpoint details and a
    sample client snippet instead of opening a live connection.
    """
    print_section(
        "Demo 5: WebSocket Streaming",
        "Live monitoring with 2-second update intervals"
    )
    print("⚡ WebSocket Streaming Demo")
    print("\n💡 The monitoring dashboard uses WebSocket for real-time updates")
    print(f" • Connection: ws://localhost:11235/monitor/ws")
    print(f" • Update Interval: 2 seconds")
    print(f" • Data: Health, requests, browsers, memory, errors")
    print("\n📝 Sample WebSocket Integration Code:")
    # Fix: the sample snippet had lost its indentation and printed as
    # syntactically invalid Python; re-indented so readers can copy-paste it.
    print("""
import websockets
import json

async def monitor_realtime():
    uri = "ws://localhost:11235/monitor/ws"
    async with websockets.connect(uri) as websocket:
        while True:
            data = await websocket.recv()
            update = json.loads(data)
            print(f"Memory: {update['health']['container']['memory_percent']:.1f}%")
            print(f"Active Requests: {len(update['requests']['active'])}")
            print(f"Browser Pool: {update['health']['pool']['permanent']['active']}")
""")
    print("\n🌐 Open the dashboard to see WebSocket in action:")
    print(f" {MONITOR_DASHBOARD_URL}")
async def demo_6_control_actions():
    """Demo 6: Control Actions - Manual browser management.

    Triggers POST /monitor/actions/cleanup for real, then only *prints*
    example payloads for the kill/restart actions and the stats reset.
    """
    print_section(
        "Demo 6: Control Actions",
        "Manual control over browser pool and cleanup"
    )
    async with httpx.AsyncClient(timeout=30.0) as client:
        print("🎮 Testing control actions...")
        # Force cleanup
        print_subsection("Force Immediate Cleanup")
        print("🧹 Triggering manual cleanup...")
        try:
            response = await client.post(f"{CRAWL4AI_BASE_URL}/monitor/actions/cleanup")
            if response.status_code == 200:
                result = response.json()
                print(f" ✅ Cleanup completed")
                # .get() defaults guard against fields missing from the response.
                print(f" • Browsers cleaned: {result.get('cleaned_count', 0)}")
                print(f" • Memory freed: {result.get('memory_freed_mb', 0):.1f} MB")
            else:
                print(f" ⚠️ Response: {response.status_code}")
        except Exception as e:
            # Best-effort demo step: report the failure and continue.
            print(f" Cleanup action: {e}")
        # Get browser list for potential kill/restart
        print_subsection("Browser Management")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/browsers")
        browsers = response.json()
        cold_browsers = browsers.get('cold', [])
        if cold_browsers:
            # Documentation only -- no browser is actually killed here.
            browser_id = cold_browsers[0]['browser_id']
            print(f"\n🎯 Example: Kill specific browser")
            print(f" POST /monitor/actions/kill_browser")
            print(f" JSON: {{'browser_id': '{browser_id[:16]}...'}}")
            print(f" → Kills the browser and frees resources")
        print(f"\n🔄 Example: Restart browser")
        print(f" POST /monitor/actions/restart_browser")
        print(f" JSON: {{'browser_id': 'browser_id_here'}}")
        print(f" → Restart a specific browser instance")
        # Reset statistics
        print_subsection("Reset Statistics")
        print("📊 Statistics can be reset for fresh monitoring:")
        print(f" POST /monitor/stats/reset")
        print(f" → Clears all accumulated statistics")
async def demo_7_production_metrics():
    """Demo 7: Production Metrics - Key indicators for operations.

    Pulls current health and browser-pool data, then prints six metrics
    with suggested targets / alert thresholds for production monitoring.
    """
    print_section(
        "Demo 7: Production Metrics",
        "Critical metrics for production monitoring"
    )
    async with httpx.AsyncClient(timeout=30.0) as client:
        print("📊 Key Production Metrics:")
        # Overall health
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/health")
        health = response.json()
        # Browser efficiency
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/browsers")
        browsers = response.json()
        print("\n🎯 Critical Metrics to Track:")
        print(f"\n1⃣ Memory Usage Trends")
        print(f" • Current: {health['container']['memory_percent']:.1f}%")
        print(f" • Alert if: >80%")
        print(f" • Action: Trigger cleanup or scale")
        print(f"\n2⃣ Request Success Rate")
        print(f" • Current: {health['stats']['success_rate_percent']:.1f}%")
        print(f" • Target: >95%")
        print(f" • Alert if: <90%")
        print(f"\n3⃣ Average Latency")
        print(f" • Current: {health['stats']['avg_latency_ms']:.0f}ms")
        print(f" • Target: <2000ms")
        print(f" • Alert if: >5000ms")
        print(f"\n4⃣ Browser Pool Efficiency")
        print(f" • Reuse Rate: {browsers['summary']['reuse_rate_percent']:.1f}%")
        print(f" • Target: >80%")
        print(f" • Indicates: Effective browser pooling")
        print(f"\n5⃣ Total Browsers")
        print(f" • Current: {browsers['summary']['total_count']}")
        print(f" • Alert if: >20 (possible leak)")
        print(f" • Check: Janitor is running correctly")
        print(f"\n6⃣ Error Frequency")
        response = await client.get(f"{CRAWL4AI_BASE_URL}/monitor/logs/errors?limit=10")
        errors = response.json()
        print(f" • Recent Errors: {len(errors)}")
        print(f" • Alert if: >10 in last hour")
        print(f" • Action: Review error patterns")
        print("\n💡 Integration Examples:")
        print(" • Prometheus: Scrape /monitor/health")
        print(" • Alerting: Monitor memory, success rate, latency")
        print(" • Dashboards: WebSocket streaming to custom UI")
        print(" • Log Aggregation: Collect /monitor/logs/* endpoints")
async def demo_8_self_hosting_value():
    """Demo 8: Self-Hosting Value Proposition.

    Print-only: lists the benefits of self-hosting plus quick-start
    commands. Performs no network calls (async only for uniformity with
    the other demos, so main() can await them all the same way).
    """
    print_section(
        "Demo 8: Why Self-Host Crawl4AI?",
        "The value proposition of owning your infrastructure"
    )
    print("🎯 Self-Hosting Benefits:\n")
    print("🔒 Data Privacy & Security")
    print(" • Your data never leaves your infrastructure")
    print(" • No third-party access to crawled content")
    print(" • Keep sensitive workflows behind your firewall")
    print("\n💰 Cost Control")
    print(" • No per-request pricing or rate limits")
    print(" • Predictable infrastructure costs")
    print(" • Scale based on your actual needs")
    print("\n🎯 Full Customization")
    print(" • Complete control over browser configs")
    print(" • Custom hooks and strategies")
    print(" • Tailored monitoring and alerting")
    print("\n📊 Complete Transparency")
    print(" • Real-time monitoring dashboard")
    print(" • Full visibility into system performance")
    print(" • Detailed request and error tracking")
    print("\n⚡ Performance & Flexibility")
    print(" • Direct access, no network overhead")
    print(" • Integrate with existing infrastructure")
    print(" • Custom resource allocation")
    print("\n🛡️ Enterprise-Grade Operations")
    print(" • Prometheus integration ready")
    print(" • WebSocket for real-time dashboards")
    print(" • Full API for automation")
    print(" • Manual controls for troubleshooting")
    print(f"\n🌐 Get Started:")
    print(f" docker pull unclecode/crawl4ai:0.7.7")
    print(f" docker run -d -p 11235:11235 --shm-size=1g unclecode/crawl4ai:0.7.7")
    print(f" # Visit: {MONITOR_DASHBOARD_URL}")
def print_summary():
    """Print the closing recap: features shown, what's new in v0.7.7,
    why it matters, next steps, and resource links. Print-only."""
    print("\n" + "=" * 70)
    print("📊 DEMO SUMMARY - Crawl4AI v0.7.7")
    print("=" * 70)
    print("\n✨ Features Demonstrated:")
    print("=" * 70)
    # One entry per demo function, in the order they ran.
    print("✅ System Health Overview")
    print(" → Real-time CPU, memory, network, and uptime monitoring")
    print("\n✅ Request Tracking")
    print(" → Active and completed request monitoring with full details")
    print("\n✅ Browser Pool Management")
    print(" → 3-tier architecture: Permanent, Hot, and Cold pools")
    print(" → Automatic promotion and cleanup")
    print("\n✅ Monitor API Endpoints")
    print(" → Complete REST API for programmatic access")
    print(" → Health, requests, browsers, timeline, logs, errors")
    print("\n✅ WebSocket Streaming")
    print(" → Real-time updates every 2 seconds")
    print(" → Build custom dashboards with live data")
    print("\n✅ Control Actions")
    print(" → Manual browser management (kill, restart)")
    print(" → Force cleanup and statistics reset")
    print("\n✅ Production Metrics")
    print(" → 6 critical metrics for operational excellence")
    print(" → Prometheus integration patterns")
    print("\n✅ Self-Hosting Value")
    print(" → Data privacy, cost control, full customization")
    print(" → Enterprise-grade transparency and control")
    print("\n" + "=" * 70)
    print("🎯 What's New in v0.7.7?")
    print("=" * 70)
    print("• 📊 Complete Real-time Monitoring System")
    print("• 🌐 Interactive Web Dashboard (/dashboard)")
    print("• 🔌 Comprehensive Monitor API")
    print("• ⚡ WebSocket Streaming (2-second updates)")
    print("• 🎮 Manual Control Actions")
    print("• 📈 Production Integration Examples")
    print("• 🏭 Prometheus, Alerting, Log Aggregation")
    print("• 🔥 Smart Browser Pool (Permanent/Hot/Cold)")
    print("• 🧹 Automatic Janitor Cleanup")
    print("• 📋 Full Request & Error Tracking")
    print("\n" + "=" * 70)
    print("💡 Why This Matters")
    print("=" * 70)
    print("Before v0.7.7: Docker was just a containerized crawler")
    print("After v0.7.7: Complete self-hosting platform with enterprise monitoring")
    print("\nYou now have:")
    print(" • Full visibility into what's happening inside")
    print(" • Real-time operational dashboards")
    print(" • Complete control over browser resources")
    print(" • Production-ready observability")
    print(" • Zero external dependencies")
    print("\n" + "=" * 70)
    print("📚 Next Steps")
    print("=" * 70)
    print(f"1. Open the dashboard: {MONITOR_DASHBOARD_URL}")
    print("2. Read the docs: https://docs.crawl4ai.com/basic/self-hosting/")
    print("3. Try the Monitor API endpoints yourself")
    print("4. Set up Prometheus integration for production")
    print("5. Build custom dashboards with WebSocket streaming")
    print("\n" + "=" * 70)
    print("🔗 Resources")
    print("=" * 70)
    print(f"• Dashboard: {MONITOR_DASHBOARD_URL}")
    print(f"• Health API: {CRAWL4AI_BASE_URL}/monitor/health")
    print(f"• Documentation: https://docs.crawl4ai.com/")
    print(f"• GitHub: https://github.com/unclecode/crawl4ai")
    print("\n" + "=" * 70)
    print("🎉 You're now in control of your web crawling destiny!")
    print("=" * 70)
async def main():
    """Run all demos in sequence after verifying the server is reachable.

    Exits early with setup instructions when the health check fails; a
    failure in one demo does not stop the remaining demos.
    """
    print("\n" + "=" * 70)
    print("🚀 Crawl4AI v0.7.7 Release Demo")
    print("=" * 70)
    print("Feature: Self-Hosting with Real-time Monitoring Dashboard")
    print("=" * 70)
    # Check if server is running
    print("\n🔍 Checking Crawl4AI server...")
    server_running = await check_server_health()
    if not server_running:
        print(f"❌ Cannot connect to Crawl4AI at {CRAWL4AI_BASE_URL}")
        print("\nPlease start the Docker container:")
        print(" docker pull unclecode/crawl4ai:0.7.7")
        print(" docker run -d -p 11235:11235 --shm-size=1g unclecode/crawl4ai:0.7.7")
        print("\nThen re-run this demo.")
        return
    print(f"✅ Crawl4AI server is running!")
    print(f"📊 Dashboard available at: {MONITOR_DASHBOARD_URL}")
    # Run all demos
    demos = [
        demo_1_system_health_overview,
        demo_2_request_tracking,
        demo_3_browser_pool_management,
        demo_4_monitor_api_endpoints,
        demo_5_websocket_streaming,
        demo_6_control_actions,
        demo_7_production_metrics,
        demo_8_self_hosting_value,
    ]
    for i, demo_func in enumerate(demos, 1):
        try:
            await demo_func()
            if i < len(demos):
                await asyncio.sleep(2)  # Brief pause between demos
        except KeyboardInterrupt:
            # Ctrl-C stops the whole run, not just the current demo.
            print(f"\n\n⚠️ Demo interrupted by user")
            return
        except Exception as e:
            # One failing demo should not stop the rest.
            print(f"\n❌ Demo {i} error: {e}")
            print("Continuing to next demo...\n")
            continue
    # Print comprehensive summary
    print_summary()
    print("\n" + "=" * 70)
    print("✅ Demo completed!")
    print("=" * 70)
if __name__ == "__main__":
    # Script entry point: drive the async demo via asyncio.run.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C during startup/shutdown: exit politely without a traceback.
        print("\n\n👋 Demo stopped by user. Thanks for trying Crawl4AI v0.7.7!")
    except Exception as e:
        # Last-resort handler: most failures here mean the server is absent.
        print(f"\n\n❌ Demo failed: {e}")
        print("Make sure the Docker container is running:")
        print(" docker run -d -p 11235:11235 --shm-size=1g unclecode/crawl4ai:0.7.7")