diff --git a/deploy/docker/api.py b/deploy/docker/api.py index 605b0c8a..64ac4a85 100644 --- a/deploy/docker/api.py +++ b/deploy/docker/api.py @@ -460,12 +460,22 @@ async def handle_crawl_request( hooks_config: Optional[dict] = None ) -> dict: """Handle non-streaming crawl requests with optional hooks.""" + # Track request start + request_id = f"req_{uuid4().hex[:8]}" + try: + from monitor import get_monitor + await get_monitor().track_request_start( + request_id, "/crawl", urls[0] if urls else "batch", browser_config + ) + except: + pass # Monitor not critical + start_mem_mb = _get_memory_mb() # <--- Get memory before start_time = time.time() mem_delta_mb = None peak_mem_mb = start_mem_mb hook_manager = None - + try: urls = [('https://' + url) if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")) else url for url in urls] browser_config = BrowserConfig.load(browser_config) @@ -570,7 +580,16 @@ async def handle_crawl_request( "server_memory_delta_mb": mem_delta_mb, "server_peak_memory_mb": peak_mem_mb } - + + # Track request completion + try: + from monitor import get_monitor + await get_monitor().track_request_end( + request_id, success=True, pool_hit=True, status_code=200 + ) + except: + pass + # Add hooks information if hooks were used if hooks_config and hook_manager: from hook_manager import UserHookManager @@ -599,6 +618,16 @@ async def handle_crawl_request( except Exception as e: logger.error(f"Crawl error: {str(e)}", exc_info=True) + + # Track request error + try: + from monitor import get_monitor + await get_monitor().track_request_end( + request_id, success=False, error=str(e), status_code=500 + ) + except: + pass + if 'crawler' in locals() and crawler.ready: # Check if crawler was initialized and started # try: # await crawler.close() diff --git a/deploy/docker/server.py b/deploy/docker/server.py index efb1cecb..364f4457 100644 --- a/deploy/docker/server.py +++ b/deploy/docker/server.py @@ -174,6 +174,15 @@ app.mount( name="monitor_ui", ) +# ── static assets (logo, etc) ──────────────────────────────── +ASSETS_DIR = pathlib.Path(__file__).parent / "static" / "assets" +if ASSETS_DIR.exists(): + app.mount( + "/static/assets", + StaticFiles(directory=ASSETS_DIR), + name="assets", + ) + @app.get("/") async def root(): diff --git a/deploy/docker/static/assets/crawl4ai-logo.jpg b/deploy/docker/static/assets/crawl4ai-logo.jpg new file mode 100644 index 00000000..6a808c04 Binary files /dev/null and b/deploy/docker/static/assets/crawl4ai-logo.jpg differ diff --git a/deploy/docker/static/assets/crawl4ai-logo.png b/deploy/docker/static/assets/crawl4ai-logo.png new file mode 100644 index 00000000..ed82a3cc Binary files /dev/null and b/deploy/docker/static/assets/crawl4ai-logo.png differ diff --git a/deploy/docker/static/assets/logo.png b/deploy/docker/static/assets/logo.png new file mode 100644 index 00000000..25911853 Binary files /dev/null and b/deploy/docker/static/assets/logo.png differ diff --git a/deploy/docker/static/monitor/index.html b/deploy/docker/static/monitor/index.html index 2beb9467..f5931fe3 100644 --- a/deploy/docker/static/monitor/index.html +++ b/deploy/docker/static/monitor/index.html @@ -79,7 +79,8 @@

- 📊 Crawl4AI Monitor + Crawl4AI + Monitor GitHub stars @@ -90,7 +91,7 @@
@@ -170,85 +171,78 @@ - -
-
- - - - -
- -
- -
-
-

Active Requests (0)

- + +
+ +
+
+

📝 Requests (0 active)

+ +
+
+
+
No active requests
- -
-
-
No active requests
-
- -

Recent Completed

-
-
No completed requests
-
+

Recent Completed

+
+
No completed requests
+
- -
+ +
@@ -313,34 +307,14 @@ // ========== State Management ========== let autoRefresh = true; let refreshInterval; - const REFRESH_RATE = 5000; // 5 seconds + const REFRESH_RATE = 1000; // 1 second - // ========== Tab Switching ========== - document.querySelectorAll('.activity-tab').forEach(btn => { - btn.addEventListener('click', () => { - const tab = btn.dataset.tab; - - // Update tabs - document.querySelectorAll('.activity-tab').forEach(b => { - b.classList.remove('bg-dark', 'text-primary'); - }); - btn.classList.add('bg-dark', 'text-primary'); - - // Update content - document.querySelectorAll('.activity-content').forEach(c => c.classList.add('hidden')); - document.getElementById(`tab-${tab}`).classList.remove('hidden'); - - // Fetch specific data - if (tab === 'browsers') fetchBrowsers(); - if (tab === 'janitor') fetchJanitorLog(); - if (tab === 'errors') fetchErrors(); - }); - }); + // No more tabs - all sections visible at once! // ========== Auto-refresh Toggle ========== document.getElementById('auto-refresh-toggle').addEventListener('click', function() { autoRefresh = !autoRefresh; - this.textContent = autoRefresh ? 'ON ⚡5s' : 'OFF'; + this.textContent = autoRefresh ? 'ON ⚡1s' : 'OFF'; this.classList.toggle('bg-primary'); this.classList.toggle('bg-dark'); this.classList.toggle('text-dark'); @@ -367,6 +341,9 @@ await Promise.all([ fetchHealth(), fetchRequests(), + fetchBrowsers(), + fetchJanitorLog(), + fetchErrors(), fetchEndpointStats(), fetchTimeline() ]); @@ -475,29 +452,24 @@ const tbody = document.getElementById('browsers-table-body'); if (data.browsers.length === 0) { - tbody.innerHTML = 'No browsers'; + tbody.innerHTML = 'No browsers'; } else { tbody.innerHTML = data.browsers.map(b => { const typeIcon = b.type === 'permanent' ? '🔥' : b.type === 'hot' ? '♨️' : '❄️'; const typeColor = b.type === 'permanent' ? 'text-primary' : b.type === 'hot' ? 'text-accent' : 'text-light'; return ` - - ${typeIcon} ${b.type.toUpperCase()} - ${b.sig} - ${formatSeconds(b.age_seconds)} - ${formatSeconds(b.last_used_seconds)} ago - ${b.memory_mb} MB - ${b.hits} - + + ${typeIcon} + ${b.sig} + ${formatSeconds(b.age_seconds)} + ${formatSeconds(b.last_used_seconds)} + ${b.hits} + ${b.killable ? ` - - + ` : ` - + `} diff --git a/deploy/docker/tests/demo_monitor_dashboard.py b/deploy/docker/tests/demo_monitor_dashboard.py new file mode 100755 index 00000000..699988a5 --- /dev/null +++ b/deploy/docker/tests/demo_monitor_dashboard.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +""" +Monitor Dashboard Demo Script +Generates varied activity to showcase all monitoring features for video recording. +""" +import httpx +import asyncio +import time +from datetime import datetime + +BASE_URL = "http://localhost:11235" + +async def demo_dashboard(): + print("🎬 Monitor Dashboard Demo - Starting...\n") + print(f"📊 Dashboard: {BASE_URL}/dashboard") + print("=" * 60) + + async with httpx.AsyncClient(timeout=60.0) as client: + + # Phase 1: Simple requests (permanent browser) + print("\n🔷 Phase 1: Testing permanent browser pool") + print("-" * 60) + for i in range(5): + print(f" {i+1}/5 Request to /crawl (default config)...") + try: + r = await client.post( + f"{BASE_URL}/crawl", + json={"urls": [f"https://httpbin.org/html?req={i}"], "crawler_config": {}} + ) + print(f" ✅ Status: {r.status_code}, Time: {r.elapsed.total_seconds():.2f}s") + except Exception as e: + print(f" ❌ Error: {e}") + await asyncio.sleep(1) # Small delay between requests + + # Phase 2: Create variant browsers (different configs) + print("\n🔶 Phase 2: Testing cold→hot pool promotion") + print("-" * 60) + viewports = [ + {"width": 1920, "height": 1080}, + {"width": 1280, "height": 720}, + {"width": 800, "height": 600} + ] + + for idx, viewport in enumerate(viewports): + print(f" Viewport {viewport['width']}x{viewport['height']}:") + for i in range(4): # 4 requests each to trigger promotion at 3 + try: + r = await client.post( + f"{BASE_URL}/crawl", + json={ + "urls": [f"https://httpbin.org/json?v={idx}&r={i}"], + "browser_config": {"viewport": viewport}, + "crawler_config": {} + } + ) + print(f" {i+1}/4 ✅ {r.status_code} - Should see cold→hot after 3 uses") + except Exception as e: + print(f" {i+1}/4 ❌ {e}") + await asyncio.sleep(0.5) + + # Phase 3: Concurrent burst (stress pool) + print("\n🔷 Phase 3: Concurrent burst (10 parallel)") + print("-" * 60) + tasks = [] + for i in range(10): + tasks.append( + client.post( + f"{BASE_URL}/crawl", + json={"urls": [f"https://httpbin.org/delay/2?burst={i}"], "crawler_config": {}} + ) + ) + + print(" Sending 10 concurrent requests...") + start = time.time() + results = await asyncio.gather(*tasks, return_exceptions=True) + elapsed = time.time() - start + + successes = sum(1 for r in results if not isinstance(r, Exception) and r.status_code == 200) + print(f" ✅ {successes}/10 succeeded in {elapsed:.2f}s") + + # Phase 4: Multi-endpoint coverage + print("\n🔶 Phase 4: Testing multiple endpoints") + print("-" * 60) + endpoints = [ + ("/md", {"url": "https://httpbin.org/html", "f": "fit", "c": "0"}), + ("/screenshot", {"url": "https://httpbin.org/html"}), + ("/pdf", {"url": "https://httpbin.org/html"}), + ] + + for endpoint, payload in endpoints: + print(f" Testing {endpoint}...") + try: + if endpoint == "/md": + r = await client.post(f"{BASE_URL}{endpoint}", json=payload) + else: + r = await client.post(f"{BASE_URL}{endpoint}", json=payload) + print(f" ✅ {r.status_code}") + except Exception as e: + print(f" ❌ {e}") + await asyncio.sleep(1) + + # Phase 5: Intentional error (to populate errors tab) + print("\n🔷 Phase 5: Generating error examples") + print("-" * 60) + print(" Triggering invalid URL error...") + try: + r = await client.post( + f"{BASE_URL}/crawl", + json={"urls": ["invalid://bad-url"], "crawler_config": {}} + ) + print(f" Response: {r.status_code}") + except Exception as e: + print(f" ✅ Error captured: {type(e).__name__}") + + # Phase 6: Wait for janitor activity + print("\n🔶 Phase 6: Waiting for janitor cleanup...") + print("-" * 60) + print(" Idle for 40s to allow janitor to clean cold pool browsers...") + for i in range(40, 0, -10): + print(f" {i}s remaining... (Check dashboard for cleanup events)") + await asyncio.sleep(10) + + # Phase 7: Final stats check + print("\n🔷 Phase 7: Final dashboard state") + print("-" * 60) + + r = await client.get(f"{BASE_URL}/monitor/health") + health = r.json() + print(f" Memory: {health['container']['memory_percent']:.1f}%") + print(f" Browsers: Perm={health['pool']['permanent']['active']}, " + f"Hot={health['pool']['hot']['count']}, Cold={health['pool']['cold']['count']}") + + r = await client.get(f"{BASE_URL}/monitor/endpoints/stats") + stats = r.json() + print(f"\n Endpoint Stats:") + for endpoint, data in stats.items(): + print(f" {endpoint}: {data['count']} req, " + f"{data['avg_latency_ms']:.0f}ms avg, " + f"{data['success_rate_percent']:.1f}% success") + + r = await client.get(f"{BASE_URL}/monitor/browsers") + browsers = r.json() + print(f"\n Pool Efficiency:") + print(f" Total browsers: {browsers['summary']['total_count']}") + print(f" Memory usage: {browsers['summary']['total_memory_mb']} MB") + print(f" Reuse rate: {browsers['summary']['reuse_rate_percent']:.1f}%") + + print("\n" + "=" * 60) + print("✅ Demo complete! Dashboard is now populated with rich data.") + print(f"\n📹 Recording tip: Refresh {BASE_URL}/dashboard") + print(" You should see:") + print(" • Active & completed requests") + print(" • Browser pool (permanent + hot/cold)") + print(" • Janitor cleanup events") + print(" • Endpoint analytics") + print(" • Memory timeline") + +if __name__ == "__main__": + try: + asyncio.run(demo_dashboard()) + except KeyboardInterrupt: + print("\n\n⚠️ Demo interrupted by user") + except Exception as e: + print(f"\n\n❌ Demo failed: {e}") diff --git a/deploy/docker/tests/test_monitor_demo.py b/deploy/docker/tests/test_monitor_demo.py new file mode 100644 index 00000000..2dbff5b1 --- /dev/null +++ b/deploy/docker/tests/test_monitor_demo.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +"""Quick test to generate monitor dashboard activity""" +import httpx +import asyncio + +async def test_dashboard(): + async with httpx.AsyncClient(timeout=30.0) as client: + print("📊 Generating dashboard activity...") + + # Test 1: Simple crawl + print("\n1️⃣ Running simple crawl...") + r1 = await client.post( + "http://localhost:11235/crawl", + json={"urls": ["https://httpbin.org/html"], "crawler_config": {}} + ) + print(f" Status: {r1.status_code}") + + # Test 2: Multiple URLs + print("\n2️⃣ Running multi-URL crawl...") + r2 = await client.post( + "http://localhost:11235/crawl", + json={ + "urls": [ + "https://httpbin.org/html", + "https://httpbin.org/json" + ], + "crawler_config": {} + } + ) + print(f" Status: {r2.status_code}") + + # Test 3: Check monitor health + print("\n3️⃣ Checking monitor health...") + r3 = await client.get("http://localhost:11235/monitor/health") + health = r3.json() + print(f" Memory: {health['container']['memory_percent']}%") + print(f" Browsers: {health['pool']['permanent']['active']}") + + # Test 4: Check requests + print("\n4️⃣ Checking request log...") + r4 = await client.get("http://localhost:11235/monitor/requests") + reqs = r4.json() + print(f" Active: {len(reqs['active'])}") + print(f" Completed: {len(reqs['completed'])}") + + # Test 5: Check endpoint stats + print("\n5️⃣ Checking endpoint stats...") + r5 = await client.get("http://localhost:11235/monitor/endpoints/stats") + stats = r5.json() + for endpoint, data in stats.items(): + print(f" {endpoint}: {data['count']} requests, {data['avg_latency_ms']}ms avg") + + print("\n✅ Dashboard should now show activity!") + print(f"\n🌐 Open: http://localhost:11235/dashboard") + +if __name__ == "__main__": + asyncio.run(test_dashboard())