diff --git a/deploy/docker/crawler_pool.py b/deploy/docker/crawler_pool.py
index 226e3680..95593b3f 100644
--- a/deploy/docker/crawler_pool.py
+++ b/deploy/docker/crawler_pool.py
@@ -57,6 +57,14 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
if USAGE_COUNT[sig] >= 3:
logger.info(f"โฌ๏ธ Promoting to hot pool (sig={sig[:8]}, count={USAGE_COUNT[sig]})")
HOT_POOL[sig] = COLD_POOL.pop(sig)
+
+ # Track promotion in monitor
+ try:
+ from monitor import get_monitor
+ get_monitor().track_janitor_event("promote", sig, {"count": USAGE_COUNT[sig]})
+ except Exception:
+ pass
+
return HOT_POOL[sig]
logger.info(f"โ๏ธ Using cold pool browser (sig={sig[:8]})")
@@ -124,23 +132,39 @@ async def janitor():
# Clean cold pool
for sig in list(COLD_POOL.keys()):
if now - LAST_USED.get(sig, now) > cold_ttl:
- logger.info(f"๐งน Closing cold browser (sig={sig[:8]}, idle={now - LAST_USED[sig]:.0f}s)")
+ idle_time = now - LAST_USED[sig]
+ logger.info(f"๐งน Closing cold browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
with suppress(Exception):
await COLD_POOL[sig].close()
COLD_POOL.pop(sig, None)
LAST_USED.pop(sig, None)
USAGE_COUNT.pop(sig, None)
+ # Track in monitor
+ try:
+ from monitor import get_monitor
+ get_monitor().track_janitor_event("close_cold", sig, {"idle_seconds": int(idle_time), "ttl": cold_ttl})
+ except Exception:
+ pass
+
# Clean hot pool (more conservative)
for sig in list(HOT_POOL.keys()):
if now - LAST_USED.get(sig, now) > hot_ttl:
- logger.info(f"๐งน Closing hot browser (sig={sig[:8]}, idle={now - LAST_USED[sig]:.0f}s)")
+ idle_time = now - LAST_USED[sig]
+ logger.info(f"๐งน Closing hot browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
with suppress(Exception):
await HOT_POOL[sig].close()
HOT_POOL.pop(sig, None)
LAST_USED.pop(sig, None)
USAGE_COUNT.pop(sig, None)
+ # Track in monitor
+ try:
+ from monitor import get_monitor
+ get_monitor().track_janitor_event("close_hot", sig, {"idle_seconds": int(idle_time), "ttl": hot_ttl})
+ except Exception:
+ pass
+
# Log pool stats
if mem_pct > 60:
logger.info(f"๐ Pool: hot={len(HOT_POOL)}, cold={len(COLD_POOL)}, mem={mem_pct:.1f}%")
diff --git a/deploy/docker/monitor.py b/deploy/docker/monitor.py
new file mode 100644
index 00000000..3735280c
--- /dev/null
+++ b/deploy/docker/monitor.py
@@ -0,0 +1,305 @@
+# monitor.py - Real-time monitoring stats with Redis persistence
+import time
+import json
+import asyncio
+from typing import Dict, List, Optional
+from datetime import datetime, timezone
+from collections import deque
+from redis import asyncio as aioredis
+from utils import get_container_memory_percent
+import psutil
+import logging
+
+logger = logging.getLogger(__name__)
+
+class MonitorStats:
+ """Tracks real-time server stats with Redis persistence."""
+
+ def __init__(self, redis: aioredis.Redis):
+ self.redis = redis
+ self.start_time = time.time()
+
+ # In-memory queues (fast reads; these deques are not persisted)
+ self.active_requests: Dict[str, Dict] = {} # id -> request info
+ self.completed_requests: deque = deque(maxlen=100) # Last 100
+ self.janitor_events: deque = deque(maxlen=100)
+ self.errors: deque = deque(maxlen=100)
+
+ # Endpoint stats (persisted in Redis)
+ self.endpoint_stats: Dict[str, Dict] = {} # endpoint -> {count, total_time, errors, ...}
+
+ # Timeline data (5min window, 5s resolution = 60 points)
+ self.memory_timeline: deque = deque(maxlen=60)
+ self.requests_timeline: deque = deque(maxlen=60)
+ self.browser_timeline: deque = deque(maxlen=60)
+
+ async def track_request_start(self, request_id: str, endpoint: str, url: str, config: Optional[Dict] = None):
+ """Track new request start."""
+ req_info = {
+ "id": request_id,
+ "endpoint": endpoint,
+ "url": url[:100], # Truncate long URLs
+ "start_time": time.time(),
+ "config_sig": config.get("sig", "default") if config else "default",
+ "mem_start": psutil.Process().memory_info().rss / (1024 * 1024)
+ }
+ self.active_requests[request_id] = req_info
+
+ # Increment endpoint counter
+ if endpoint not in self.endpoint_stats:
+ self.endpoint_stats[endpoint] = {
+ "count": 0, "total_time": 0, "errors": 0,
+ "pool_hits": 0, "success": 0
+ }
+ self.endpoint_stats[endpoint]["count"] += 1
+
+ # Persist to Redis (fire and forget)
+ asyncio.create_task(self._persist_endpoint_stats())
+
+ async def track_request_end(self, request_id: str, success: bool, error: Optional[str] = None,
+ pool_hit: bool = True, status_code: int = 200):
+ """Track request completion."""
+ if request_id not in self.active_requests:
+ return
+
+ req_info = self.active_requests.pop(request_id)
+ end_time = time.time()
+ elapsed = end_time - req_info["start_time"]
+ mem_end = psutil.Process().memory_info().rss / (1024 * 1024)
+ mem_delta = mem_end - req_info["mem_start"]
+
+ # Update stats
+ endpoint = req_info["endpoint"]
+ if endpoint in self.endpoint_stats:
+ self.endpoint_stats[endpoint]["total_time"] += elapsed
+ if success:
+ self.endpoint_stats[endpoint]["success"] += 1
+ else:
+ self.endpoint_stats[endpoint]["errors"] += 1
+ if pool_hit:
+ self.endpoint_stats[endpoint]["pool_hits"] += 1
+
+ # Add to completed queue
+ completed = {
+ **req_info,
+ "end_time": end_time,
+ "elapsed": round(elapsed, 2),
+ "mem_delta": round(mem_delta, 1),
+ "success": success,
+ "error": error,
+ "status_code": status_code,
+ "pool_hit": pool_hit
+ }
+ self.completed_requests.append(completed)
+
+ # Track errors
+ if not success and error:
+ self.errors.append({
+ "timestamp": end_time,
+ "endpoint": endpoint,
+ "url": req_info["url"],
+ "error": error,
+ "request_id": request_id
+ })
+
+ await self._persist_endpoint_stats()
+
+ def track_janitor_event(self, event_type: str, sig: str, details: Dict):
+ """Track janitor cleanup events."""
+ self.janitor_events.append({
+ "timestamp": time.time(),
+ "type": event_type, # "close_cold", "close_hot", "promote"
+ "sig": sig[:8],
+ "details": details
+ })
+
+ async def update_timeline(self):
+ """Update timeline data points (called every 5s)."""
+ now = time.time()
+ mem_pct = get_container_memory_percent()
+
+ # Count requests in last 5s
+ recent_reqs = sum(1 for req in self.completed_requests
+ if now - req.get("end_time", 0) < 5)
+
+ # Browser counts (imported here, not at module top, to avoid a circular import)
+ from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL
+ browser_count = {
+ "permanent": 1 if PERMANENT else 0,
+ "hot": len(HOT_POOL),
+ "cold": len(COLD_POOL)
+ }
+
+ self.memory_timeline.append({"time": now, "value": mem_pct})
+ self.requests_timeline.append({"time": now, "value": recent_reqs})
+ self.browser_timeline.append({"time": now, "browsers": browser_count})
+
+ async def _persist_endpoint_stats(self):
+ """Persist endpoint stats to Redis."""
+ try:
+ await self.redis.set(
+ "monitor:endpoint_stats",
+ json.dumps(self.endpoint_stats),
+ ex=86400 # 24h TTL
+ )
+ except Exception as e:
+ logger.warning(f"Failed to persist endpoint stats: {e}")
+
+ async def load_from_redis(self):
+ """Load persisted stats from Redis."""
+ try:
+ data = await self.redis.get("monitor:endpoint_stats")
+ if data:
+ self.endpoint_stats = json.loads(data)
+ logger.info("Loaded endpoint stats from Redis")
+ except Exception as e:
+ logger.warning(f"Failed to load from Redis: {e}")
+
+ def get_health_summary(self) -> Dict:
+ """Get current system health snapshot."""
+ mem_pct = get_container_memory_percent()
+ cpu_pct = psutil.cpu_percent(interval=0.1)
+
+ # Network I/O (cumulative system counters, not a per-call delta)
+ net = psutil.net_io_counters()
+
+ # Pool status
+ from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LAST_USED
+ permanent_mem = 270 if PERMANENT else 0 # Estimate
+ hot_mem = len(HOT_POOL) * 180 # Estimate 180MB per browser
+ cold_mem = len(COLD_POOL) * 180
+
+ return {
+ "container": {
+ "memory_percent": round(mem_pct, 1),
+ "cpu_percent": round(cpu_pct, 1),
+ "network_sent_mb": round(net.bytes_sent / (1024**2), 2),
+ "network_recv_mb": round(net.bytes_recv / (1024**2), 2),
+ "uptime_seconds": int(time.time() - self.start_time)
+ },
+ "pool": {
+ "permanent": {"active": PERMANENT is not None, "memory_mb": permanent_mem},
+ "hot": {"count": len(HOT_POOL), "memory_mb": hot_mem},
+ "cold": {"count": len(COLD_POOL), "memory_mb": cold_mem},
+ "total_memory_mb": permanent_mem + hot_mem + cold_mem
+ },
+ "janitor": {
+ "next_cleanup_estimate": "adaptive", # Would need janitor state
+ "memory_pressure": "LOW" if mem_pct < 60 else "MEDIUM" if mem_pct < 80 else "HIGH"
+ }
+ }
+
+ def get_active_requests(self) -> List[Dict]:
+ """Get list of currently active requests."""
+ now = time.time()
+ return [
+ {
+ **req,
+ "elapsed": round(now - req["start_time"], 1),
+ "status": "running"
+ }
+ for req in self.active_requests.values()
+ ]
+
+ def get_completed_requests(self, limit: int = 50, filter_status: str = "all") -> List[Dict]:
+ """Get recent completed requests."""
+ requests = list(self.completed_requests)[-limit:]
+ if filter_status == "success":
+ requests = [r for r in requests if r.get("success")]
+ elif filter_status == "error":
+ requests = [r for r in requests if not r.get("success")]
+ return requests
+
+ def get_browser_list(self) -> List[Dict]:
+ """Get detailed browser pool information."""
+ from crawler_pool import PERMANENT, HOT_POOL, COLD_POOL, LAST_USED, USAGE_COUNT, DEFAULT_CONFIG_SIG
+
+ browsers = []
+ now = time.time()
+
+ if PERMANENT:
+ browsers.append({
+ "type": "permanent",
+ "sig": DEFAULT_CONFIG_SIG[:8] if DEFAULT_CONFIG_SIG else "unknown",
+ "age_seconds": int(now - self.start_time),
+ "last_used_seconds": int(now - LAST_USED.get(DEFAULT_CONFIG_SIG, now)),
+ "memory_mb": 270,
+ "hits": USAGE_COUNT.get(DEFAULT_CONFIG_SIG, 0),
+ "killable": False
+ })
+
+ for sig, crawler in HOT_POOL.items():
+ browsers.append({
+ "type": "hot",
+ "sig": sig[:8],
+ "age_seconds": int(now - self.start_time), # Approximation
+ "last_used_seconds": int(now - LAST_USED.get(sig, now)),
+ "memory_mb": 180, # Estimate
+ "hits": USAGE_COUNT.get(sig, 0),
+ "killable": True
+ })
+
+ for sig, crawler in COLD_POOL.items():
+ browsers.append({
+ "type": "cold",
+ "sig": sig[:8],
+ "age_seconds": int(now - self.start_time),
+ "last_used_seconds": int(now - LAST_USED.get(sig, now)),
+ "memory_mb": 180,
+ "hits": USAGE_COUNT.get(sig, 0),
+ "killable": True
+ })
+
+ return browsers
+
+ def get_endpoint_stats_summary(self) -> Dict[str, Dict]:
+ """Get aggregated endpoint statistics."""
+ summary = {}
+ for endpoint, stats in self.endpoint_stats.items():
+ count = stats["count"]
+ avg_time = (stats["total_time"] / count) if count > 0 else 0
+ success_rate = (stats["success"] / count * 100) if count > 0 else 0
+ pool_hit_rate = (stats["pool_hits"] / count * 100) if count > 0 else 0
+
+ summary[endpoint] = {
+ "count": count,
+ "avg_latency_ms": round(avg_time * 1000, 1),
+ "success_rate_percent": round(success_rate, 1),
+ "pool_hit_rate_percent": round(pool_hit_rate, 1),
+ "errors": stats["errors"]
+ }
+ return summary
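+
+ # Example summary shape (illustrative values, not captured output):
+ # {"/crawl": {"count": 120, "avg_latency_ms": 843.2,
+ #             "success_rate_percent": 97.5,
+ #             "pool_hit_rate_percent": 88.3, "errors": 3}}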
+
+ def get_timeline_data(self, metric: str, window: str = "5m") -> Dict:
+ """Get timeline data for charts."""
+ # For now, only 5m window supported
+ if metric == "memory":
+ data = list(self.memory_timeline)
+ elif metric == "requests":
+ data = list(self.requests_timeline)
+ elif metric == "browsers":
+ data = list(self.browser_timeline)
+ else:
+ return {"timestamps": [], "values": []}
+
+ return {
+ "timestamps": [int(d["time"]) for d in data],
+ "values": [d.get("value", d.get("browsers")) for d in data]
+ }
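+
+ # Example return shape (illustrative):
+ # {"timestamps": [1717000000, 1717000005], "values": [42.5, 43.1]}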
+
+ def get_janitor_log(self, limit: int = 100) -> List[Dict]:
+ """Get recent janitor events."""
+ return list(self.janitor_events)[-limit:]
+
+ def get_errors_log(self, limit: int = 100) -> List[Dict]:
+ """Get recent errors."""
+ return list(self.errors)[-limit:]
+
+# Global instance (initialized in server.py)
+monitor_stats: Optional[MonitorStats] = None
+
+def get_monitor() -> MonitorStats:
+ """Get global monitor instance."""
+ if monitor_stats is None:
+ raise RuntimeError("Monitor not initialized")
+ return monitor_stats
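+
+# Example wiring (a sketch, not the shipped integration; the real hooks live
+# in the request handlers, and request_id is any unique string the caller
+# generates):
+#
+#     import uuid
+#     from monitor import get_monitor
+#
+#     request_id = uuid.uuid4().hex
+#     await get_monitor().track_request_start(request_id, "/crawl", url)
+#     try:
+#         result = await crawler.arun(url)
+#         await get_monitor().track_request_end(request_id, success=True)
+#     except Exception as exc:
+#         await get_monitor().track_request_end(request_id, success=False,
+#                                               error=str(exc))
+#         raise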
diff --git a/deploy/docker/monitor_routes.py b/deploy/docker/monitor_routes.py
new file mode 100644
index 00000000..e7451468
--- /dev/null
+++ b/deploy/docker/monitor_routes.py
@@ -0,0 +1,322 @@
+# monitor_routes.py - Monitor API endpoints
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import Optional
+from monitor import get_monitor
+import logging
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/monitor", tags=["monitor"])
+
+
+@router.get("/health")
+async def get_health():
+ """Get current system health snapshot."""
+ try:
+ monitor = get_monitor()
+ return monitor.get_health_summary()
+ except Exception as e:
+ logger.error(f"Error getting health: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/requests")
+async def get_requests(status: str = "all", limit: int = 50):
+ """Get active and completed requests.
+
+ Args:
+ status: Filter by 'active', 'completed', 'success', 'error', or 'all'
+ limit: Max number of completed requests to return (default 50)
+ """
+ try:
+ monitor = get_monitor()
+
+ if status == "active":
+ return {"active": monitor.get_active_requests(), "completed": []}
+ elif status == "completed":
+ return {"active": [], "completed": monitor.get_completed_requests(limit)}
+ elif status in ["success", "error"]:
+ return {"active": [], "completed": monitor.get_completed_requests(limit, status)}
+ else: # "all"
+ return {
+ "active": monitor.get_active_requests(),
+ "completed": monitor.get_completed_requests(limit)
+ }
+ except Exception as e:
+ logger.error(f"Error getting requests: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/browsers")
+async def get_browsers():
+ """Get detailed browser pool information."""
+ try:
+ monitor = get_monitor()
+ browsers = monitor.get_browser_list()
+
+ # Calculate summary stats
+ total_browsers = len(browsers)
+ total_memory = sum(b["memory_mb"] for b in browsers)
+
+ # Calculate reuse rate from recent requests
+ recent = monitor.get_completed_requests(100)
+ pool_hits = sum(1 for r in recent if r.get("pool_hit", False))
+ reuse_rate = (pool_hits / len(recent) * 100) if recent else 0
+
+ return {
+ "browsers": browsers,
+ "summary": {
+ "total_count": total_browsers,
+ "total_memory_mb": total_memory,
+ "reuse_rate_percent": round(reuse_rate, 1)
+ }
+ }
+ except Exception as e:
+ logger.error(f"Error getting browsers: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/endpoints/stats")
+async def get_endpoint_stats():
+ """Get aggregated endpoint statistics."""
+ try:
+ monitor = get_monitor()
+ return monitor.get_endpoint_stats_summary()
+ except Exception as e:
+ logger.error(f"Error getting endpoint stats: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/timeline")
+async def get_timeline(metric: str = "memory", window: str = "5m"):
+ """Get timeline data for charts.
+
+ Args:
+ metric: 'memory', 'requests', or 'browsers'
+ window: Time window (only '5m' supported for now)
+ """
+ try:
+ monitor = get_monitor()
+ return monitor.get_timeline_data(metric, window)
+ except Exception as e:
+ logger.error(f"Error getting timeline: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/logs/janitor")
+async def get_janitor_log(limit: int = 100):
+ """Get recent janitor cleanup events."""
+ try:
+ monitor = get_monitor()
+ return {"events": monitor.get_janitor_log(limit)}
+ except Exception as e:
+ logger.error(f"Error getting janitor log: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.get("/logs/errors")
+async def get_errors_log(limit: int = 100):
+ """Get recent errors."""
+ try:
+ monitor = get_monitor()
+ return {"errors": monitor.get_errors_log(limit)}
+ except Exception as e:
+ logger.error(f"Error getting errors log: {e}")
+ raise HTTPException(500, str(e))
+
+
+# ========== Control Actions ==========
+
+class KillBrowserRequest(BaseModel):
+ sig: str
+
+
+@router.post("/actions/cleanup")
+async def force_cleanup():
+ """Force immediate janitor cleanup (kills idle cold pool browsers)."""
+ try:
+ from crawler_pool import COLD_POOL, LAST_USED, USAGE_COUNT, LOCK
+ import time
+ from contextlib import suppress
+
+ killed_count = 0
+ now = time.time()
+
+ async with LOCK:
+ for sig in list(COLD_POOL.keys()):
+ # Kill all cold pool browsers immediately
+ logger.info(f"๐งน Force cleanup: closing cold browser (sig={sig[:8]})")
+ with suppress(Exception):
+ await COLD_POOL[sig].close()
+ COLD_POOL.pop(sig, None)
+ LAST_USED.pop(sig, None)
+ USAGE_COUNT.pop(sig, None)
+ killed_count += 1
+
+ monitor = get_monitor()
+ monitor.track_janitor_event("force_cleanup", "manual", {"killed": killed_count})
+
+ return {"success": True, "killed_browsers": killed_count}
+ except Exception as e:
+ logger.error(f"Error during force cleanup: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.post("/actions/kill_browser")
+async def kill_browser(req: KillBrowserRequest):
+ """Kill a specific browser by signature (hot or cold only).
+
+ Args:
+ sig: Browser config signature prefix (e.g. the first 8 chars)
+ """
+ try:
+ from crawler_pool import HOT_POOL, COLD_POOL, LAST_USED, USAGE_COUNT, LOCK, DEFAULT_CONFIG_SIG
+ from contextlib import suppress
+
+ # Find full signature matching prefix
+ target_sig = None
+ pool_type = None
+
+ async with LOCK:
+ # Check hot pool
+ for sig in HOT_POOL.keys():
+ if sig.startswith(req.sig):
+ target_sig = sig
+ pool_type = "hot"
+ break
+
+ # Check cold pool
+ if not target_sig:
+ for sig in COLD_POOL.keys():
+ if sig.startswith(req.sig):
+ target_sig = sig
+ pool_type = "cold"
+ break
+
+ # Check if trying to kill permanent
+ if DEFAULT_CONFIG_SIG and DEFAULT_CONFIG_SIG.startswith(req.sig):
+ raise HTTPException(403, "Cannot kill permanent browser. Use restart instead.")
+
+ if not target_sig:
+ raise HTTPException(404, f"Browser with sig={req.sig} not found")
+
+ # Kill the browser
+ if pool_type == "hot":
+ browser = HOT_POOL.pop(target_sig)
+ else:
+ browser = COLD_POOL.pop(target_sig)
+
+ with suppress(Exception):
+ await browser.close()
+
+ LAST_USED.pop(target_sig, None)
+ USAGE_COUNT.pop(target_sig, None)
+
+ logger.info(f"๐ช Killed {pool_type} browser (sig={target_sig[:8]})")
+
+ monitor = get_monitor()
+ monitor.track_janitor_event("kill_browser", target_sig, {"pool": pool_type, "manual": True})
+
+ return {"success": True, "killed_sig": target_sig[:8], "pool_type": pool_type}
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error killing browser: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.post("/actions/restart_browser")
+async def restart_browser(req: KillBrowserRequest):
+ """Restart a browser (kill + recreate). Works for permanent too.
+
+ Args:
+ sig: Browser config signature prefix (e.g. the first 8 chars), or "permanent"
+ """
+ try:
+ from crawler_pool import (PERMANENT, HOT_POOL, COLD_POOL, LAST_USED,
+ USAGE_COUNT, LOCK, DEFAULT_CONFIG_SIG, init_permanent)
+ from crawl4ai import BrowserConfig
+ from contextlib import suppress
+
+ # Handle permanent browser restart
+ if req.sig == "permanent" or (DEFAULT_CONFIG_SIG and DEFAULT_CONFIG_SIG.startswith(req.sig)):
+ async with LOCK:
+ if PERMANENT:
+ with suppress(Exception):
+ await PERMANENT.close()
+
+ # Reinitialize permanent
+ from utils import load_config
+ config = load_config()
+ await init_permanent(BrowserConfig(
+ extra_args=config["crawler"]["browser"].get("extra_args", []),
+ **config["crawler"]["browser"].get("kwargs", {}),
+ ))
+
+ logger.info("๐ Restarted permanent browser")
+ return {"success": True, "restarted": "permanent"}
+
+ # Handle hot/cold browser restart
+ target_sig = None
+ pool_type = None
+ browser_config = None
+
+ async with LOCK:
+ # Find browser
+ for sig in HOT_POOL.keys():
+ if sig.startswith(req.sig):
+ target_sig = sig
+ pool_type = "hot"
+ # Would need to reconstruct config (not stored currently)
+ break
+
+ if not target_sig:
+ for sig in COLD_POOL.keys():
+ if sig.startswith(req.sig):
+ target_sig = sig
+ pool_type = "cold"
+ break
+
+ if not target_sig:
+ raise HTTPException(404, f"Browser with sig={req.sig} not found")
+
+ # Kill existing
+ if pool_type == "hot":
+ browser = HOT_POOL.pop(target_sig)
+ else:
+ browser = COLD_POOL.pop(target_sig)
+
+ with suppress(Exception):
+ await browser.close()
+
+ # Note: We can't easily recreate with same config without storing it
+ # For now, just kill and let new requests create fresh ones
+ LAST_USED.pop(target_sig, None)
+ USAGE_COUNT.pop(target_sig, None)
+
+ logger.info(f"๐ Restarted {pool_type} browser (sig={target_sig[:8]})")
+
+ monitor = get_monitor()
+ monitor.track_janitor_event("restart_browser", target_sig, {"pool": pool_type})
+
+ return {"success": True, "restarted_sig": target_sig[:8], "note": "Browser will be recreated on next request"}
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Error restarting browser: {e}")
+ raise HTTPException(500, str(e))
+
+
+@router.post("/stats/reset")
+async def reset_stats():
+ """Reset today's endpoint counters."""
+ try:
+ monitor = get_monitor()
+ monitor.endpoint_stats.clear()
+ await monitor._persist_endpoint_stats()
+
+ return {"success": True, "message": "Endpoint stats reset"}
+ except Exception as e:
+ logger.error(f"Error resetting stats: {e}")
+ raise HTTPException(500, str(e))
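+
+# Example calls (a sketch; assumes the default Docker port 11235 and that the
+# monitor routes are mounted without auth, as in this diff):
+#
+#   curl http://localhost:11235/monitor/health
+#   curl "http://localhost:11235/monitor/requests?status=error&limit=20"
+#   curl "http://localhost:11235/monitor/timeline?metric=memory"
+#   curl -X POST http://localhost:11235/monitor/actions/kill_browser \
+#        -H 'Content-Type: application/json' -d '{"sig": "a1b2c3d4"}'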
diff --git a/deploy/docker/server.py b/deploy/docker/server.py
index 30639852..efb1cecb 100644
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -16,6 +16,7 @@ from fastapi import Request, Depends
from fastapi.responses import FileResponse
import base64
import re
+import logging
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from api import (
handle_markdown_request, handle_llm_qa,
@@ -112,15 +113,40 @@ AsyncWebCrawler.arun = capped_arun
@asynccontextmanager
async def lifespan(_: FastAPI):
from crawler_pool import init_permanent
+ from monitor import MonitorStats
+ import monitor as monitor_module
+
+ # Initialize monitor
+ monitor_module.monitor_stats = MonitorStats(redis)
+ await monitor_module.monitor_stats.load_from_redis()
+
+ # Initialize browser pool
await init_permanent(BrowserConfig(
extra_args=config["crawler"]["browser"].get("extra_args", []),
**config["crawler"]["browser"].get("kwargs", {}),
))
+
+ # Start background tasks
app.state.janitor = asyncio.create_task(janitor())
+ app.state.timeline_updater = asyncio.create_task(_timeline_updater())
+
yield
+
+ # Cleanup
app.state.janitor.cancel()
+ app.state.timeline_updater.cancel()
await close_all()
+async def _timeline_updater():
+ """Update timeline data every 5 seconds."""
+ from monitor import get_monitor
+ while True:
+ await asyncio.sleep(5)
+ try:
+ await get_monitor().update_timeline()
+ except Exception as e:
+ logger.warning(f"Timeline update error: {e}")
+
# ───────────────────── FastAPI instance ──────────────────────
app = FastAPI(
title=config["app"]["title"],
@@ -138,6 +164,16 @@ app.mount(
name="play",
)
+# ── static monitor dashboard ────────────────────────────────
+MONITOR_DIR = pathlib.Path(__file__).parent / "static" / "monitor"
+if not MONITOR_DIR.exists():
+ raise RuntimeError(f"Monitor assets not found at {MONITOR_DIR}")
+app.mount(
+ "/dashboard",
+ StaticFiles(directory=MONITOR_DIR, html=True),
+ name="monitor_ui",
+)
+
@app.get("/")
async def root():
@@ -221,6 +257,12 @@ def _safe_eval_config(expr: str) -> dict:
# ── job router ──────────────────────────────────────────────
app.include_router(init_job_router(redis, config, token_dep))
+# ── monitor router ──────────────────────────────────────────
+from monitor_routes import router as monitor_router
+app.include_router(monitor_router)
+
+logger = logging.getLogger(__name__)
+
# ──────────────────────── Endpoints ──────────────────────────
@app.post("/token")
async def get_token(req: TokenRequest):
diff --git a/deploy/docker/static/monitor/index.html b/deploy/docker/static/monitor/index.html
new file mode 100644
index 00000000..2beb9467
--- /dev/null
+++ b/deploy/docker/static/monitor/index.html
@@ -0,0 +1,813 @@
+<!-- Crawl4AI Monitor dashboard (813-line single-page UI; the HTML markup was
+     lost in this extract, so only the visible text structure is recorded):
+     - System Health: memory/CPU gauges ("--" placeholders), Network
+       ⬆0 MB / ⬇0 MB, Uptime "--", "Updated: never"
+     - Pool status: 🔥 Permanent: INACTIVE (0MB) | ♨️ Hot: 0 (0MB) |
+       ❄️ Cold: 0 (0MB); "Janitor: adaptive | Memory pressure: LOW"
+     - Active Requests (0); Recent Completed ("No completed requests")
+     - Browser Pool (0 browsers, 0 MB), "Reuse rate: --%", table columns
+       Type | Signature | Age | Last Used | Memory | Hits | Actions
+     - Cleanup Events (Last 100); Recent Errors (Last 100)
+     - Endpoint Analytics table: Endpoint | Count | Avg Latency | Success% | Pool%
+     - Resource Timeline (5min)
+     - Control Actions -->
diff --git a/deploy/docker/static/playground/index.html b/deploy/docker/static/playground/index.html
index 553e6765..510a6620 100644
--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -167,11 +167,14 @@
-<!-- three removed lines; markup not recoverable from this extract -->
+<!-- added: a "Monitor" link in the playground nav pointing at the new
+     dashboard; markup not recoverable from this extract -->