feat(monitor): add real-time crawler monitoring system with memory management

Implements a comprehensive monitoring and visualization system for tracking web crawler operations in real time. The system includes:

- Terminal-based dashboard with a rich UI for displaying task statuses
- Memory pressure monitoring and adaptive dispatch control
- Queue statistics and performance metrics tracking
- Detailed task progress visualization
- Stress testing framework for memory management

This addition helps operators track crawler performance and manage memory usage more effectively.
2025-03-12 19:05:24 +08:00
parent 9547bada3a
commit 1630fbdafe
8 changed files with 1956 additions and 321 deletions
--- a/crawl4ai/init.py
+++ b/crawl4ai/init.py
@@ -33,13 +33,12 @@ from .content_filter_strategy import (
    LLMContentFilter,
    RelevantContentFilter,
 )
-from .models import CrawlResult, MarkdownGenerationResult
+from .models import CrawlResult, MarkdownGenerationResult, DisplayMode
+from .components.crawler_monitor import CrawlerMonitor
 from .async_dispatcher import (
    MemoryAdaptiveDispatcher,
    SemaphoreDispatcher,
    RateLimiter,
-    CrawlerMonitor,
-    DisplayMode,
    BaseDispatcher,
 )
 from .docker_client import Crawl4aiDockerClient
--- a/crawl4ai/async_dispatcher.py
+++ b/crawl4ai/async_dispatcher.py
@@ -4,17 +4,15 @@ from .models import (
    CrawlResult,
    CrawlerTaskResult,
    CrawlStatus,
-    DisplayMode,
-    CrawlStats,
    DomainState,
 )

-from rich.live import Live
-from rich.table import Table
-from rich.console import Console
-from rich import box
-from datetime import timedelta, datetime
+from .components.crawler_monitor import CrawlerMonitor
+
+from .types import AsyncWebCrawler
+
 from collections.abc import AsyncGenerator
+
 import time
 import psutil
 import asyncio
@@ -24,8 +22,6 @@ from urllib.parse import urlparse
 import random
 from abc import ABC, abstractmethod

-from math import inf as infinity
-

 class RateLimiter:
    def __init__(
@@ -87,201 +83,6 @@ class RateLimiter:
        return True


-class CrawlerMonitor:
-    def __init__(
-        self,
-        max_visible_rows: int = 15,
-        display_mode: DisplayMode = DisplayMode.DETAILED,
-    ):
-        self.console = Console()
-        self.max_visible_rows = max_visible_rows
-        self.display_mode = display_mode
-        self.stats: Dict[str, CrawlStats] = {}
-        self.process = psutil.Process()
-        self.start_time = time.time()
-        self.live = Live(self._create_table(), refresh_per_second=2)
-
-    def start(self):
-        self.live.start()
-
-    def stop(self):
-        self.live.stop()
-
-    def add_task(self, task_id: str, url: str):
-        self.stats[task_id] = CrawlStats(
-            task_id=task_id, url=url, status=CrawlStatus.QUEUED
-        )
-        self.live.update(self._create_table())
-
-    def update_task(self, task_id: str, **kwargs):
-        if task_id in self.stats:
-            for key, value in kwargs.items():
-                setattr(self.stats[task_id], key, value)
-            self.live.update(self._create_table())
-
-    def _create_aggregated_table(self) -> Table:
-        """Creates a compact table showing only aggregated statistics"""
-        table = Table(
-            box=box.ROUNDED,
-            title="Crawler Status Overview",
-            title_style="bold magenta",
-            header_style="bold blue",
-            show_lines=True,
-        )
-
-        # Calculate statistics
-        total_tasks = len(self.stats)
-        queued = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.QUEUED
-        )
-        in_progress = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.IN_PROGRESS
-        )
-        completed = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.COMPLETED
-        )
-        failed = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.FAILED
-        )
-
-        # Memory statistics
-        current_memory = self.process.memory_info().rss / (1024 * 1024)
-        total_task_memory = sum(stat.memory_usage for stat in self.stats.values())
-        peak_memory = max(
-            (stat.peak_memory for stat in self.stats.values()), default=0.0
-        )
-
-        # Duration
-        duration = time.time() - self.start_time
-
-        # Create status row
-        table.add_column("Status", style="bold cyan")
-        table.add_column("Count", justify="right")
-        table.add_column("Percentage", justify="right")
-
-        table.add_row("Total Tasks", str(total_tasks), "100%")
-        table.add_row(
-            "[yellow]In Queue[/yellow]",
-            str(queued),
-            f"{(queued / total_tasks * 100):.1f}%" if total_tasks > 0 else "0%",
-        )
-        table.add_row(
-            "[blue]In Progress[/blue]",
-            str(in_progress),
-            f"{(in_progress / total_tasks * 100):.1f}%" if total_tasks > 0 else "0%",
-        )
-        table.add_row(
-            "[green]Completed[/green]",
-            str(completed),
-            f"{(completed / total_tasks * 100):.1f}%" if total_tasks > 0 else "0%",
-        )
-        table.add_row(
-            "[red]Failed[/red]",
-            str(failed),
-            f"{(failed / total_tasks * 100):.1f}%" if total_tasks > 0 else "0%",
-        )
-
-        # Add memory information
-        table.add_section()
-        table.add_row(
-            "[magenta]Current Memory[/magenta]", f"{current_memory:.1f} MB", ""
-        )
-        table.add_row(
-            "[magenta]Total Task Memory[/magenta]", f"{total_task_memory:.1f} MB", ""
-        )
-        table.add_row(
-            "[magenta]Peak Task Memory[/magenta]", f"{peak_memory:.1f} MB", ""
-        )
-        table.add_row(
-            "[yellow]Runtime[/yellow]",
-            str(timedelta(seconds=int(duration))),
-            "",
-        )
-
-        return table
-
-    def _create_detailed_table(self) -> Table:
-        table = Table(
-            box=box.ROUNDED,
-            title="Crawler Performance Monitor",
-            title_style="bold magenta",
-            header_style="bold blue",
-        )
-
-        # Add columns
-        table.add_column("Task ID", style="cyan", no_wrap=True)
-        table.add_column("URL", style="cyan", no_wrap=True)
-        table.add_column("Status", style="bold")
-        table.add_column("Memory (MB)", justify="right")
-        table.add_column("Peak (MB)", justify="right")
-        table.add_column("Duration", justify="right")
-        table.add_column("Info", style="italic")
-
-        # Add summary row
-        total_memory = sum(stat.memory_usage for stat in self.stats.values())
-        active_count = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.IN_PROGRESS
-        )
-        completed_count = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.COMPLETED
-        )
-        failed_count = sum(
-            1 for stat in self.stats.values() if stat.status == CrawlStatus.FAILED
-        )
-
-        table.add_row(
-            "[bold yellow]SUMMARY",
-            f"Total: {len(self.stats)}",
-            f"Active: {active_count}",
-            f"{total_memory:.1f}",
-            f"{self.process.memory_info().rss / (1024 * 1024):.1f}",
-            str(
-                timedelta(
-                    seconds=int(time.time() - self.start_time)
-                )
-            ),
-            f"✓{completed_count} ✗{failed_count}",
-            style="bold",
-        )
-
-        table.add_section()
-
-        # Add rows for each task
-        visible_stats = sorted(
-            self.stats.values(),
-            key=lambda x: (
-                x.status != CrawlStatus.IN_PROGRESS,
-                x.status != CrawlStatus.QUEUED,
-                x.end_time or infinity,
-            ),
-        )[: self.max_visible_rows]
-
-        for stat in visible_stats:
-            status_style = {
-                CrawlStatus.QUEUED: "white",
-                CrawlStatus.IN_PROGRESS: "yellow",
-                CrawlStatus.COMPLETED: "green",
-                CrawlStatus.FAILED: "red",
-            }[stat.status]
-
-            table.add_row(
-                stat.task_id[:8],  # Show first 8 chars of task ID
-                stat.url[:40] + "..." if len(stat.url) > 40 else stat.url,
-                f"[{status_style}]{stat.status.value}[/{status_style}]",
-                f"{stat.memory_usage:.1f}",
-                f"{stat.peak_memory:.1f}",
-                stat.duration,
-                stat.error_message[:40] if stat.error_message else "",
-            )
-
-        return table
-
-    def _create_table(self) -> Table:
-        """Creates the appropriate table based on display mode"""
-        if self.display_mode == DisplayMode.AGGREGATED:
-            return self._create_aggregated_table()
-        return self._create_detailed_table()
-

 class BaseDispatcher(ABC):
    def __init__(
@@ -309,7 +110,7 @@ class BaseDispatcher(ABC):
    async def run_urls(
        self,
        urls: List[str],
-        crawler: "AsyncWebCrawler",  # noqa: F821
+        crawler: AsyncWebCrawler,  # noqa: F821
        config: CrawlerRunConfig,
        monitor: Optional[CrawlerMonitor] = None,
    ) -> List[CrawlerTaskResult]:
@@ -320,71 +121,144 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
    def __init__(
        self,
        memory_threshold_percent: float = 90.0,
+        critical_threshold_percent: float = 95.0,  # At/above this, tasks are requeued instead of crawled
+        recovery_threshold_percent: float = 85.0,  # At/below this, memory pressure mode is left
        check_interval: float = 1.0,
        max_session_permit: int = 20,
-        memory_wait_timeout: float = 300.0,  # 5 minutes default timeout
+        fairness_timeout: float = 600.0,  # 10 minutes before prioritizing long-waiting URLs
        rate_limiter: Optional[RateLimiter] = None,
        monitor: Optional[CrawlerMonitor] = None,
    ):
+        """Configure memory thresholds, concurrency limit and queueing state.
+
+        Args:
+            memory_threshold_percent: System memory usage (percent) at or
+                above which memory pressure mode is entered and no new
+                tasks are started.
+            critical_threshold_percent: Usage at or above which the monitor
+                is told "CRITICAL" and ``crawl_url`` requeues its task.
+            recovery_threshold_percent: Usage at or below which memory
+                pressure mode is left again.
+            check_interval: Seconds between memory samples.
+            max_session_permit: Maximum number of concurrently active tasks.
+            fairness_timeout: Wait time in seconds after which a queued URL
+                is scored ``-wait_time`` so it is dequeued first.
+            rate_limiter: Optional rate limiter, forwarded to the base class.
+            monitor: Optional monitor, forwarded to the base class.
+        """
+        # NOTE(review): ``memory_wait_timeout`` is removed and ``fairness_timeout``
+        # takes its positional slot; callers passing the old keyword would
+        # presumably fail with TypeError -- confirm no external callers rely on it.
        super().__init__(rate_limiter, monitor)
        self.memory_threshold_percent = memory_threshold_percent
+        self.critical_threshold_percent = critical_threshold_percent
+        self.recovery_threshold_percent = recovery_threshold_percent
        self.check_interval = check_interval
        self.max_session_permit = max_session_permit
-        self.memory_wait_timeout = memory_wait_timeout
-        self.result_queue = asyncio.Queue()  # Queue for storing results
-
+        self.fairness_timeout = fairness_timeout
+        self.result_queue = asyncio.Queue()
+        self.task_queue = asyncio.PriorityQueue()  # Entries: (priority, (url, task_id, retry_count, enqueue_time))
+        self.memory_pressure_mode = False  # True while new task dispatch is paused for memory pressure
+        self.current_memory_percent = 0.0  # Latest sample written by _memory_monitor_task
+        
+    async def _memory_monitor_task(self):
+        """Sample system memory forever and keep the dispatcher state in sync.
+
+        Every ``check_interval`` seconds this stores the current usage in
+        ``self.current_memory_percent`` and toggles ``self.memory_pressure_mode``
+        with hysteresis: the mode is entered at ``memory_threshold_percent``
+        and left only once usage is back at or below
+        ``recovery_threshold_percent``.
+
+        The attached monitor (if any) is told "NORMAL", "PRESSURE" or
+        "CRITICAL" whenever the derived status changes. Reporting on change,
+        rather than only on mode transitions, lets the display fall back from
+        "CRITICAL" to "PRESSURE" when usage drops below the critical
+        threshold but has not yet recovered.
+
+        The coroutine never returns; callers cancel it when the run ends.
+        """
+        reported_status = None
+        while True:
+            usage = psutil.virtual_memory().percent
+            self.current_memory_percent = usage
+
+            # Hysteresis between the pressure and recovery thresholds
+            if not self.memory_pressure_mode and usage >= self.memory_threshold_percent:
+                self.memory_pressure_mode = True
+            elif self.memory_pressure_mode and usage <= self.recovery_threshold_percent:
+                self.memory_pressure_mode = False
+
+            # The critical threshold takes precedence over the pressure flag
+            if usage >= self.critical_threshold_percent:
+                status = "CRITICAL"
+            elif self.memory_pressure_mode:
+                status = "PRESSURE"
+            else:
+                status = "NORMAL"
+
+            if self.monitor and status != reported_status:
+                self.monitor.update_memory_status(status)
+                reported_status = status
+
+            await asyncio.sleep(self.check_interval)
+    
+    def _get_priority_score(self, wait_time: float, retry_count: int) -> float:
+        """Return the queue priority for a task; smaller values are served first.
+
+        Args:
+            wait_time: Seconds the task has been waiting so far.
+            retry_count: How many times the task has been requeued.
+
+        Returns:
+            ``-wait_time`` once the wait exceeds ``fairness_timeout`` (so the
+            longest-waiting task sorts first); otherwise ``retry_count``, so
+            tasks with more retries sort later.
+        """
+        overdue = wait_time > self.fairness_timeout
+        return -wait_time if overdue else retry_count
+    
    async def crawl_url(
        self,
        url: str,
        config: CrawlerRunConfig,
        task_id: str,
+        retry_count: int = 0,
    ) -> CrawlerTaskResult:
        start_time = time.time()
        error_message = ""
        memory_usage = peak_memory = 0.0
-
+        
+        # Get starting memory for accurate measurement
+        process = psutil.Process()
+        start_memory = process.memory_info().rss / (1024 * 1024)
+        
        try:
            if self.monitor:
                self.monitor.update_task(
-                    task_id, status=CrawlStatus.IN_PROGRESS, start_time=start_time
+                    task_id, 
+                    status=CrawlStatus.IN_PROGRESS, 
+                    start_time=start_time,
+                    retry_count=retry_count
                )
+                
            self.concurrent_sessions += 1
-
+            
            if self.rate_limiter:
                await self.rate_limiter.wait_if_needed(url)
-
-            process = psutil.Process()
-            start_memory = process.memory_info().rss / (1024 * 1024)
+                
+            # Check if we're in critical memory state
+            if self.current_memory_percent >= self.critical_threshold_percent:
+                # Requeue this task with increased priority and retry count
+                enqueue_time = time.time()
+                priority = self._get_priority_score(enqueue_time - start_time, retry_count + 1)
+                await self.task_queue.put((priority, (url, task_id, retry_count + 1, enqueue_time)))
+                
+                # Update monitoring
+                if self.monitor:
+                    self.monitor.update_task(
+                        task_id,
+                        status=CrawlStatus.QUEUED,
+                        error_message="Requeued due to critical memory pressure"
+                    )
+                
+                # Return placeholder result with requeued status
+                return CrawlerTaskResult(
+                    task_id=task_id,
+                    url=url,
+                    result=CrawlResult(
+                        url=url, html="", metadata={"status": "requeued"}, 
+                        success=False, error_message="Requeued due to critical memory pressure"
+                    ),
+                    memory_usage=0,
+                    peak_memory=0,
+                    start_time=start_time,
+                    end_time=time.time(),
+                    error_message="Requeued due to critical memory pressure",
+                    retry_count=retry_count + 1
+                )
+            
+            # Execute the crawl
            result = await self.crawler.arun(url, config=config, session_id=task_id)
+            
+            # Measure memory usage
            end_memory = process.memory_info().rss / (1024 * 1024)
-
            memory_usage = peak_memory = end_memory - start_memory
-
+            
+            # Handle rate limiting
            if self.rate_limiter and result.status_code:
                if not self.rate_limiter.update_delay(url, result.status_code):
                    error_message = f"Rate limit retry count exceeded for domain {urlparse(url).netloc}"
                    if self.monitor:
                        self.monitor.update_task(task_id, status=CrawlStatus.FAILED)
-                    result = CrawlerTaskResult(
-                        task_id=task_id,
-                        url=url,
-                        result=result,
-                        memory_usage=memory_usage,
-                        peak_memory=peak_memory,
-                        start_time=start_time,
-                        end_time=time.time(),
-                        error_message=error_message,
-                    )
-                    await self.result_queue.put(result)
-                    return result
-
+                        
+            # Update status based on result
            if not result.success:
                error_message = result.error_message
                if self.monitor:
                    self.monitor.update_task(task_id, status=CrawlStatus.FAILED)
            elif self.monitor:
                self.monitor.update_task(task_id, status=CrawlStatus.COMPLETED)
-
+                
        except Exception as e:
            error_message = str(e)
            if self.monitor:
@@ -392,7 +266,7 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
            result = CrawlResult(
                url=url, html="", metadata={}, success=False, error_message=str(e)
            )
-
+            
        finally:
            end_time = time.time()
            if self.monitor:
@@ -402,9 +276,10 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
                    memory_usage=memory_usage,
                    peak_memory=peak_memory,
                    error_message=error_message,
+                    retry_count=retry_count
                )
            self.concurrent_sessions -= 1
-
+            
        return CrawlerTaskResult(
            task_id=task_id,
            url=url,
@@ -414,116 +289,240 @@ class MemoryAdaptiveDispatcher(BaseDispatcher):
            start_time=start_time,
            end_time=end_time,
            error_message=error_message,
+            retry_count=retry_count
        )
-
+        
    async def run_urls(
        self,
        urls: List[str],
-        crawler: "AsyncWebCrawler",  # noqa: F821
+        crawler: AsyncWebCrawler,
        config: CrawlerRunConfig,
    ) -> List[CrawlerTaskResult]:
+        """Crawl every URL and return the collected task results.
+
+        URLs are placed on ``self.task_queue`` and started while memory
+        pressure mode is off and fewer than ``max_session_permit`` tasks are
+        active. Placeholder results that ``crawl_url`` returns when it
+        requeues a task are not part of the returned list.
+
+        Raises:
+            Exception: Any error raised by the dispatch loop is reported to
+                the monitor (when attached) and then re-raised.
+        """
        self.crawler = crawler
-
+
+        # Start the memory monitor task
+        memory_monitor = asyncio.create_task(self._memory_monitor_task())
+
        if self.monitor:
            self.monitor.start()
-
+
+        results = []
+
        try:
-            pending_tasks = []
-            active_tasks = []
-            task_queue = []
-
-            for url in urls:
-                task_id = str(uuid.uuid4())
-                if self.monitor:
-                    self.monitor.add_task(task_id, url)
-                task_queue.append((url, task_id))
-
-            while task_queue or active_tasks:
-                wait_start_time = time.time()
-                while len(active_tasks) < self.max_session_permit and task_queue:
-                    if psutil.virtual_memory().percent >= self.memory_threshold_percent:
-                        # Check if we've exceeded the timeout
-                        if time.time() - wait_start_time > self.memory_wait_timeout:
-                            raise MemoryError(
-                                f"Memory usage above threshold ({self.memory_threshold_percent}%) for more than {self.memory_wait_timeout} seconds"
-                            )
-                        await asyncio.sleep(self.check_interval)
-                        continue
-
-                    url, task_id = task_queue.pop(0)
-                    task = asyncio.create_task(self.crawl_url(url, config, task_id))
-                    active_tasks.append(task)
-
-                if not active_tasks:
-                    await asyncio.sleep(self.check_interval)
-                    continue
-
-                done, pending = await asyncio.wait(
-                    active_tasks, return_when=asyncio.FIRST_COMPLETED
-                )
-
-                pending_tasks.extend(done)
-                active_tasks = list(pending)
-
-            return await asyncio.gather(*pending_tasks)
-        finally:
-            if self.monitor:
-                self.monitor.stop()
-
-    async def run_urls_stream(
-        self,
-        urls: List[str],
-        crawler: "AsyncWebCrawler", # noqa: F821
-        config: CrawlerRunConfig,
-    ) -> AsyncGenerator[CrawlerTaskResult, None]:
-        self.crawler = crawler
-        if self.monitor:
-            self.monitor.start()
-
-        try:
-            active_tasks = []
-            task_queue = []
-            completed_count = 0
-            total_urls = len(urls)
-
            # Initialize task queue
            for url in urls:
                task_id = str(uuid.uuid4())
                if self.monitor:
                    self.monitor.add_task(task_id, url)
-                task_queue.append((url, task_id))
-
-            while completed_count < total_urls:
-                # Start new tasks if memory permits
-                while len(active_tasks) < self.max_session_permit and task_queue:
-                    if psutil.virtual_memory().percent >= self.memory_threshold_percent:
-                        await asyncio.sleep(self.check_interval)
-                        continue
-
-                    url, task_id = task_queue.pop(0)
-                    task = asyncio.create_task(self.crawl_url(url, config, task_id))
-                    active_tasks.append(task)
-
-                if not active_tasks and not task_queue:
-                    break
-
-                # Wait for any task to complete and yield results
+                # Add to queue with initial priority 0, retry count 0, and current time
+                await self.task_queue.put((0, (url, task_id, 0, time.time())))
+
+            active_tasks = []
+
+            # Process until the task queue is drained and no task is running
+            while not self.task_queue.empty() or active_tasks:
+                # If memory pressure is low, start new tasks
+                if not self.memory_pressure_mode and len(active_tasks) < self.max_session_permit:
+                    try:
+                        # Try to get a task with timeout to avoid blocking indefinitely
+                        priority, (url, task_id, retry_count, enqueue_time) = await asyncio.wait_for(
+                            self.task_queue.get(), timeout=0.1
+                        )
+
+                        # Create and start the task
+                        task = asyncio.create_task(
+                            self.crawl_url(url, config, task_id, retry_count)
+                        )
+                        active_tasks.append(task)
+
+                        # Update waiting time in monitor
+                        if self.monitor:
+                            wait_time = time.time() - enqueue_time
+                            self.monitor.update_task(
+                                task_id,
+                                wait_time=wait_time,
+                                status=CrawlStatus.IN_PROGRESS
+                            )
+
+                    except asyncio.TimeoutError:
+                        # No tasks in queue, that's fine
+                        pass
+
+                # Wait for completion even if queue is starved
                if active_tasks:
                    done, pending = await asyncio.wait(
                        active_tasks, timeout=0.1, return_when=asyncio.FIRST_COMPLETED
                    )
+
+                    # Process completed tasks
                    for completed_task in done:
                        result = await completed_task
-                        completed_count += 1
-                        yield result
+                        # A requeued task is back on the queue and will produce
+                        # its real result later; drop the placeholder. The
+                        # message starts with a capital "R", hence lower().
+                        if "requeued" in (result.error_message or "").lower():
+                            continue
+                        results.append(result)
+
+                    # Update active tasks list
                    active_tasks = list(pending)
                else:
-                    await asyncio.sleep(self.check_interval)
+                    # If no active tasks but still waiting, sleep briefly
+                    await asyncio.sleep(self.check_interval / 2)
+
+                # Update priorities for waiting tasks if needed
+                await self._update_queue_priorities()
+
+            return results

+        except Exception as e:
+            if self.monitor:
+                self.monitor.update_memory_status(f"QUEUE_ERROR: {str(e)}")
+            # Re-raise: swallowing the error would make this method return
+            # None instead of the promised list.
+            raise
+
        finally:
+            # Clean up
+            memory_monitor.cancel()
            if self.monitor:
                self.monitor.stop()
-
+                
+    async def _update_queue_priorities(self):
+        """Recompute the priority of every queued task to prevent starvation.
+
+        Drains ``self.task_queue``, rescores each entry with
+        ``_get_priority_score`` from its current wait time, reports per-task
+        wait times and aggregate queue statistics to the monitor when one is
+        attached, and always puts every drained entry back on the queue.
+        """
+        # Skip if queue is empty
+        if self.task_queue.empty():
+            return
+
+        rescored = []
+        try:
+            try:
+                # get_nowait() never suspends, so no other coroutine can touch
+                # the queue mid-drain and a cancellation cannot strand entries
+                # in the local list.
+                while True:
+                    try:
+                        _, (url, task_id, retry_count, enqueue_time) = self.task_queue.get_nowait()
+                    except asyncio.QueueEmpty:
+                        break
+
+                    wait_time = time.time() - enqueue_time
+                    new_priority = self._get_priority_score(wait_time, retry_count)
+
+                    # Record the entry before calling the monitor so it is
+                    # refilled even if the monitor call fails.
+                    rescored.append((new_priority, (url, task_id, retry_count, enqueue_time)))
+
+                    # Update monitoring stats for this task
+                    if self.monitor and task_id in self.monitor.stats:
+                        self.monitor.update_task(task_id, wait_time=wait_time)
+            except Exception as e:
+                # Best effort: report the problem. The monitor is optional,
+                # so it must be checked before use.
+                if self.monitor:
+                    self.monitor.update_memory_status(f"QUEUE_ERROR: {str(e)}")
+
+            # Calculate queue statistics
+            if rescored and self.monitor:
+                now = time.time()
+                wait_times = [now - entry[1][3] for entry in rescored]
+                self.monitor.update_queue_statistics(
+                    total_queued=len(rescored),
+                    highest_wait_time=max(wait_times),
+                    avg_wait_time=sum(wait_times) / len(wait_times),
+                )
+        finally:
+            # Always refill. The priority queue orders entries itself, so no
+            # pre-sorting is needed (lowest number = highest priority).
+            for entry in rescored:
+                self.task_queue.put_nowait(entry)
+                
+    async def run_urls_stream(
+        self,
+        urls: List[str],
+        crawler: AsyncWebCrawler,
+        config: CrawlerRunConfig,
+    ) -> AsyncGenerator[CrawlerTaskResult, None]:
+        """Crawl every URL and yield each task result as soon as it is ready.
+
+        URLs are placed on ``self.task_queue`` and started while memory
+        pressure mode is off and fewer than ``max_session_permit`` tasks are
+        active. The generator finishes once one real result per input URL
+        has been yielded; placeholder results that ``crawl_url`` returns when
+        it requeues a task are neither yielded nor counted.
+        """
+        self.crawler = crawler
+
+        # Start the memory monitor task
+        memory_monitor = asyncio.create_task(self._memory_monitor_task())
+
+        if self.monitor:
+            self.monitor.start()
+
+        try:
+            # Initialize task queue
+            for url in urls:
+                task_id = str(uuid.uuid4())
+                if self.monitor:
+                    self.monitor.add_task(task_id, url)
+                # Add to queue with initial priority 0, retry count 0, and current time
+                await self.task_queue.put((0, (url, task_id, 0, time.time())))
+
+            active_tasks = []
+            completed_count = 0
+            total_urls = len(urls)
+
+            while completed_count < total_urls:
+                # If memory pressure is low, start new tasks
+                if not self.memory_pressure_mode and len(active_tasks) < self.max_session_permit:
+                    try:
+                        # Try to get a task with timeout
+                        priority, (url, task_id, retry_count, enqueue_time) = await asyncio.wait_for(
+                            self.task_queue.get(), timeout=0.1
+                        )
+
+                        # Create and start the task
+                        task = asyncio.create_task(
+                            self.crawl_url(url, config, task_id, retry_count)
+                        )
+                        active_tasks.append(task)
+
+                        # Update waiting time in monitor
+                        if self.monitor:
+                            wait_time = time.time() - enqueue_time
+                            self.monitor.update_task(
+                                task_id,
+                                wait_time=wait_time,
+                                status=CrawlStatus.IN_PROGRESS
+                            )
+
+                    except asyncio.TimeoutError:
+                        # No tasks in queue, that's fine
+                        pass
+
+                # Process completed tasks and yield results
+                if active_tasks:
+                    done, pending = await asyncio.wait(
+                        active_tasks, timeout=0.1, return_when=asyncio.FIRST_COMPLETED
+                    )
+
+                    for completed_task in done:
+                        result = await completed_task
+
+                        # Only count as completed if it wasn't requeued. The
+                        # placeholder message starts with a capital "R" and a
+                        # real result may carry no message at all, so compare
+                        # case-insensitively on a None-safe string.
+                        if "requeued" in (result.error_message or "").lower():
+                            continue
+                        completed_count += 1
+                        yield result
+
+                    # Update active tasks list
+                    active_tasks = list(pending)
+                else:
+                    # If no active tasks but still waiting, sleep briefly
+                    await asyncio.sleep(self.check_interval / 2)
+
+                # Update priorities for waiting tasks if needed
+                await self._update_queue_priorities()
+
+        finally:
+            # Clean up
+            memory_monitor.cancel()
+            if self.monitor:
+                self.monitor.stop()
+                

 class SemaphoreDispatcher(BaseDispatcher):
    def __init__(
@@ -620,7 +619,7 @@ class SemaphoreDispatcher(BaseDispatcher):

    async def run_urls(
        self,
-        crawler: "AsyncWebCrawler",  # noqa: F821
+        crawler: AsyncWebCrawler,  # noqa: F821
        urls: List[str],
        config: CrawlerRunConfig,
    ) -> List[CrawlerTaskResult]:
@@ -644,4 +643,4 @@ class SemaphoreDispatcher(BaseDispatcher):
            return await asyncio.gather(*tasks, return_exceptions=True)
        finally:
            if self.monitor:
-                self.monitor.stop()
+                self.monitor.stop()
--- a/crawl4ai/components/crawler_monitor.py
+++ b/crawl4ai/components/crawler_monitor.py
@@ -0,0 +1,837 @@
+import time
+import uuid
+import threading
+import psutil
+from datetime import datetime, timedelta
+from typing import Dict, Optional, List
+import threading
+from rich.console import Console
+from rich.layout import Layout
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+from rich.live import Live
+from rich import box
+from ..models import CrawlStatus
+
+class TerminalUI:
+    """Terminal user interface for CrawlerMonitor using rich library."""
+    
+    def __init__(self, refresh_rate: float = 1.0, max_width: int = 120):
+        """
+        Initialize the terminal UI.
+        
+        Args:
+            refresh_rate: How often to refresh the UI (in seconds)
+            max_width: Maximum width of the UI in characters
+        """
+        self.console = Console(width=max_width)
+        self.layout = Layout()
+        self.refresh_rate = refresh_rate
+        self.stop_event = threading.Event()
+        self.ui_thread = None
+        self.monitor = None  # Will be set by CrawlerMonitor
+        self.max_width = max_width
+        self.live = None  # Set by _ui_loop while the Live display is active
+        # Vertical layout, top to bottom. Fixed sizes include a panel's two border rows.
+        self.layout.split(
+            Layout(name="header", size=4),  # Two status lines + borders (3 clipped line two)
+            Layout(name="pipeline_status", size=10),
+            Layout(name="task_details", ratio=1),
+            Layout(name="footer", size=3)  # One content line + borders
+        )
+        
+    def start(self, monitor):
+        """Attach *monitor* and launch the rendering loop in a daemon thread."""
+        self.monitor = monitor
+        self.stop_event.clear()
+        # Daemon thread: a lingering UI loop must never block interpreter exit
+        self.ui_thread = threading.Thread(target=self._ui_loop, daemon=True)
+        self.ui_thread.start()
+        
+    def stop(self):
+        """Signal the UI thread to exit and wait up to five seconds for it."""
+        worker = self.ui_thread
+        if not (worker and worker.is_alive()):
+            return
+        self.stop_event.set()
+        if threading.current_thread() != worker:  # A thread cannot join itself
+            worker.join(timeout=5.0)
+    
+    def _ui_loop(self):
+        """
+        Render the dashboard until stopped; runs in the UI thread.
+        
+        Exits when ``stop_event`` is set, the monitor stops running, or the
+        user presses 'q'. Key handling is skipped when stdin is not a TTY.
+        """
+        import sys
+        import select
+        import termios
+        import tty
+        
+        # cbreak mode needs a real terminal; tcgetattr raises on piped/redirected stdin
+        interactive = sys.stdin is not None and sys.stdin.isatty()
+        old_settings = termios.tcgetattr(sys.stdin) if interactive else None
+        try:
+            if interactive:
+                tty.setcbreak(sys.stdin.fileno())
+            
+            # Use Live display to render the UI
+            with Live(self.layout, refresh_per_second=1/self.refresh_rate, screen=True) as live:
+                self.live = live  # Store the live display for updates
+                while not self.stop_event.is_set():
+                    self._update_display()
+                    # Check for key press (non-blocking); 'q' quits
+                    if interactive and select.select([sys.stdin], [], [], 0)[0]:
+                        if sys.stdin.read(1) == 'q':
+                            # Don't call monitor.stop() here: it would make this
+                            # thread try to join itself
+                            self.stop_event.set()
+                            self.monitor.is_running = False
+                            break
+                    # Wake immediately on stop() instead of sleeping a full interval
+                    if self.stop_event.wait(self.refresh_rate) or not self.monitor.is_running:
+                        break
+        finally:
+            # Restore terminal settings
+            if old_settings is not None:
+                termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
+    
+    def _update_display(self):
+        """Rebuild every layout region from the monitor's current state."""
+        if not self.monitor:
+            return
+        
+        # Region name -> panel builder, refreshed top to bottom
+        region_builders = (
+            ("header", self._create_status_panel),
+            ("pipeline_status", self._create_pipeline_panel),
+            ("task_details", self._create_task_details_panel),
+            ("footer", self._create_footer),
+        )
+        for region, build_panel in region_builders:
+            self.layout[region].update(build_panel())
+    
+    def _create_status_panel(self) -> Panel:
+        """
+        Create the crawler status panel.
+        
+        Shows runtime, process memory share, URL progress and peak memory.
+        """
+        summary = self.monitor.get_summary()
+        memory_status = self.monitor.get_memory_status()
+        memory_icon = {"PRESSURE": "🟠", "CRITICAL": "🔴"}.get(memory_status, "🟢")
+        
+        # RSS and total RAM are both in bytes, so the ratio is a true percentage
+        rss_bytes = psutil.Process().memory_info().rss
+        memory_percent = (rss_bytes / psutil.virtual_memory().total) * 100
+        
+        # peak_memory_time is a wall-clock timestamp; show it relative to start
+        started = self.monitor.start_time
+        peak_at = summary['peak_memory_time']
+        runtime = self.monitor._format_time(time.time() - started if started else 0)
+        peak_offset = self.monitor._format_time(max(0.0, peak_at - started) if started and peak_at else 0)
+        
+        status_text = Text()
+        status_text.append(f"Web Crawler Dashboard | Runtime: {runtime} | Memory: {memory_percent:.1f}% {memory_icon}\n")
+        status_text.append(f"Status: {memory_status} | URLs: {summary['urls_completed']}/{summary['urls_total']} | ")
+        status_text.append(f"Peak Mem: {summary['peak_memory_percent']:.1f}% at {peak_offset}")
+        
+        return Panel(status_text, title="Crawler Status", border_style="blue")
+    
+    def _create_pipeline_panel(self) -> Panel:
+        """Create the pipeline status panel: per-status counts beside queue and timing stats."""
+        summary = self.monitor.get_summary()
+        queue_stats = self.monitor.get_queue_stats()
+        
+        # One table, two halves: status/count/percentage on the left, stat/value on the right
+        table = Table(show_header=True, box=None)
+        table.add_column("Status", style="cyan")
+        table.add_column("Count", justify="right")
+        table.add_column("Percentage", justify="right")
+        table.add_column("Stat", style="cyan")
+        table.add_column("Value", justify="right")
+        
+        # Calculate overall progress
+        progress = f"{summary['urls_completed']}/{summary['urls_total']}"
+        progress_percent = f"{summary['completion_percentage']:.1f}%"
+        
+        # First row: overall progress next to the estimated completion time
+        table.add_row(
+            "Overall Progress", 
+            progress, 
+            progress_percent,
+            "Est. Completion", 
+            summary.get('estimated_completion_time', "N/A")
+        )
+        
+        # Per-status percentages are taken against the total URL count
+        status_counts = summary['status_counts']
+        total = summary['urls_total'] or 1  # Avoid division by zero
+        
+        # Status rows
+        table.add_row(
+            "Completed", 
+            str(status_counts.get(CrawlStatus.COMPLETED.name, 0)),
+            f"{status_counts.get(CrawlStatus.COMPLETED.name, 0) / total * 100:.1f}%",
+            "Avg. Time/URL",
+            f"{summary.get('avg_task_duration', 0):.2f}s"
+        )
+        
+        table.add_row(
+            "Failed", 
+            str(status_counts.get(CrawlStatus.FAILED.name, 0)),
+            f"{status_counts.get(CrawlStatus.FAILED.name, 0) / total * 100:.1f}%",
+            "Concurrent Tasks",
+            str(status_counts.get(CrawlStatus.IN_PROGRESS.name, 0))
+        )
+        
+        table.add_row(
+            "In Progress", 
+            str(status_counts.get(CrawlStatus.IN_PROGRESS.name, 0)),
+            f"{status_counts.get(CrawlStatus.IN_PROGRESS.name, 0) / total * 100:.1f}%",
+            "Queue Size",
+            str(queue_stats['total_queued'])
+        )
+        
+        table.add_row(
+            "Queued", 
+            str(status_counts.get(CrawlStatus.QUEUED.name, 0)),
+            f"{status_counts.get(CrawlStatus.QUEUED.name, 0) / total * 100:.1f}%",
+            "Max Wait Time",
+            f"{queue_stats['highest_wait_time']:.1f}s"
+        )
+        
+        # Requeued is a special case as it's not a status
+        requeued_count = summary.get('requeued_count', 0)
+        table.add_row(
+            "Requeued", 
+            str(requeued_count),
+            f"{summary.get('requeue_rate', 0):.1f}%",
+            "Avg Wait Time",
+            f"{queue_stats['avg_wait_time']:.1f}s"
+        )
+        
+        # Last row: left half blank; right half repeats the requeue rate shown above
+        table.add_row(
+            "", 
+            "",
+            "",
+            "Requeue Rate",
+            f"{summary.get('requeue_rate', 0):.1f}%"
+        )
+        
+        return Panel(table, title="Pipeline Status", border_style="green")
+    
+    def _create_task_details_panel(self) -> Panel:
+        """Create the task details panel: a summary row plus up to 20 highest-priority tasks."""
+        # Create a table for task details
+        table = Table(show_header=True, expand=True)
+        table.add_column("Task ID", style="cyan", no_wrap=True, width=10)
+        table.add_column("URL", style="blue", ratio=3)
+        table.add_column("Status", style="green", width=15)
+        table.add_column("Memory", justify="right", width=8)
+        table.add_column("Peak", justify="right", width=8)
+        table.add_column("Duration", justify="right", width=10)
+        
+        # Get all task stats
+        task_stats = self.monitor.get_all_task_stats()
+        
+        # Add summary row
+        active_tasks = sum(1 for stats in task_stats.values() 
+                          if stats['status'] == CrawlStatus.IN_PROGRESS.name)
+        
+        total_memory = sum(stats['memory_usage'] for stats in task_stats.values())
+        total_peak = sum(stats['peak_memory'] for stats in task_stats.values())
+        
+        # Summary row with separators
+        table.add_row(
+            "SUMMARY", 
+            f"Total: {len(task_stats)}", 
+            f"Active: {active_tasks}",
+            f"{total_memory:.1f}",
+            f"{total_peak:.1f}",
+            "N/A"
+        )
+        
+        # Add a separator
+        table.add_row("—" * 10, "—" * 20, "—" * 10, "—" * 8, "—" * 8, "—" * 10)
+        
+        # Status icons
+        status_icons = {
+            CrawlStatus.QUEUED.name: "⏳",
+            CrawlStatus.IN_PROGRESS.name: "🔄",
+            CrawlStatus.COMPLETED.name: "✅",
+            CrawlStatus.FAILED.name: "❌"
+        }
+        
+        # Rank every task first, then keep the top rows, so active tasks are
+        # never hidden behind older entries that merely came first in the dict
+        display_count = 20  # Display up to 20 tasks
+        
+        # Sort: 1. IN_PROGRESS first, 2. QUEUED, 3. COMPLETED/FAILED by recency
+        ranked_tasks = sorted(
+            task_stats.items(),
+            key=lambda x: (
+                0 if x[1]['status'] == CrawlStatus.IN_PROGRESS.name else 
+                1 if x[1]['status'] == CrawlStatus.QUEUED.name else 
+                2,
+                -1 * (x[1].get('end_time', 0) or 0)  # Most recent first
+            )
+        )
+        for task_id, stats in ranked_tasks[:display_count]:
+            # Truncate task_id and URL for display
+            short_id = task_id[:8]
+            url = stats['url']
+            if len(url) > 50:  # Allow longer URLs in the dedicated panel
+                url = url[:47] + "..."
+                
+            # Format status with icon
+            status = f"{status_icons.get(stats['status'], '?')} {stats['status']}"
+            
+            # Add row
+            table.add_row(
+                short_id,
+                url,
+                status,
+                f"{stats['memory_usage']:.1f}",
+                f"{stats['peak_memory']:.1f}",
+                stats.get('duration', "0:00")
+            )
+        
+        return Panel(table, title="Task Details", border_style="yellow")
+    
+    def _create_footer(self) -> Panel:
+        """Create the footer panel: memory status, credits and the quit hint."""
+        from rich.columns import Columns
+        from rich.align import Align
+        
+        memory_status = self.monitor.get_memory_status()
+        memory_icon = "🟢"  # Default NORMAL
+        if memory_status == "PRESSURE":
+            memory_icon = "🟠"
+        elif memory_status == "CRITICAL":
+            memory_icon = "🔴"
+        
+        # Left section - memory status
+        left_text = Text()
+        left_text.append("Memory Status: ", style="bold")
+        status_style = "green" if memory_status == "NORMAL" else "yellow" if memory_status == "PRESSURE" else "red bold"
+        left_text.append(f"{memory_icon} {memory_status}", style=status_style)
+        
+        # Center section - copyright (author name spelling corrected)
+        center_text = Text("© Crawl4AI 2025 | Made by UncleCode", style="cyan italic")
+        
+        # Right section - quit instruction
+        right_text = Text()
+        right_text.append("Press ", style="bold")
+        right_text.append("q", style="white on blue")
+        right_text.append(" to quit", style="bold")
+        
+        # Create columns with the three sections
+        footer_content = Columns(
+            [
+                Align.left(left_text),
+                Align.center(center_text),
+                Align.right(right_text)
+            ],
+            expand=True
+        )
+        
+        # Create a more visible footer panel
+        return Panel(
+            footer_content,
+            border_style="white",
+            padding=(0, 1)  # Add padding for better visibility
+        )
+
+
+class CrawlerMonitor:
+    """
+    Comprehensive monitoring and visualization system for tracking web crawler operations in real-time.
+    Provides a terminal-based dashboard that displays task statuses, memory usage, queue statistics,
+    and performance metrics.
+    """
+    
+    def __init__(
+        self,
+        urls_total: int = 0,
+        refresh_rate: float = 1.0,
+        enable_ui: bool = True,
+        max_width: int = 120
+    ):
+        """
+        Set up empty statistics and, optionally, the terminal dashboard.
+        
+        Args:
+            urls_total: Total number of URLs expected in this crawl
+            refresh_rate: Seconds between UI refreshes
+            enable_ui: When False, no TerminalUI is created
+            max_width: Width in characters handed to the TerminalUI
+        """
+        # Re-entrant lock taken by the public methods around shared state
+        self._lock = threading.RLock()
+        
+        # Session lifecycle
+        self.is_running = False
+        self.start_time = None
+        self.end_time = None
+        
+        # Progress counters
+        self.urls_total = urls_total
+        self.urls_completed = 0
+        self.requeued_count = 0
+        
+        # Per-task records (task ID -> stats dict) and per-status tallies
+        self.stats = {}
+        tracked = (
+            CrawlStatus.QUEUED,
+            CrawlStatus.IN_PROGRESS,
+            CrawlStatus.COMPLETED,
+            CrawlStatus.FAILED,
+        )
+        self.status_counts = {state.name: 0 for state in tracked}
+        
+        # Memory tracking
+        self.memory_status = "NORMAL"
+        self.peak_memory_percent = 0.0
+        self.peak_memory_time = 0.0
+        
+        # Queue snapshot
+        self.queue_stats = dict(total_queued=0, highest_wait_time=0.0, avg_wait_time=0.0)
+        
+        # Terminal UI (None when disabled)
+        self.enable_ui = enable_ui
+        self.terminal_ui = None
+        if enable_ui:
+            self.terminal_ui = TerminalUI(refresh_rate=refresh_rate, max_width=max_width)
+    
+    def start(self):
+        """
+        Begin a monitoring session.
+        
+        Stamps start_time, flips is_running on and, when the UI is enabled,
+        launches the terminal dashboard, all while holding the lock.
+        """
+        ui = self.terminal_ui if self.enable_ui else None
+        with self._lock:
+            self.start_time = time.time()
+            self.is_running = True
+            
+            # Hand the dashboard a reference to this monitor
+            if ui:
+                ui.start(self)
+    
+    def stop(self):
+        """
+        Stop the monitoring session.
+        
+        Records end_time and clears is_running under the lock, then stops
+        the terminal UI after releasing it. The UI thread takes the same
+        lock while rendering, so joining it with the lock held could stall
+        until the join timeout expires.
+        """
+        with self._lock:
+            self.end_time = time.time()
+            self.is_running = False
+        
+        # Stop the terminal UI only once the lock is free for the UI thread
+        if self.enable_ui and self.terminal_ui:
+            self.terminal_ui.stop()
+    
+    def add_task(self, task_id: str, url: str):
+        """
+        Register a new task with the monitor in the QUEUED state.
+        
+        Args:
+            task_id: Unique identifier for the task
+            url: URL being crawled
+            
+        The stored record starts with:
+            - status: QUEUED
+            - enqueue_time: current time
+            - start_time / end_time: None
+            - memory_usage, peak_memory, wait_time: 0.0
+            - retry_count: 0
+            - duration: "0:00"
+            - error_message: empty string
+            - counted_requeue: False
+        """
+        queued = CrawlStatus.QUEUED.name
+        with self._lock:
+            self.stats[task_id] = dict(
+                task_id=task_id,
+                url=url,
+                status=queued,
+                enqueue_time=time.time(),
+                start_time=None,
+                end_time=None,
+                memory_usage=0.0,
+                peak_memory=0.0,
+                error_message="",
+                wait_time=0.0,
+                retry_count=0,
+                duration="0:00",
+                counted_requeue=False,
+            )
+            self.status_counts[queued] += 1
+    
+    def update_task(
+        self, 
+        task_id: str, 
+        status: Optional[CrawlStatus] = None,
+        start_time: Optional[float] = None,
+        end_time: Optional[float] = None,
+        memory_usage: Optional[float] = None,
+        peak_memory: Optional[float] = None,
+        error_message: Optional[str] = None,
+        retry_count: Optional[int] = None,
+        wait_time: Optional[float] = None
+    ):
+        """
+        Update statistics for a specific task; unknown task IDs are ignored.
+        
+        Args:
+            task_id: Unique identifier for the task
+            status: New status (QUEUED, IN_PROGRESS, COMPLETED, FAILED)
+            start_time: When task execution started
+            end_time: When task execution ended
+            memory_usage: Current memory usage in MB
+            peak_memory: Maximum memory usage in MB
+            error_message: Error description if failed
+            retry_count: Number of retry attempts
+            wait_time: Time spent in queue
+            
+        Arguments left as None keep their stored value. If status changes,
+        decrements the old status count and increments the new one. The
+        task's formatted duration is refreshed once it has a start_time.
+        """
+        with self._lock:
+            # Check if task exists
+            if task_id not in self.stats:
+                return
+            
+            task_stats = self.stats[task_id]
+            
+            # Update status counts if status is changing
+            old_status = task_stats["status"]
+            if status and status.name != old_status:
+                self.status_counts[old_status] -= 1
+                self.status_counts[status.name] += 1
+                
+                # Track completion
+                if status == CrawlStatus.COMPLETED:
+                    self.urls_completed += 1
+                
+                # Track requeues: a finished task changing state again, counted once per task
+                if old_status in [CrawlStatus.COMPLETED.name, CrawlStatus.FAILED.name] and not task_stats.get("counted_requeue", False):
+                    self.requeued_count += 1
+                    task_stats["counted_requeue"] = True
+            
+            # Update task statistics
+            if status:
+                task_stats["status"] = status.name
+            if start_time is not None:
+                task_stats["start_time"] = start_time
+            if end_time is not None:
+                task_stats["end_time"] = end_time
+            if memory_usage is not None:
+                task_stats["memory_usage"] = memory_usage
+                
+                # memory_usage is in MB while psutil reports total RAM in bytes
+                current_percent = (memory_usage * 1024 * 1024 / psutil.virtual_memory().total) * 100
+                if current_percent > self.peak_memory_percent:
+                    self.peak_memory_percent = current_percent
+                    self.peak_memory_time = time.time()
+                
+            if peak_memory is not None:
+                task_stats["peak_memory"] = peak_memory
+            if error_message is not None:
+                task_stats["error_message"] = error_message
+            if retry_count is not None:
+                task_stats["retry_count"] = retry_count
+            if wait_time is not None:
+                task_stats["wait_time"] = wait_time
+            
+            # Calculate duration (still-running tasks are measured up to now)
+            if task_stats["start_time"]:
+                end = task_stats["end_time"] or time.time()
+                duration = end - task_stats["start_time"]
+                task_stats["duration"] = self._format_time(duration)
+    
+    def update_memory_status(self, status: str):
+        """
+        Update the current memory status.
+        
+        Args:
+            status: Memory status (NORMAL, PRESSURE, CRITICAL, or custom)
+            
+        Only stores the value under the lock; nothing is redrawn here.
+        """
+        with self._lock:
+            self.memory_status = status
+    
+    def update_queue_statistics(
+        self,
+        total_queued: int,
+        highest_wait_time: float,
+        avg_wait_time: float
+    ):
+        """
+        Replace the stored queue statistics with a fresh snapshot.
+        
+        Args:
+            total_queued: Number of tasks currently in queue
+            highest_wait_time: Longest wait time of any queued task
+            avg_wait_time: Average wait time across all queued tasks
+        """
+        with self._lock:
+            self.queue_stats = dict(
+                total_queued=total_queued,
+                highest_wait_time=highest_wait_time,
+                avg_wait_time=avg_wait_time,
+            )
+    
+    def get_task_stats(self, task_id: str) -> Dict:
+        """
+        Return a copy of one task's statistics.
+        
+        Args:
+            task_id: Unique identifier for the task
+            
+        Returns:
+            Copy of the task's stats dict, or an empty dict if the ID is unknown
+        """
+        with self._lock:
+            return dict(self.stats.get(task_id, {}))
+    
+    def get_all_task_stats(self) -> Dict[str, Dict]:
+        """
+        Get a snapshot of statistics for all tasks.
+        
+        Returns:
+            Dictionary mapping task_ids to copies of their statistics
+        """
+        with self._lock:
+            return {task_id: task.copy() for task_id, task in self.stats.items()}
+    
+    def get_memory_status(self) -> str:
+        """
+        Get the current memory status.
+        
+        Returns:
+            The string last stored by update_memory_status ("NORMAL" initially)
+        """
+        with self._lock:
+            return self.memory_status
+    
+    def get_queue_stats(self) -> Dict:
+        """
+        Return a copy of the latest queue statistics.
+        
+        Returns:
+            Dictionary with the keys:
+            - total_queued: Number of tasks in queue
+            - highest_wait_time: Longest wait time
+            - avg_wait_time: Average wait time
+        """
+        with self._lock:
+            return dict(self.queue_stats)
+    
+    def get_summary(self) -> Dict:
+        """
+        Get a summary of all crawler statistics.
+        
+        Returns:
+            Dictionary containing:
+            - runtime: Seconds since start_time (0 if not started)
+            - urls_total: Total URLs to process
+            - urls_completed: Number of completed URLs
+            - completion_percentage: Percentage complete
+            - status_counts: Count of tasks in each status
+            - memory_status: Current memory status
+            - peak_memory_percent: Highest memory usage
+            - peak_memory_time: Timestamp at which the peak was recorded
+            - avg_task_duration: Average task processing time
+            - estimated_completion_time: Formatted time remaining, or "N/A"
+            - requeue_rate / requeued_count: Requeues as a percentage and as a count
+        """
+        with self._lock:
+            # Calculate runtime
+            current_time = time.time()
+            runtime = current_time - (self.start_time or current_time)
+            
+            # Calculate completion percentage
+            completion_percentage = 0
+            if self.urls_total > 0:
+                completion_percentage = (self.urls_completed / self.urls_total) * 100
+            
+            # Calculate average task duration for completed tasks
+            completed_tasks = [
+                task for task in self.stats.values() 
+                if task["status"] == CrawlStatus.COMPLETED.name and task.get("start_time") and task.get("end_time")
+            ]
+            
+            avg_task_duration = 0
+            if completed_tasks:
+                total_duration = sum(task["end_time"] - task["start_time"] for task in completed_tasks)
+                avg_task_duration = total_duration / len(completed_tasks)
+            
+            # Calculate requeue rate
+            requeue_rate = 0
+            if len(self.stats) > 0:
+                requeue_rate = (self.requeued_count / len(self.stats)) * 100
+            
+            # Time remaining = remaining URLs x average duration (does not account for concurrency)
+            estimated_completion_time = "N/A"
+            if avg_task_duration > 0 and self.urls_total > 0 and self.urls_completed > 0:
+                remaining_tasks = self.urls_total - self.urls_completed
+                estimated_seconds = remaining_tasks * avg_task_duration
+                estimated_completion_time = self._format_time(estimated_seconds)
+            
+            return {
+                "runtime": runtime,
+                "urls_total": self.urls_total,
+                "urls_completed": self.urls_completed,
+                "completion_percentage": completion_percentage,
+                "status_counts": self.status_counts.copy(),
+                "memory_status": self.memory_status,
+                "peak_memory_percent": self.peak_memory_percent,
+                "peak_memory_time": self.peak_memory_time,
+                "avg_task_duration": avg_task_duration,
+                "estimated_completion_time": estimated_completion_time,
+                "requeue_rate": requeue_rate,
+                "requeued_count": self.requeued_count
+            }
+    
+    def render(self):
+        """
+        Force an immediate refresh of the terminal UI.
+        
+        Does nothing when the UI is disabled. Otherwise asks the TerminalUI
+        to rebuild its panels right away instead of waiting for its own
+        refresh loop.
+        
+        Note: the periodic rendering loop and keyboard handling live in
+        TerminalUI, which uses the rich library's Live display.
+        """
+        ui = self.terminal_ui
+        if not (self.enable_ui and ui):
+            return
+        # Guarded lookup kept for UI objects that lack the refresh hook
+        if hasattr(ui, '_update_display'):
+            ui._update_display()
+    
+    def _format_time(self, seconds: float) -> str:
+        """
+        Format a duration as [H:]M:SS.
+        
+        Args:
+            seconds: Duration in seconds; negative values are shown as 0:00
+            
+        Returns:
+            "M:SS" below one hour, otherwise "H:MM:SS". Hours keep counting
+            past 24 instead of wrapping (e.g. 90000 -> "25:00:00")
+        """
+        # Work on whole seconds directly: timedelta.seconds would drop full days
+        total = max(0, int(seconds))
+        hours, remainder = divmod(total, 3600)
+        minutes, secs = divmod(remainder, 60)
+        if hours > 0:
+            return f"{hours}:{minutes:02}:{secs:02}"
+        return f"{minutes}:{secs:02}"
+    
+    def _calculate_estimated_completion(self) -> str:
+        """
+        Return the estimated time remaining as reported by get_summary().
+        
+        Returns:
+            Formatted time string, or "N/A" if the summary has no estimate
+        """
+        estimate = self.get_summary().get("estimated_completion_time", "N/A")
+        return estimate
+
+
+# Manual demo: feeds the monitor simulated tasks and shows the dashboard until 'q' or Ctrl+C
+if __name__ == "__main__":
+    # Initialize the monitor
+    monitor = CrawlerMonitor(urls_total=100)
+    
+    # Start monitoring
+    monitor.start()
+    
+    try:
+        # Simulate some tasks
+        for i in range(20):
+            task_id = str(uuid.uuid4())
+            url = f"https://example.com/page{i}"
+            monitor.add_task(task_id, url)
+            
+            # Tasks 0-3 (20%) are shown as already running
+            if i < 4:
+                monitor.update_task(
+                    task_id=task_id,
+                    status=CrawlStatus.IN_PROGRESS,
+                    start_time=time.time() - 30,  # Started 30 seconds ago
+                    memory_usage=10.5
+                )
+                
+            # Tasks 4-5 (10%) go through IN_PROGRESS and end COMPLETED
+            if i >= 4 and i < 6:
+                start_time = time.time() - 60
+                end_time = time.time() - 15
+                monitor.update_task(
+                    task_id=task_id,
+                    status=CrawlStatus.IN_PROGRESS,
+                    start_time=start_time,
+                    memory_usage=8.2
+                )
+                monitor.update_task(
+                    task_id=task_id,
+                    status=CrawlStatus.COMPLETED,
+                    end_time=end_time,
+                    memory_usage=0,
+                    peak_memory=15.7
+                )
+                
+            # Task 6 (5%) goes through IN_PROGRESS and ends FAILED
+            if i >= 6 and i < 7:
+                start_time = time.time() - 45
+                end_time = time.time() - 20
+                monitor.update_task(
+                    task_id=task_id,
+                    status=CrawlStatus.IN_PROGRESS,
+                    start_time=start_time,
+                    memory_usage=12.3
+                )
+                monitor.update_task(
+                    task_id=task_id,
+                    status=CrawlStatus.FAILED,
+                    end_time=end_time,
+                    memory_usage=0,
+                    peak_memory=18.2,
+                    error_message="Connection timeout"
+                )
+        
+        # Simulate memory pressure
+        monitor.update_memory_status("PRESSURE")
+        
+        # Simulate queue statistics
+        monitor.update_queue_statistics(
+            total_queued=16,  # Illustrative value; not derived from the tasks above
+            highest_wait_time=120.5,
+            avg_wait_time=60.2
+        )
+        
+        # Keep the monitor running for a demonstration
+        print("Crawler Monitor is running. Press 'q' to exit.")
+        while monitor.is_running:
+            time.sleep(0.1)
+            
+    except KeyboardInterrupt:
+        print("\nExiting crawler monitor...")
+    finally:
+        # Stop the monitor
+        monitor.stop()
+        print("Crawler monitor exited successfully.")
--- a/crawl4ai/models.py
+++ b/crawl4ai/models.py
@@ -28,6 +28,12 @@ class CrawlerTaskResult:
    start_time: Union[datetime, float]
    end_time: Union[datetime, float]
    error_message: str = ""
+    retry_count: int = 0
+    wait_time: float = 0.0
+    
+    @property
+    def success(self) -> bool:
+        return self.result.success


 class CrawlStatus(Enum):
@@ -67,6 +73,9 @@ class CrawlStats:
    memory_usage: float = 0.0
    peak_memory: float = 0.0
    error_message: str = ""
+    wait_time: float = 0.0
+    retry_count: int = 0
+    counted_requeue: bool = False

    @property
    def duration(self) -> str:
@@ -87,6 +96,7 @@ class CrawlStats:
        duration = end - start
        return str(timedelta(seconds=int(duration.total_seconds())))

+
 class DisplayMode(Enum):
    DETAILED = "DETAILED"
    AGGREGATED = "AGGREGATED"