docs(api): add streaming mode documentation and examples

Add comprehensive documentation for the new streaming mode feature in arun_many(): - Update arun_many() API docs to reflect streaming return type - Add streaming examples in quickstart and multi-url guides - Document stream parameter in configuration classes - Add clone() helper method documentation for configs This change improves documentation for processing large numbers of URLs efficiently.
2025-01-19 18:21:34 +08:00
parent 91463e34f1
commit 8b6fe6a98f
5 changed files with 184 additions and 31 deletions
--- a/docs/md_v2/advanced/multi-url-crawling.md
+++ b/docs/md_v2/advanced/multi-url-crawling.md
@@ -93,44 +93,75 @@ dispatcher = SemaphoreDispatcher(

 ## 4. Usage Examples

-### 4.1 Simple Usage (Default MemoryAdaptiveDispatcher)
+### 4.1 Batch Processing (Default)

 ```python
-async with AsyncWebCrawler(config=browser_config) as crawler:
-    results = await crawler.arun_many(urls, config=run_config)
-```
-
-### 4.2 Memory Adaptive with Rate Limiting
-
-```python
-async def crawl_with_memory_adaptive(urls):
+async def crawl_batch():
    browser_config = BrowserConfig(headless=True, verbose=False)
-    run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
+    run_config = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        stream=False  # Default: get all results at once
+    )
    
    dispatcher = MemoryAdaptiveDispatcher(
        memory_threshold_percent=70.0,
+        check_interval=1.0,
        max_session_permit=10,
-        rate_limiter=RateLimiter(
-            base_delay=(1.0, 2.0),
-            max_delay=30.0,
-            max_retries=2
-        ),
        monitor=CrawlerMonitor(
-            max_visible_rows=15,
            display_mode=DisplayMode.DETAILED
        )
    )
-    
+
    async with AsyncWebCrawler(config=browser_config) as crawler:
+        # Get all results at once
        results = await crawler.arun_many(
-            urls,
+            urls=urls,
            config=run_config,
            dispatcher=dispatcher
        )
-        return results
+        
+        # Process all results after completion
+        for result in results:
+            if result.success:
+                await process_result(result)
+            else:
+                print(f"Failed to crawl {result.url}: {result.error_message}")
 ```

-### 4.3 Semaphore with Rate Limiting
+### 4.2 Streaming Mode
+
+```python
+async def crawl_streaming():
+    browser_config = BrowserConfig(headless=True, verbose=False)
+    run_config = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        stream=True  # Enable streaming mode
+    )
+    
+    dispatcher = MemoryAdaptiveDispatcher(
+        memory_threshold_percent=70.0,
+        check_interval=1.0,
+        max_session_permit=10,
+        monitor=CrawlerMonitor(
+            display_mode=DisplayMode.DETAILED
+        )
+    )
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        # Process results as they become available
+        async for result in await crawler.arun_many(
+            urls=urls,
+            config=run_config,
+            dispatcher=dispatcher
+        ):
+            if result.success:
+                # Process each result immediately
+                await process_result(result)
+            else:
+                print(f"Failed to crawl {result.url}: {result.error_message}")
+```
+
+### 4.3 Semaphore-based Crawling

 ```python
 async def crawl_with_semaphore(urls):