Merge branch 'vr0.5.0.post8'

This commit is contained in:
UncleCode
2025-04-05 21:36:17 +08:00
85 changed files with 12556 additions and 710 deletions

View File

@@ -352,7 +352,10 @@ Example:
from crawl4ai import CrawlerRunConfig, PruningContentFilter
config = CrawlerRunConfig(
content_filter=PruningContentFilter(threshold=0.48)
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(threshold=0.48, threshold_type="fixed")
),
cache_mode= CacheMode.BYPASS
)
print(config.dump()) # Use this JSON in your API calls
```
@@ -551,7 +554,7 @@ async def test_stream_crawl(session, token: str):
"https://example.com/page3",
],
"browser_config": {"headless": True, "viewport": {"width": 1200}},
- "crawler_config": {"stream": True, "cache_mode": "aggressive"}
+ "crawler_config": {"stream": True, "cache_mode": "bypass"}
}
# headers = {"Authorization": f"Bearer {token}"} # If JWT is enabled, more on this later

View File

@@ -2,6 +2,7 @@ import os
import json
import asyncio
from typing import List, Tuple
+ from functools import partial
import logging
from typing import Optional, AsyncGenerator
@@ -388,12 +389,13 @@ async def handle_crawl_request(
)
async with AsyncWebCrawler(config=browser_config) as crawler:
results = await crawler.arun_many(
urls=urls,
config=crawler_config,
dispatcher=dispatcher
)
results = []
func = getattr(crawler, "arun" if len(urls) == 1 else "arun_many")
partial_func = partial(func,
urls[0] if len(urls) == 1 else urls,
config=crawler_config,
dispatcher=dispatcher)
results = await partial_func()
return {
"success": True,
"results": [result.model_dump() for result in results]