Merge branch '2025-MAY-2' into next-MAY

This commit is contained in:
ntohidi
2025-07-08 11:46:13 +02:00
28 changed files with 448 additions and 154 deletions

View File

@@ -273,7 +273,7 @@ In a typical scenario, you define **one** `BrowserConfig` for your crawler sessi
```python
import asyncio
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig, LLMContentFilter, DefaultMarkdownGenerator
from crawl4ai import JsonCssExtractionStrategy
async def main():
@@ -298,7 +298,7 @@ async def main():
# 3) Example LLM content filtering
gemini_config = LLMConfig(
provider="gemini/gemini-1.5-pro"
provider="gemini/gemini-1.5-pro",
api_token = "env:GEMINI_API_TOKEN"
)
@@ -322,8 +322,9 @@ async def main():
)
md_generator = DefaultMarkdownGenerator(
content_filter=filter,
options={"ignore_links": True}
content_filter=filter,
options={"ignore_links": True}
)
# 4) Crawler run config: skip cache, use extraction
run_conf = CrawlerRunConfig(