Merge branch '2025-MAY-2' into next-MAY
This commit is contained in:
@@ -273,7 +273,7 @@ In a typical scenario, you define **one** `BrowserConfig` for your crawler sessi
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig, LLMContentFilter, DefaultMarkdownGenerator
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
|
||||
async def main():
|
||||
@@ -298,7 +298,7 @@ async def main():
|
||||
# 3) Example LLM content filtering
|
||||
|
||||
gemini_config = LLMConfig(
|
||||
provider="gemini/gemini-1.5-pro"
|
||||
provider="gemini/gemini-1.5-pro",
|
||||
api_token = "env:GEMINI_API_TOKEN"
|
||||
)
|
||||
|
||||
@@ -322,8 +322,9 @@ async def main():
|
||||
)
|
||||
|
||||
md_generator = DefaultMarkdownGenerator(
|
||||
content_filter=filter,
|
||||
options={"ignore_links": True}
|
||||
content_filter=filter,
|
||||
options={"ignore_links": True}
|
||||
)
|
||||
|
||||
# 4) Crawler run config: skip cache, use extraction
|
||||
run_conf = CrawlerRunConfig(
|
||||
|
||||
Reference in New Issue
Block a user