Merge branch '2025-APR-1' into 2025-MAY-2

2025-06-02 20:25:58 +02:00
parent b55e27d2ef a55c2b3f88
commit 773ed7b281
14 changed files with 173 additions and 39 deletions
--- a/docs/md_v2/core/browser-crawler-config.md
+++ b/docs/md_v2/core/browser-crawler-config.md
@@ -273,7 +273,7 @@ In a typical scenario, you define **one** `BrowserConfig` for your crawler sessi

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig, LLMContentFilter, DefaultMarkdownGenerator
 from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

 async def main():
@@ -298,7 +298,7 @@ async def main():
    # 3) Example LLM content filtering

    gemini_config = LLMConfig(
-        provider="gemini/gemini-1.5-pro" 
+        provider="gemini/gemini-1.5-pro", 
        api_token = "env:GEMINI_API_TOKEN"
    )

@@ -322,8 +322,9 @@ async def main():
    )

    md_generator = DefaultMarkdownGenerator(
-    content_filter=filter,
-    options={"ignore_links": True}
+        content_filter=filter,
+        options={"ignore_links": True}
+    )

    # 4) Crawler run config: skip cache, use extraction
    run_conf = CrawlerRunConfig(
--- a/docs/md_v2/core/cli.md
+++ b/docs/md_v2/core/cli.md
@@ -17,6 +17,9 @@
 - [Configuration Reference](#configuration-reference)
 - [Best Practices & Tips](#best-practices--tips)

+## Installation
+The Crawl4AI CLI (`crwl`) is installed automatically when you install the Crawl4AI library; no separate installation step is required.
+
 ## Basic Usage

 The Crawl4AI CLI (`crwl`) provides a simple interface to the Crawl4AI library:
--- a/docs/md_v2/core/markdown-generation.md
+++ b/docs/md_v2/core/markdown-generation.md
@@ -233,7 +233,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:

 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def main():
@@ -255,9 +255,12 @@ async def main():
        chunk_token_threshold=4096,  # Adjust based on your needs
        verbose=True
    )
-
+    md_generator = DefaultMarkdownGenerator(
+        content_filter=filter,
+        options={"ignore_links": True}
+    )
    config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator,
    )

    async with AsyncWebCrawler() as crawler: