docs: update direct passing of content_filter to CrawlerRunConfig and instead pass it via MarkdownGenerator. Ref: #603

2025-05-07 15:20:36 +05:30
parent 39e3b792a1
commit 2b17f234f8
2 changed files with 14 additions and 6 deletions
--- a/deploy/docker/c4ai-doc-context.md
+++ b/deploy/docker/c4ai-doc-context.md
@@ -403,7 +403,7 @@ async def main():
    md_generator = DefaultMarkdownGenerator(
    content_filter=filter,
-    options={"ignore_links": True}
+    options={"ignore_links": True})
    # 4) Crawler run config: skip cache, use extraction
    run_conf = CrawlerRunConfig(
@@ -4152,7 +4152,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:
 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter
 async def main():
@@ -4175,8 +4175,13 @@ async def main():
        verbose=True
    )
    md_generator = DefaultMarkdownGenerator(
        content_filter=filter,
        options={"ignore_links": True}
    )
    config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator
    )
    async with AsyncWebCrawler() as crawler:
--- a/docs/md_v2/core/markdown-generation.md
+++ b/docs/md_v2/core/markdown-generation.md
@@ -233,7 +233,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:
 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter
 async def main():
@@ -255,9 +255,12 @@ async def main():
        chunk_token_threshold=4096,  # Adjust based on your needs
        verbose=True
    )
-
+    md_generator = DefaultMarkdownGenerator(
        content_filter=filter,
        options={"ignore_links": True}
    )
    config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator,
    )
    async with AsyncWebCrawler() as crawler: