diff --git a/docs/md_v2/core/fit-markdown.md b/docs/md_v2/core/fit-markdown.md index 3c6d3e02..1684a779 100644 --- a/docs/md_v2/core/fit-markdown.md +++ b/docs/md_v2/core/fit-markdown.md @@ -137,7 +137,7 @@ if __name__ == "__main__": - Higher → fewer chunks but more relevant. - Lower → more inclusive. -> In more advanced scenarios, you might see parameters like `use_stemming`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted. +> In more advanced scenarios, you might see parameters like `language`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted. --- @@ -242,4 +242,4 @@ class MyCustomFilter(RelevantContentFilter): With these tools, you can **zero in** on the text that truly matters, ignoring spammy or boilerplate content, and produce a concise, relevant “fit markdown” for your AI or data pipelines. Happy pruning and searching! -- Last Updated: 2025-01-01 \ No newline at end of file +- Last Updated: 2025-01-01 diff --git a/docs/md_v2/core/markdown-generation.md b/docs/md_v2/core/markdown-generation.md index e6f5e12a..d4cad79b 100644 --- a/docs/md_v2/core/markdown-generation.md +++ b/docs/md_v2/core/markdown-generation.md @@ -187,7 +187,7 @@ from crawl4ai import CrawlerRunConfig bm25_filter = BM25ContentFilter( user_query="machine learning", bm25_threshold=1.2, - use_stemming=True + language="english" ) md_generator = DefaultMarkdownGenerator(