From 5b4a586b2da086f623f9c7747974c7a68fa3115c Mon Sep 17 00:00:00 2001 From: UncleCode Date: Thu, 16 May 2024 22:28:24 +0800 Subject: [PATCH] Update web_crawler.py Set CosineExtraction as default strategy --- crawl4ai/web_crawler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crawl4ai/web_crawler.py b/crawl4ai/web_crawler.py index 4604279e..6aebcdb5 100644 --- a/crawl4ai/web_crawler.py +++ b/crawl4ai/web_crawler.py @@ -63,7 +63,7 @@ class WebCrawler: extract_blocks_flag: bool = True, word_count_threshold=MIN_WORD_THRESHOLD, use_cached_html: bool = False, - extraction_strategy: ExtractionStrategy = NoExtractionStrategy(), + extraction_strategy: ExtractionStrategy = CosineStrategy(), chunking_strategy: ChunkingStrategy = RegexChunking(), **kwargs, ) -> CrawlResult: @@ -82,7 +82,7 @@ class WebCrawler: self, url: str, word_count_threshold=MIN_WORD_THRESHOLD, - extraction_strategy: ExtractionStrategy = NoExtractionStrategy(), + extraction_strategy: ExtractionStrategy = CosineStrategy(), chunking_strategy: ChunkingStrategy = RegexChunking(), bypass_cache: bool = False, css_selector: str = None, @@ -184,7 +184,7 @@ class WebCrawler: extract_blocks_flag: bool = True, word_count_threshold=MIN_WORD_THRESHOLD, use_cached_html: bool = False, - extraction_strategy: ExtractionStrategy = NoExtractionStrategy(), + extraction_strategy: ExtractionStrategy = CosineStrategy(), chunking_strategy: ChunkingStrategy = RegexChunking(), **kwargs, ) -> List[CrawlResult]: