diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 66b4c21b..8db69333 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -472,7 +472,9 @@ class AsyncWebCrawler: try: _url = url if not kwargs.get("is_raw_html", False) else "Raw HTML" t1 = time.perf_counter() - scrapping_strategy = WebScrapingStrategy() + scrapping_strategy = WebScrapingStrategy( + logger=self.logger, + ) # result = await scrapping_strategy.ascrap( result = scrapping_strategy.scrap( url, diff --git a/crawl4ai/content_filter_strategy.py b/crawl4ai/content_filter_strategy.py index ca3868bb..f05b92fa 100644 --- a/crawl4ai/content_filter_strategy.py +++ b/crawl4ai/content_filter_strategy.py @@ -468,7 +468,7 @@ class PruningContentFilter(RelevantContentFilter): 'link_text_len': link_text_len } - score = self._compute_composite_score(metrics) + score = self._compute_composite_score(metrics, text_len, tag_len, link_text_len) if self.threshold_type == 'fixed': should_remove = score < self.threshold