From e3488da1945e8c17ee3dc7e501be7187d7f6beae Mon Sep 17 00:00:00 2001 From: Olavo Henrique Marques Peixoto <98776769+olavohenrique03@users.noreply.github.com> Date: Mon, 9 Dec 2024 03:34:52 -0300 Subject: [PATCH 1/4] fixing Readmen tap (#313) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dede4a03..7407484e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 🔥🕷️ Crawl4AI: Crawl Smarter, Faster, Freely. For AI. +# Crawl4AI: Crawl Smarter, Faster, Freely. For AI. unclecode%2Fcrawl4ai | Trendshift From ba3e8088027e67ee8956ff0c54f4ffcc0438ae87 Mon Sep 17 00:00:00 2001 From: lu4nx Date: Mon, 9 Dec 2024 17:19:26 +0800 Subject: [PATCH 2/4] fix: The extract method logs output only when self.verbose is set to True. (#314) Co-authored-by: lu4nx --- crawl4ai/extraction_strategy.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index b79e0c43..b7eabf74 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -92,8 +92,10 @@ class LLMExtractionStrategy(ExtractionStrategy): def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]: - # print("[LOG] Extracting blocks from URL:", url) - print(f"[LOG] Call LLM for {url} - block index: {ix}") + if self.verbose: + # print("[LOG] Extracting blocks from URL:", url) + print(f"[LOG] Call LLM for {url} - block index: {ix}") + variable_values = { "URL": url, "HTML": escape_json_string(sanitize_html(html)), @@ -868,4 +870,4 @@ class JsonXPATHExtractionStrategy(ExtractionStrategy): def run(self, url: str, sections: List[str], *q, **kwargs) -> List[Dict[str, Any]]: combined_html = self.DEL.join(sections) - return self.extract(url, combined_html, **kwargs) \ No newline at end of file + return self.extract(url, combined_html, **kwargs) From ded554d3345ca00c038274fc38ff43b28b45cdd8 Mon Sep 17 00:00:00 2001 From: Mohammed Date: Mon, 9 Dec 2024 
07:17:43 -0500 Subject: [PATCH 3/4] Fixed typo (#324) --- docs/md_v2/basic/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/md_v2/basic/quickstart.md b/docs/md_v2/basic/quickstart.md index 95b8a397..c18cd7d1 100644 --- a/docs/md_v2/basic/quickstart.md +++ b/docs/md_v2/basic/quickstart.md @@ -8,7 +8,7 @@ First, let's import the necessary modules and create an instance of `AsyncWebCra ```python import asyncio -from crawl4ai import AsyncWebCrawler, CasheMode +from crawl4ai import AsyncWebCrawler, CacheMode async def main(): async with AsyncWebCrawler(verbose=True) as crawler: From 759164831daf69106dc39c7b999601e3bb607132 Mon Sep 17 00:00:00 2001 From: lvzhengri <95766782+lvzhengri@users.noreply.github.com> Date: Tue, 10 Dec 2024 20:56:52 +0800 Subject: [PATCH 4/4] Update async_webcrawler.py (#337) add @asynccontextmanager --- crawl4ai/async_webcrawler.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 2c17602d..9fe4fcc4 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -132,6 +132,11 @@ class AsyncWebCrawler: # if self.verbose: # print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}") + @asynccontextmanager + async def nullcontext(self): + """Async no-op context manager.""" + yield + async def arun( self, url: str,