From e3488da1945e8c17ee3dc7e501be7187d7f6beae Mon Sep 17 00:00:00 2001
From: Olavo Henrique Marques Peixoto
<98776769+olavohenrique03@users.noreply.github.com>
Date: Mon, 9 Dec 2024 03:34:52 -0300
Subject: [PATCH 1/4] fixing Readme typo (#313)
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index dede4a03..7407484e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# 🔥🕷️ Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
+# Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
From ba3e8088027e67ee8956ff0c54f4ffcc0438ae87 Mon Sep 17 00:00:00 2001
From: lu4nx
Date: Mon, 9 Dec 2024 17:19:26 +0800
Subject: [PATCH 2/4] fix: The extract method logs output only when
self.verbose is set to True. (#314)
Co-authored-by: lu4nx
---
crawl4ai/extraction_strategy.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py
index b79e0c43..b7eabf74 100644
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -92,8 +92,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]:
- # print("[LOG] Extracting blocks from URL:", url)
- print(f"[LOG] Call LLM for {url} - block index: {ix}")
+ if self.verbose:
+ # print("[LOG] Extracting blocks from URL:", url)
+ print(f"[LOG] Call LLM for {url} - block index: {ix}")
+
variable_values = {
"URL": url,
"HTML": escape_json_string(sanitize_html(html)),
@@ -868,4 +870,4 @@ class JsonXPATHExtractionStrategy(ExtractionStrategy):
def run(self, url: str, sections: List[str], *q, **kwargs) -> List[Dict[str, Any]]:
combined_html = self.DEL.join(sections)
- return self.extract(url, combined_html, **kwargs)
\ No newline at end of file
+ return self.extract(url, combined_html, **kwargs)
From ded554d3345ca00c038274fc38ff43b28b45cdd8 Mon Sep 17 00:00:00 2001
From: Mohammed
Date: Mon, 9 Dec 2024 07:17:43 -0500
Subject: [PATCH 3/4] Fixed typo (#324)
---
docs/md_v2/basic/quickstart.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/md_v2/basic/quickstart.md b/docs/md_v2/basic/quickstart.md
index 95b8a397..c18cd7d1 100644
--- a/docs/md_v2/basic/quickstart.md
+++ b/docs/md_v2/basic/quickstart.md
@@ -8,7 +8,7 @@ First, let's import the necessary modules and create an instance of `AsyncWebCra
```python
import asyncio
-from crawl4ai import AsyncWebCrawler, CasheMode
+from crawl4ai import AsyncWebCrawler, CacheMode
async def main():
async with AsyncWebCrawler(verbose=True) as crawler:
From 759164831daf69106dc39c7b999601e3bb607132 Mon Sep 17 00:00:00 2001
From: lvzhengri <95766782+lvzhengri@users.noreply.github.com>
Date: Tue, 10 Dec 2024 20:56:52 +0800
Subject: [PATCH 4/4] Update async_webcrawler.py (#337)
add @asynccontextmanager
---
crawl4ai/async_webcrawler.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py
index 2c17602d..9fe4fcc4 100644
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -132,6 +132,11 @@ class AsyncWebCrawler:
# if self.verbose:
# print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}")
+ @asynccontextmanager
+ async def nullcontext(self):
+ """异步空上下文管理器"""
+ yield
+
async def arun(
self,
url: str,