Merge branch 'main' of https://github.com/unclecode/crawl4ai

2024-12-12 19:58:01 +08:00
parent 3d69715dba 759164831d
commit 20d6f5fdf4
4 changed files with 12 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# 🔥🕷️ Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
+# Crawl4AI: Crawl Smarter, Faster, Freely. For AI.

 <a href="https://trendshift.io/repositories/11716" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11716" alt="unclecode%2Fcrawl4ai | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -132,6 +132,11 @@ class AsyncWebCrawler:
        # if self.verbose:
        #     print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}")

+    @asynccontextmanager
+    async def nullcontext(self):
+        """Asynchronous no-op context manager."""
+        yield
+    
    async def arun(
        self,
        url: str,
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -92,8 +92,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
        
            
    def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]:
-        # print("[LOG] Extracting blocks from URL:", url)
-        print(f"[LOG] Call LLM for {url} - block index: {ix}")
+        if self.verbose:
+            # print("[LOG] Extracting blocks from URL:", url)
+            print(f"[LOG] Call LLM for {url} - block index: {ix}")
+
        variable_values = {
            "URL": url,
            "HTML": escape_json_string(sanitize_html(html)),
@@ -868,4 +870,4 @@ class JsonXPATHExtractionStrategy(ExtractionStrategy):

    def run(self, url: str, sections: List[str], *q, **kwargs) -> List[Dict[str, Any]]:
        combined_html = self.DEL.join(sections)
-        return self.extract(url, combined_html, **kwargs)
+        return self.extract(url, combined_html, **kwargs)
--- a/docs/md_v2/basic/quickstart.md
+++ b/docs/md_v2/basic/quickstart.md
@@ -8,7 +8,7 @@ First, let's import the necessary modules and create an instance of `AsyncWebCra

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler, CasheMode
+from crawl4ai import AsyncWebCrawler, CacheMode

 async def main():
    async with AsyncWebCrawler(verbose=True) as crawler: