This commit is contained in:
UncleCode
2024-12-12 19:58:01 +08:00
4 changed files with 12 additions and 5 deletions

View File

@@ -1,4 +1,4 @@
# 🔥🕷️ Crawl4AI: Crawl Smarter, Faster, Freely. For AI. # Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
<a href="https://trendshift.io/repositories/11716" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11716" alt="unclecode%2Fcrawl4ai | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a> <a href="https://trendshift.io/repositories/11716" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11716" alt="unclecode%2Fcrawl4ai | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

View File

@@ -132,6 +132,11 @@ class AsyncWebCrawler:
# if self.verbose: # if self.verbose:
# print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}") # print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}")
@asynccontextmanager
async def nullcontext(self):
    """Async no-op context manager.

    Performs no setup and no teardown — simply yields control back to the
    caller. Useful as a stand-in where an ``async with`` target is required
    but nothing needs to be managed.
    """
    yield
async def arun( async def arun(
self, self,
url: str, url: str,

View File

@@ -92,8 +92,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]: def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]:
if self.verbose:
# print("[LOG] Extracting blocks from URL:", url) # print("[LOG] Extracting blocks from URL:", url)
print(f"[LOG] Call LLM for {url} - block index: {ix}") print(f"[LOG] Call LLM for {url} - block index: {ix}")
variable_values = { variable_values = {
"URL": url, "URL": url,
"HTML": escape_json_string(sanitize_html(html)), "HTML": escape_json_string(sanitize_html(html)),

View File

@@ -8,7 +8,7 @@ First, let's import the necessary modules and create an instance of `AsyncWebCra
```python ```python
import asyncio import asyncio
from crawl4ai import AsyncWebCrawler, CasheMode from crawl4ai import AsyncWebCrawler, CacheMode
async def main(): async def main():
async with AsyncWebCrawler(verbose=True) as crawler: async with AsyncWebCrawler(verbose=True) as crawler: