Merge branch 'main' of https://github.com/unclecode/crawl4ai
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
# 🔥🕷️ Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
|
# Crawl4AI: Crawl Smarter, Faster, Freely. For AI.
|
||||||
|
|
||||||
<a href="https://trendshift.io/repositories/11716" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11716" alt="unclecode%2Fcrawl4ai | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
<a href="https://trendshift.io/repositories/11716" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11716" alt="unclecode%2Fcrawl4ai | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||||
|
|
||||||
|
|||||||
@@ -132,6 +132,11 @@ class AsyncWebCrawler:
|
|||||||
# if self.verbose:
|
# if self.verbose:
|
||||||
# print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}")
|
# print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}")
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def nullcontext(self):
|
||||||
|
"""异步空上下文管理器"""
|
||||||
|
yield
|
||||||
|
|
||||||
async def arun(
|
async def arun(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
|
|||||||
@@ -92,8 +92,10 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
|||||||
|
|
||||||
|
|
||||||
def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]:
|
def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]:
|
||||||
# print("[LOG] Extracting blocks from URL:", url)
|
if self.verbose:
|
||||||
print(f"[LOG] Call LLM for {url} - block index: {ix}")
|
# print("[LOG] Extracting blocks from URL:", url)
|
||||||
|
print(f"[LOG] Call LLM for {url} - block index: {ix}")
|
||||||
|
|
||||||
variable_values = {
|
variable_values = {
|
||||||
"URL": url,
|
"URL": url,
|
||||||
"HTML": escape_json_string(sanitize_html(html)),
|
"HTML": escape_json_string(sanitize_html(html)),
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ First, let's import the necessary modules and create an instance of `AsyncWebCra
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
import asyncio
|
import asyncio
|
||||||
from crawl4ai import AsyncWebCrawler, CasheMode
|
from crawl4ai import AsyncWebCrawler, CacheMode
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
async with AsyncWebCrawler(verbose=True) as crawler:
|
async with AsyncWebCrawler(verbose=True) as crawler:
|
||||||
|
|||||||
Reference in New Issue
Block a user