feat: Add screenshot functionality to crawl_urls

This commit is contained in:
unclecode
2024-06-07 15:33:15 +08:00
parent 8e73a482a2
commit 226a62a3c0
3 changed files with 37 additions and 4 deletions

View File

@@ -35,10 +35,20 @@ def cprint(message, press_any_key=False):
def basic_usage(crawler):
cprint("🛠️ [bold cyan]Basic Usage: Simply provide a URL and let Crawl4ai do the magic![/bold cyan]")
result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True)
result = crawler.run(url="https://www.nbcnews.com/business")
cprint("[LOG] 📦 [bold yellow]Basic crawl result:[/bold yellow]")
print_result(result)
def screenshot_usage(crawler):
cprint("\n📸 [bold cyan]Let's take a screenshot of the page![/bold cyan]")
result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True)
cprint("[LOG] 📦 [bold yellow]Screenshot result:[/bold yellow]")
# Save the screenshot to a file
with open("screenshot.png", "wb") as f:
f.write(base64.b64decode(result.screenshot))
cprint("Screenshot saved to 'screenshot.png'!")
print_result(result)
def understanding_parameters(crawler):
cprint("\n🧠 [bold cyan]Understanding 'bypass_cache' and 'include_raw_html' parameters:[/bold cyan]")
cprint("By default, Crawl4ai caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action.")
@@ -187,11 +197,11 @@ def main():
crawler = create_crawler()
crawler.always_by_pass_cache = True
basic_usage(crawler)
understanding_parameters(crawler)
crawler.always_by_pass_cache = True
screenshot_usage(crawler)
add_chunking_strategy(crawler)
add_extraction_strategy(crawler)
add_llm_extraction_strategy(crawler)