feat: Add screenshot functionality to crawl_urls

2024-06-07 15:33:15 +08:00
parent 8e73a482a2
commit 226a62a3c0
3 changed files with 37 additions and 4 deletions
--- a/docs/examples/quickstart.py
+++ b/docs/examples/quickstart.py
@@ -35,10 +35,20 @@ def cprint(message, press_any_key=False):

 def basic_usage(crawler):
    cprint("🛠️ [bold cyan]Basic Usage: Simply provide a URL and let Crawl4ai do the magic![/bold cyan]")
-    result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True)
+    result = crawler.run(url="https://www.nbcnews.com/business")
    cprint("[LOG] 📦 [bold yellow]Basic crawl result:[/bold yellow]")
    print_result(result)

+def screenshot_usage(crawler):
+    """Crawl the demo URL with screenshot=True, write the decoded screenshot to 'screenshot.png', then print the result."""
+    cprint("\n📸 [bold cyan]Let's take a screenshot of the page![/bold cyan]")
+    result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True)
+    cprint("[LOG] 📦 [bold yellow]Screenshot result:[/bold yellow]")
+    with open("screenshot.png", "wb") as f:  # save the screenshot to a file; result.screenshot is base64-decoded to raw bytes first
+        f.write(base64.b64decode(result.screenshot))
+    cprint("Screenshot saved to 'screenshot.png'!")
+    print_result(result)
+
 def understanding_parameters(crawler):
    cprint("\n🧠 [bold cyan]Understanding 'bypass_cache' and 'include_raw_html' parameters:[/bold cyan]")
    cprint("By default, Crawl4ai caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action.")
@@ -187,11 +197,11 @@ def main():

    crawler = create_crawler()

-    crawler.always_by_pass_cache = True
    basic_usage(crawler)
    understanding_parameters(crawler)
    
    crawler.always_by_pass_cache = True
+    screenshot_usage(crawler)
    add_chunking_strategy(crawler)
    add_extraction_strategy(crawler)
    add_llm_extraction_strategy(crawler)