Update README for speed example

2024-06-24 23:06:12 +08:00
parent 1fffeeedd2
commit a0dff192ae
2 changed files with 11 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -72,14 +72,12 @@ print(f"Time taken: {end - start}")
 Let's take a look at the calculated time for the above code snippet:

 ```bash
-[LOG] 🚀 Crawling done, success: True, time taken: 0.05835 seconds
-[LOG] 🔥 Extracting semantic blocks, Strategy: NoExtractionStrategy
-[LOG] 🚀 Extraction, time taken: 0.0588 seconds.
-Time taken: 4.29332
+[LOG] 🚀 Crawling done, success: True, time taken: 1.3623387813568115 seconds
+[LOG] 🚀 Content extracted, success: True, time taken: 0.05715131759643555 seconds
+[LOG] 🚀 Extraction, time taken: 0.05750393867492676 seconds.
+Time taken: 1.439958095550537
 ```
-
-It took around 4.29 seconds to crawl the page, extract the content, and return the result.
-
+Fetching the content from the page took 1.3623 seconds, and extracting the content took 0.0575 seconds. 🚀

 ### Extract Structured Data from Web Pages 📊

--- a/crawl4ai/web_crawler.py
+++ b/crawl4ai/web_crawler.py
@@ -47,7 +47,7 @@ class WebCrawler:
            extraction_strategy= NoExtractionStrategy(),
            bypass_cache=False,
            verbose = False,
-            warmup=True
+            # warmup=True
        )
        self.ready = True
        print("[LOG] 🌞 WebCrawler is ready to crawl")
@@ -160,7 +160,11 @@ class WebCrawler:
            if not cached or not html:
                if user_agent:
                    self.crawler_strategy.update_user_agent(user_agent)
+                t1 = time.time()
                html = self.crawler_strategy.crawl(url)
+                t2 = time.time()
+                if verbose:
+                    print(f"[LOG] 🚀 Crawling done for {url}, success: {bool(html)}, time taken: {t2 - t1} seconds")
                if screenshot:
                    screenshot_data = self.crawler_strategy.take_screenshot()

@@ -190,7 +194,7 @@ class WebCrawler:
                t1 = time.time()
                result = get_content_of_website_optimized(url, html, word_count_threshold, css_selector=css_selector, only_text=kwargs.get("only_text", False))
                if verbose:
-                    print(f"[LOG] 🚀 Crawling done for {url}, success: True, time taken: {time.time() - t1} seconds")
+                    print(f"[LOG] 🚀 Content extracted for {url}, success: True, time taken: {time.time() - t1} seconds")
                
                if result is None:
                    raise ValueError(f"Failed to extract content from the website: {url}")