1. Make `active_crawls` a dict instead of a set and remove the `jobs` array, for efficient lookup and storage of active crawls and crawl control.
2. Guard `active_crawls` with a lock so that simultaneous push and pop by coroutines doesn't cause a race condition.
3. Move the depth-check logic outside the child-link for loop, since `source_url` doesn't change within the loop.
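
The three changes above can be sketched as follows. This is a minimal illustration only, not the project's actual code: the names `active_crawls`, `crawl_lock`, `max_depth`, and the `crawl` coroutine signature are assumptions made for the example.

```python
import asyncio

# Change 1: a dict (not a set) maps crawl id -> task, enabling lookup and control
active_crawls: dict[str, asyncio.Task] = {}
# Change 2: a lock guarding concurrent add/remove by coroutines
crawl_lock = asyncio.Lock()

async def crawl(crawl_id: str, source_url: str, depth: int,
                max_depth: int, child_links: list[str]) -> None:
    async with crawl_lock:  # avoid a race on simultaneous push/pop
        active_crawls[crawl_id] = asyncio.current_task()
    try:
        # Change 3: depth check once, outside the child-link loop,
        # since source_url and depth don't change per link
        if depth >= max_depth:
            return
        for link in child_links:
            ...  # schedule a child crawl of `link` at depth + 1
    finally:
        async with crawl_lock:
            active_crawls.pop(crawl_id, None)
```

A dict keyed by crawl id also makes cancelling a specific crawl straightforward: look up its task under the lock and call `task.cancel()`.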
This commit is contained in:
Aravind Karnam
2025-01-28 12:39:45 +05:30
parent f34b4878cf
commit 85847ff13f
2 changed files with 33 additions and 42 deletions


@@ -188,11 +188,11 @@ if __name__ == "__main__":
import time
# Run basic example
# start_time = time.perf_counter()
# print("Running basic scraper example...")
# asyncio.run(basic_scraper_example())
# end_time = time.perf_counter()
# print(f"Basic scraper example completed in {end_time - start_time:.2f} seconds")
start_time = time.perf_counter()
print("Running basic scraper example...")
asyncio.run(basic_scraper_example())
end_time = time.perf_counter()
print(f"Basic scraper example completed in {end_time - start_time:.2f} seconds")
# # Run advanced example
print("\nRunning advanced scraper example...")