refactor(core): reorganize project structure and remove legacy code

Major reorganization of the project structure:

- Moved legacy synchronous crawler code to the legacy folder
- Removed the deprecated CLI and docs manager
- Consolidated the version manager into utils.py
- Added CrawlerHub to the __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in the chunking and crawler strategies

BREAKING CHANGE: Removed the synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
2025-01-30 19:35:06 +08:00
parent 31938fb922
commit f81712eb91
23 changed files with 425 additions and 4 deletions
--- a/tests/20241401/test_crawlers.py
+++ b/tests/20241401/test_crawlers.py
@@ -0,0 +1,17 @@
+
+# example_usage.py
+import asyncio
+from crawl4ai.crawlers import get_crawler
+
+async def main():
+    """Look up the "example_site.content" crawler and print what it returns for example.com."""
+    # Resolve the crawler callable by its registered name.
+    site_crawler = get_crawler("example_site.content")
+
+    # Await the crawler with the target URL, then show the outcome.
+    crawl_output = await site_crawler(url="https://example.com")
+    print(crawl_output)
+            
+
+if __name__ == "__main__":  # only when executed as a script
+    asyncio.run(main())  # run the async entry point
--- a/tests/hub/test_simple.py
+++ b/tests/hub/test_simple.py
@@ -0,0 +1,30 @@
+# test.py
+from crawl4ai import CrawlerHub
+import json
+
+async def amazon_example():
+    """Print the "amazon_product" crawler's metadata and one crawl result, or a not-found notice."""
+    if not (product_cls := CrawlerHub.get("amazon_product")):
+        return print("Crawler not found!")  # print() yields None, so the return value is unchanged
+    product_crawler = product_cls()
+    print(f"Crawler version: {product_cls.meta['version']}")
+    print(f"Rate limits: {product_cls.meta.get('rate_limit', 'Unlimited')}")
+    print(await product_crawler.run("https://amazon.com/test"))
+
+async def google_example():
+    """Run a text search and an image search with the "google_search" crawler and print both."""
+    crawler_cls = CrawlerHub.get("google_search")
+    if not crawler_cls:  # guard against a lookup miss, as amazon_example does
+        print("Crawler not found!")
+        return
+    crawler = crawler_cls()
+    # NOTE(review): schema_cache_path is a developer-specific absolute path; confirm before reuse.
+    text_results = await crawler.run(query="apple inc", search_type="text", schema_cache_path="/Users/unclecode/.crawl4ai")
+    print(json.loads(text_results))  # text results are decoded with json.loads before printing
+    image_results = await crawler.run(query="apple inc", search_type="image")  # image search
+    print(image_results)  # printed as returned, without decoding
+
+if __name__ == "__main__":  # script entry point
+    import asyncio  # local import: only this guarded block uses asyncio
+    # asyncio.run(amazon_example())  # alternative example, currently disabled
+    asyncio.run(google_example())  # only the Google search example runs