refactor(core): reorganize project structure and remove legacy code

Major reorganization of the project structure:
- Moved legacy synchronous crawler code to legacy folder
- Removed deprecated CLI and docs manager
- Consolidated version manager into utils.py
- Added CrawlerHub to __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in chunking and crawler strategies

BREAKING CHANGE: Removed synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
2025-01-30 19:35:06 +08:00
parent 31938fb922
commit f81712eb91
23 changed files with 425 additions and 4 deletions
--- a/crawl4ai/crawlers/amazon_product/crawler.py
+++ b/crawl4ai/crawlers/amazon_product/crawler.py
@@ -0,0 +1,20 @@
+from crawl4ai.hub import BaseCrawler
+
+__meta__ = {  # module-level metadata describing this crawler
+    "version": "1.2.0",  # version string for this crawler definition
+    "tested_on": ["amazon.com"],  # NOTE(review): presumably the sites this crawler was verified against
+    "rate_limit": "50 RPM",  # NOTE(review): presumably requests per minute; nothing in this file enforces it
+    "schema": {"product": ["name", "price"]}  # declared output fields; run() currently returns only "name"
+}
+
+class AmazonProductCrawler(BaseCrawler):
+    async def run(self, url: str, **kwargs) -> str:
+        try:
+            self.logger.info(f"Crawling {url}")
+            return '{"product": {"name": "Test Amazon Product"}}'
+        except Exception as e:
+            self.logger.error(f"Crawl failed: {str(e)}")
+            return json.dumps({
+                "error": str(e),
+                "metadata": self.meta  # Include meta in error response
+            })