refactor(core): reorganize project structure and remove legacy code
Major reorganization of the project structure:

- Moved legacy synchronous crawler code to the legacy folder
- Removed deprecated CLI and docs manager
- Consolidated version manager into utils.py
- Added CrawlerHub to __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in chunking and crawler strategies

BREAKING CHANGE: Removed synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
This commit is contained in:
20
crawl4ai/crawlers/amazon_product/crawler.py
Normal file
20
crawl4ai/crawlers/amazon_product/crawler.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import json

from crawl4ai.hub import BaseCrawler
|
||||
|
||||
# Module-level metadata describing this crawler: its version, the site(s) it
# was tested on, a rate limit, and the fields of the output schema.
# NOTE(review): how this dict is consumed is not visible in this file —
# presumably read by the crawler hub when the module is loaded; confirm.
__meta__ = {
    "version": "1.2.0",
    "tested_on": ["amazon.com"],
    "rate_limit": "50 RPM",
    "schema": {"product": ["name", "price"]},
}
|
||||
|
||||
class AmazonProductCrawler(BaseCrawler):
    """Crawler for Amazon product pages, built on ``crawl4ai.hub.BaseCrawler``.

    ``run`` currently returns a fixed placeholder payload; it logs ``url``
    but does not fetch it.
    """

    async def run(self, url: str, **kwargs) -> str:
        """Crawl ``url`` and return the outcome as a JSON string.

        Args:
            url: Address of the product page to crawl.
            **kwargs: Accepted for interface flexibility; not used here.

        Returns:
            On success, a JSON document containing a ``product`` object.
            If an exception is raised inside the crawl step, a JSON document
            with ``error`` (the exception message) and ``metadata`` keys.
        """
        try:
            self.logger.info(f"Crawling {url}")
            return '{"product": {"name": "Test Amazon Product"}}'
        except Exception as e:
            # Failures are reported in-band as JSON rather than re-raised.
            self.logger.error(f"Crawl failed: {e}")
            # NOTE(review): ``self.meta`` is not defined in this file;
            # presumably supplied by BaseCrawler or the hub loader — confirm.
            # ``json`` must be imported at module level for this path to work.
            return json.dumps({
                "error": str(e),
                "metadata": self.meta,  # Include meta in error response
            })
|
||||
Reference in New Issue
Block a user