Major reorganization of the project structure:
- Moved legacy synchronous crawler code to legacy folder
- Removed deprecated CLI and docs manager
- Consolidated version manager into utils.py
- Added CrawlerHub to __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in chunking and crawler strategies

BREAKING CHANGE: Removed synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
20 lines · 655 B · Python
import json

from crawl4ai.hub import BaseCrawler
|
|
|
|
# Crawler metadata consumed by the crawl4ai hub registry.
__meta__ = dict(
    version="1.2.0",                        # crawler implementation version
    tested_on=["amazon.com"],               # domains this crawler was validated against
    rate_limit="50 RPM",                    # presumably requests-per-minute cap — TODO confirm hub semantics
    schema={"product": ["name", "price"]},  # fields this crawler emits per product
)
|
|
|
|
class AmazonProductCrawler(BaseCrawler):
    """Hub crawler for Amazon product pages.

    On success, ``run`` returns a JSON string containing product data; on
    failure it returns a JSON error payload that includes the crawler's
    metadata.
    """

    async def run(self, url: str, **kwargs) -> str:
        """Crawl ``url`` and return the result as a JSON string.

        Args:
            url: The Amazon product page URL to crawl.
            **kwargs: Extra options forwarded by the hub (unused here).

        Returns:
            A JSON string — either the product payload, or an error object
            with ``"error"`` and ``"metadata"`` keys if the crawl raised.
        """
        try:
            self.logger.info(f"Crawling {url}")
            # NOTE(review): stub payload — real extraction logic is expected
            # to replace this hard-coded product.
            return '{"product": {"name": "Test Amazon Product"}}'
        except Exception as e:
            self.logger.error(f"Crawl failed: {str(e)}")
            # FIX: `json` was used here without ever being imported, so this
            # error path raised NameError instead of returning the payload.
            # Resolved by adding `import json` at the top of the file.
            return json.dumps({
                "error": str(e),
                # presumably `self.meta` is populated by BaseCrawler from
                # __meta__ — TODO confirm against crawl4ai.hub.
                "metadata": self.meta,  # include meta in error response
            })