refactor(core): reorganize project structure and remove legacy code
Major reorganization of the project structure:
- Moved legacy synchronous crawler code to legacy folder
- Removed deprecated CLI and docs manager
- Consolidated version manager into utils.py
- Added CrawlerHub to __init__.py exports
- Fixed type hints in async_webcrawler.py
- Fixed minor bugs in chunking and crawler strategies

BREAKING CHANGE: Removed synchronous WebCrawler, CLI, and docs management functionality. Users should migrate to AsyncWebCrawler.
This commit is contained in:
17
tests/20241401/test_crawlers.py
Normal file
17
tests/20241401/test_crawlers.py
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
# example_usage.py
|
||||
import asyncio
|
||||
from crawl4ai.crawlers import get_crawler
|
||||
|
||||
async def main():
    """Fetch the crawler registered as "example_site.content" and print its result for example.com."""
    # Resolve the crawler callable by its registered name.
    crawl_example = get_crawler("example_site.content")

    # Await the crawl of example.com and print whatever it returns.
    print(await crawl_example(url="https://example.com"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Drive the coroutine to completion only when executed as a script.
    asyncio.run(main())
|
||||
30
tests/hub/test_simple.py
Normal file
30
tests/hub/test_simple.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# test.py
|
||||
from crawl4ai import CrawlerHub
|
||||
import json
|
||||
|
||||
async def amazon_example():
    """Run the "amazon_product" crawler from the hub and print its metadata and result.

    Prints "Crawler not found!" when the hub returns a falsy value for that name.
    """
    product_crawler_cls = CrawlerHub.get("amazon_product")
    if not product_crawler_cls:
        print("Crawler not found!")
        return

    product_crawler = product_crawler_cls()
    meta = product_crawler_cls.meta
    print(f"Crawler version: {meta['version']}")
    # 'rate_limit' is optional in the metadata; report 'Unlimited' when absent.
    print(f"Rate limits: {meta.get('rate_limit', 'Unlimited')}")
    print(await product_crawler.run("https://amazon.com/test"))
|
||||
|
||||
async def google_example():
    """Run a text search and an image search through the "google_search" crawler.

    Prints the JSON-decoded text results followed by the image results as
    returned. Prints "Crawler not found!" and returns early when the hub has
    no crawler under that name, matching the guard used in amazon_example.
    """
    import os

    # Get crawler dynamically
    crawler_cls = CrawlerHub.get("google_search")
    if not crawler_cls:
        # Without this guard, calling a missing crawler class raises TypeError.
        print("Crawler not found!")
        return
    crawler = crawler_cls()

    # Resolve the schema cache under the current user's home directory rather
    # than a single developer's absolute path, so the script runs anywhere.
    schema_cache_path = os.path.expanduser("~/.crawl4ai")

    # Text search
    # NOTE(review): assumes run() returns a JSON string for text searches — confirm.
    text_results = await crawler.run(
        query="apple inc",
        search_type="text",
        schema_cache_path=schema_cache_path,
    )
    print(json.loads(text_results))

    # Image search
    image_results = await crawler.run(query="apple inc", search_type="image")
    print(image_results)
|
||||
|
||||
if __name__ == "__main__":
    import asyncio

    # Only the Google search demo runs by default; the Amazon demo is kept
    # here, disabled, for manual use.
    # asyncio.run(amazon_example())
    asyncio.run(google_example())
|
||||
Reference in New Issue
Block a user