Replace the ScrapingMode enum with a proper strategy pattern implementation for content scraping.

This change introduces:
- New ContentScrapingStrategy abstract base class
- Concrete WebScrapingStrategy and LXMLWebScrapingStrategy implementations
- New Pydantic models for structured scraping results
- Updated documentation reflecting the new strategy-based approach

BREAKING CHANGE: ScrapingMode enum has been removed. Users should now use ContentScrapingStrategy implementations instead.
22 lines
668 B
Python
22 lines
668 B
Python
import nest_asyncio
|
|
nest_asyncio.apply()
|
|
|
|
import asyncio
|
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LXMLWebScrapingStrategy, CacheMode
|
|
|
|
async def main():
    """Crawl https://example.com using the LXML scraping strategy and report the outcome.

    Builds a ``CrawlerRunConfig`` that bypasses the cache and selects
    ``LXMLWebScrapingStrategy``, runs a single crawl through
    ``AsyncWebCrawler``, then prints the success flag and, if the crawl
    succeeded, the length of the raw markdown that was produced.
    """
    config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        scraping_strategy=LXMLWebScrapingStrategy(),  # Faster alternative to default BeautifulSoup
    )
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://example.com",
            config=config,
        )
        print(f"Success: {result.success}")
        if not result.success:
            # A failed crawl has no markdown to measure; report the reason
            # rather than crashing on the attribute access below.
            print(f"Error: {result.error_message}")
            return
        # NOTE(review): later crawl4ai releases appear to expose this as
        # `result.markdown` instead of `markdown_v2` — confirm against the
        # installed version.
        print(f"Markdown length: {len(result.markdown_v2.raw_markdown)}")
|
|
|
|
if __name__ == "__main__":
    # Only start the crawl when executed as a script, not when imported.
    asyncio.run(main())
|