refactor(examples): update API usage in features demo
Update the demo script to use the new crawler.arun_many() API instead of dispatcher.run_urls(), and fix result access patterns. Also improve code formatting and remove extra whitespace.

- Replace dispatcher.run_urls with crawler.arun_many
- Update streaming demo to use new API and correct result access
- Clean up whitespace and formatting
- Simplify result property access patterns
This commit is contained in:
@@ -85,17 +85,16 @@ async def demo_memory_dispatcher():
|
|||||||
)
|
)
|
||||||
|
|
||||||
print("\n🚀 Starting batch crawl...")
|
print("\n🚀 Starting batch crawl...")
|
||||||
results = await dispatcher.run_urls(
|
results = await crawler.arun_many(
|
||||||
urls=urls,
|
urls=urls,
|
||||||
crawler=crawler,
|
|
||||||
config=crawler_config,
|
config=crawler_config,
|
||||||
|
dispatcher=dispatcher
|
||||||
)
|
)
|
||||||
print(f"\n✅ Completed {len(results)} URLs successfully")
|
print(f"\n✅ Completed {len(results)} URLs successfully")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n❌ Error in memory dispatcher demo: {str(e)}")
|
print(f"\n❌ Error in memory dispatcher demo: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
async def demo_streaming_support():
|
async def demo_streaming_support():
|
||||||
"""
|
"""
|
||||||
2. Streaming Support Demo
|
2. Streaming Support Demo
|
||||||
@@ -115,16 +114,17 @@ async def demo_streaming_support():
|
|||||||
dispatcher = MemoryAdaptiveDispatcher(max_session_permit=3, check_interval=0.5)
|
dispatcher = MemoryAdaptiveDispatcher(max_session_permit=3, check_interval=0.5)
|
||||||
|
|
||||||
print("Starting streaming crawl...")
|
print("Starting streaming crawl...")
|
||||||
async for result in dispatcher.run_urls_stream(
|
async for result in await crawler.arun_many(
|
||||||
urls=urls, crawler=crawler, config=crawler_config
|
urls=urls,
|
||||||
|
config=crawler_config,
|
||||||
|
dispatcher=dispatcher
|
||||||
):
|
):
|
||||||
# Process each result as it arrives
|
# Process each result as it arrives
|
||||||
print(
|
print(
|
||||||
f"Received result for {result.url} - Success: {result.result.success}"
|
f"Received result for {result.url} - Success: {result.success}"
|
||||||
)
|
)
|
||||||
if result.result.success:
|
if result.success:
|
||||||
print(f"Content length: {len(result.result.markdown)}")
|
print(f"Content length: {len(result.markdown)}")
|
||||||
|
|
||||||
|
|
||||||
async def demo_content_scraping():
|
async def demo_content_scraping():
|
||||||
"""
|
"""
|
||||||
@@ -138,7 +138,10 @@ async def demo_content_scraping():
|
|||||||
url = "https://example.com/article"
|
url = "https://example.com/article"
|
||||||
|
|
||||||
# Configure with the new LXML strategy
|
# Configure with the new LXML strategy
|
||||||
config = CrawlerRunConfig(scraping_strategy=LXMLWebScrapingStrategy(), verbose=True)
|
config = CrawlerRunConfig(
|
||||||
|
scraping_strategy=LXMLWebScrapingStrategy(),
|
||||||
|
verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
print("Scraping content with LXML strategy...")
|
print("Scraping content with LXML strategy...")
|
||||||
async with crawler:
|
async with crawler:
|
||||||
@@ -146,7 +149,6 @@ async def demo_content_scraping():
|
|||||||
if result.success:
|
if result.success:
|
||||||
print("Successfully scraped content using LXML strategy")
|
print("Successfully scraped content using LXML strategy")
|
||||||
|
|
||||||
|
|
||||||
async def demo_llm_markdown():
|
async def demo_llm_markdown():
|
||||||
"""
|
"""
|
||||||
4. LLM-Powered Markdown Generation Demo
|
4. LLM-Powered Markdown Generation Demo
|
||||||
@@ -197,7 +199,6 @@ async def demo_llm_markdown():
|
|||||||
print(result.markdown_v2.fit_markdown[:500])
|
print(result.markdown_v2.fit_markdown[:500])
|
||||||
print("Successfully generated LLM-filtered markdown")
|
print("Successfully generated LLM-filtered markdown")
|
||||||
|
|
||||||
|
|
||||||
async def demo_robots_compliance():
|
async def demo_robots_compliance():
|
||||||
"""
|
"""
|
||||||
5. Robots.txt Compliance Demo
|
5. Robots.txt Compliance Demo
|
||||||
@@ -221,8 +222,6 @@ async def demo_robots_compliance():
|
|||||||
elif result.success:
|
elif result.success:
|
||||||
print(f"Successfully crawled: {result.url}")
|
print(f"Successfully crawled: {result.url}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def demo_json_schema_generation():
|
async def demo_json_schema_generation():
|
||||||
"""
|
"""
|
||||||
7. LLM-Powered Schema Generation Demo
|
7. LLM-Powered Schema Generation Demo
|
||||||
@@ -276,7 +275,6 @@ async def demo_json_schema_generation():
|
|||||||
print(json.dumps(result.extracted_content, indent=2) if result.extracted_content else None)
|
print(json.dumps(result.extracted_content, indent=2) if result.extracted_content else None)
|
||||||
print("Successfully used generated schema for crawling")
|
print("Successfully used generated schema for crawling")
|
||||||
|
|
||||||
|
|
||||||
async def demo_proxy_rotation():
|
async def demo_proxy_rotation():
|
||||||
"""
|
"""
|
||||||
8. Proxy Rotation Demo
|
8. Proxy Rotation Demo
|
||||||
|
|||||||
Reference in New Issue
Block a user