#!/usr/bin/env python3 """ Demo: How users will call the Adaptive Digest endpoint This shows practical examples of how developers would use the adaptive crawling feature to intelligently gather relevant content based on queries. """ import asyncio import time from typing import Any, Dict, Optional import aiohttp # Configuration API_BASE_URL = "http://localhost:11235" API_TOKEN = None # Set if your API requires authentication class AdaptiveEndpointDemo: def __init__(self, base_url: str = API_BASE_URL, token: str = None): self.base_url = base_url self.headers = {"Content-Type": "application/json"} if token: self.headers["Authorization"] = f"Bearer {token}" async def submit_adaptive_job( self, start_url: str, query: str, config: Optional[Dict] = None ) -> str: """Submit an adaptive crawling job and return task ID""" payload = {"start_url": start_url, "query": query} if config: payload["config"] = config async with aiohttp.ClientSession() as session: async with session.post( f"{self.base_url}/adaptive/digest/job", headers=self.headers, json=payload, ) as response: if response.status == 202: # Accepted result = await response.json() return result["task_id"] else: error_text = await response.text() raise Exception(f"API Error {response.status}: {error_text}") async def check_job_status(self, task_id: str) -> Dict[str, Any]: """Check the status of an adaptive crawling job""" async with aiohttp.ClientSession() as session: async with session.get( f"{self.base_url}/adaptive/digest/job/{task_id}", headers=self.headers ) as response: if response.status == 200: return await response.json() else: error_text = await response.text() raise Exception(f"API Error {response.status}: {error_text}") async def wait_for_completion( self, task_id: str, max_wait: int = 300 ) -> Dict[str, Any]: """Poll job status until completion or timeout""" start_time = time.time() while time.time() - start_time < max_wait: status = await self.check_job_status(task_id) if status["status"] == "COMPLETED": return status elif status["status"] == "FAILED": raise Exception(f"Job failed: {status.get('error', 'Unknown error')}") print( f"ā³ Job {status['status']}... (elapsed: {int(time.time() - start_time)}s)" ) await asyncio.sleep(3) # Poll every 3 seconds raise Exception(f"Job timed out after {max_wait} seconds") async def demo_research_assistant(self): """Demo: Research assistant for academic papers""" print("šŸ”¬ Demo: Academic Research Assistant") print("=" * 50) try: print("šŸš€ Submitting job: Find research on 'machine learning optimization'") task_id = await self.submit_adaptive_job( start_url="https://arxiv.org", query="machine learning optimization techniques recent papers", config={ "max_depth": 3, "confidence_threshold": 0.7, "max_pages": 20, "content_filters": ["academic", "research"], }, ) print(f"šŸ“‹ Job submitted with ID: {task_id}") # Wait for completion result = await self.wait_for_completion(task_id) print("āœ… Research completed!") print(f"šŸŽÆ Confidence score: {result['result']['confidence']:.2f}") print(f"šŸ“Š Coverage stats: {result['result']['coverage_stats']}") # Show relevant content found relevant_content = result["result"]["relevant_content"] print(f"\nšŸ“š Found {len(relevant_content)} relevant research papers:") for i, content in enumerate(relevant_content[:3], 1): title = content.get("title", "Untitled")[:60] relevance = content.get("relevance_score", 0) print(f" {i}. {title}... (relevance: {relevance:.2f})") except Exception as e: print(f"āŒ Error: {e}") async def demo_market_intelligence(self): """Demo: Market intelligence gathering""" print("\nšŸ’¼ Demo: Market Intelligence Gathering") print("=" * 50) try: print("šŸš€ Submitting job: Analyze competitors in 'sustainable packaging'") task_id = await self.submit_adaptive_job( start_url="https://packagingeurope.com", query="sustainable packaging solutions eco-friendly materials competitors market trends", config={ "max_depth": 4, "confidence_threshold": 0.6, "max_pages": 30, "content_filters": ["business", "industry"], "follow_external_links": True, }, ) print(f"šŸ“‹ Job submitted with ID: {task_id}") # Wait for completion result = await self.wait_for_completion(task_id) print("āœ… Market analysis completed!") print(f"šŸŽÆ Intelligence confidence: {result['result']['confidence']:.2f}") # Analyze findings relevant_content = result["result"]["relevant_content"] print( f"\nšŸ“ˆ Market intelligence gathered from {len(relevant_content)} sources:" ) companies = set() trends = [] for content in relevant_content: # Extract company mentions (simplified) text = content.get("content", "") if any( word in text.lower() for word in ["company", "corporation", "inc", "ltd"] ): # This would be more sophisticated in real implementation companies.add(content.get("source_url", "Unknown")) # Extract trend keywords if any( word in text.lower() for word in ["trend", "innovation", "future"] ): trends.append(content.get("title", "Trend")) print(f"šŸ¢ Companies analyzed: {len(companies)}") print(f"šŸ“Š Trends identified: {len(trends)}") except Exception as e: print(f"āŒ Error: {e}") async def demo_content_curation(self): """Demo: Content curation for newsletter""" print("\nšŸ“° Demo: Content Curation for Tech Newsletter") print("=" * 50) try: print("šŸš€ Submitting job: Curate content about 'AI developments this week'") task_id = await self.submit_adaptive_job( start_url="https://techcrunch.com", query="artificial intelligence AI developments news this week recent advances", config={ "max_depth": 2, "confidence_threshold": 0.8, "max_pages": 25, "content_filters": ["news", "recent"], "date_range": "last_7_days", }, ) print(f"šŸ“‹ Job submitted with ID: {task_id}") # Wait for completion result = await self.wait_for_completion(task_id) print("āœ… Content curation completed!") print(f"šŸŽÆ Curation confidence: {result['result']['confidence']:.2f}") # Process curated content relevant_content = result["result"]["relevant_content"] print(f"\nšŸ“® Curated {len(relevant_content)} articles for your newsletter:") # Group by category/topic categories = { "AI Research": [], "Industry News": [], "Product Launches": [], "Other": [], } for content in relevant_content: title = content.get("title", "Untitled") if any( word in title.lower() for word in ["research", "study", "paper"] ): categories["AI Research"].append(content) elif any( word in title.lower() for word in ["company", "startup", "funding"] ): categories["Industry News"].append(content) elif any( word in title.lower() for word in ["launch", "release", "unveil"] ): categories["Product Launches"].append(content) else: categories["Other"].append(content) for category, articles in categories.items(): if articles: print(f"\nšŸ“‚ {category} ({len(articles)} articles):") for article in articles[:2]: # Show top 2 per category title = article.get("title", "Untitled")[:50] print(f" • {title}...") except Exception as e: print(f"āŒ Error: {e}") async def demo_product_research(self): """Demo: Product research and comparison""" print("\nšŸ›ļø Demo: Product Research & Comparison") print("=" * 50) try: print("šŸš€ Submitting job: Research 'best wireless headphones 2024'") task_id = await self.submit_adaptive_job( start_url="https://www.cnet.com", query="best wireless headphones 2024 reviews comparison features price", config={ "max_depth": 3, "confidence_threshold": 0.75, "max_pages": 20, "content_filters": ["review", "comparison"], "extract_structured_data": True, }, ) print(f"šŸ“‹ Job submitted with ID: {task_id}") # Wait for completion result = await self.wait_for_completion(task_id) print("āœ… Product research completed!") print(f"šŸŽÆ Research confidence: {result['result']['confidence']:.2f}") # Analyze product data relevant_content = result["result"]["relevant_content"] print( f"\nšŸŽ§ Product research summary from {len(relevant_content)} sources:" ) # Extract product mentions (simplified example) products = {} for content in relevant_content: text = content.get("content", "").lower() # Look for common headphone brands brands = [ "sony", "bose", "apple", "sennheiser", "jabra", "audio-technica", ] for brand in brands: if brand in text: if brand not in products: products[brand] = 0 products[brand] += 1 print("šŸ·ļø Product mentions:") for product, mentions in sorted( products.items(), key=lambda x: x[1], reverse=True )[:5]: print(f" {product.title()}: {mentions} mentions") except Exception as e: print(f"āŒ Error: {e}") async def demo_monitoring_pipeline(self): """Demo: Set up a monitoring pipeline for ongoing content tracking""" print("\nšŸ“” Demo: Content Monitoring Pipeline") print("=" * 50) monitoring_queries = [ { "name": "Brand Mentions", "start_url": "https://news.google.com", "query": "YourBrand company news mentions", "priority": "high", }, { "name": "Industry Trends", "start_url": "https://techcrunch.com", "query": "SaaS industry trends 2024", "priority": "medium", }, { "name": "Competitor Activity", "start_url": "https://crunchbase.com", "query": "competitor funding announcements product launches", "priority": "high", }, ] print("šŸš€ Starting monitoring pipeline with 3 queries...") jobs = {} # Submit all monitoring jobs for query_config in monitoring_queries: print(f"\nšŸ“‹ Submitting: {query_config['name']}") try: task_id = await self.submit_adaptive_job( start_url=query_config["start_url"], query=query_config["query"], config={ "max_depth": 2, "confidence_threshold": 0.6, "max_pages": 15, }, ) jobs[query_config["name"]] = { "task_id": task_id, "priority": query_config["priority"], "status": "submitted", } print(f" āœ… Job ID: {task_id}") except Exception as e: print(f" āŒ Failed: {e}") # Monitor all jobs print(f"\nā³ Monitoring {len(jobs)} jobs...") completed_jobs = {} max_wait = 180 # 3 minutes total start_time = time.time() while jobs and (time.time() - start_time) < max_wait: for name, job_info in list(jobs.items()): try: status = await self.check_job_status(job_info["task_id"]) if status["status"] == "COMPLETED": completed_jobs[name] = status del jobs[name] print(f" āœ… {name} completed") elif status["status"] == "FAILED": print(f" āŒ {name} failed: {status.get('error', 'Unknown')}") del jobs[name] except Exception as e: print(f" āš ļø Error checking {name}: {e}") if jobs: # Still have pending jobs await asyncio.sleep(5) # Summary print("\nšŸ“Š Monitoring Pipeline Summary:") print(f" āœ… Completed: {len(completed_jobs)} jobs") print(f" ā³ Pending: {len(jobs)} jobs") for name, result in completed_jobs.items(): confidence = result["result"]["confidence"] content_count = len(result["result"]["relevant_content"]) print(f" {name}: {content_count} items (confidence: {confidence:.2f})") async def main(): """Run all adaptive endpoint demos""" print("🧠 Crawl4AI Adaptive Digest Endpoint - User Demo") print("=" * 60) print("This demo shows how developers use adaptive crawling") print("to intelligently gather relevant content based on queries.\n") demo = AdaptiveEndpointDemo() try: # Run individual demos await demo.demo_research_assistant() await demo.demo_market_intelligence() await demo.demo_content_curation() await demo.demo_product_research() # Run monitoring pipeline demo await demo.demo_monitoring_pipeline() print("\nšŸŽ‰ All demos completed successfully!") print("\nReal-world usage patterns:") print("1. Submit multiple jobs for parallel processing") print("2. Poll job status to track progress") print("3. Process results when jobs complete") print("4. Use confidence scores to filter quality content") print("5. Set up monitoring pipelines for ongoing intelligence") except Exception as e: print(f"\nāŒ Demo failed: {e}") print("Make sure the Crawl4AI server is running on localhost:11235") if __name__ == "__main__": asyncio.run(main())