The crawl4ai showcase script has been significantly expanded to include more detailed examples and demonstrations. This includes live code examples, more detailed explanations, and a new real-world example. A new file, uv.lock, has also been added.
1584 lines
62 KiB
Python
1584 lines
62 KiB
Python
"""
|
|
🚀 Crawl4AI v0.7.0 Feature Showcase
|
|
=====================================
|
|
This demo showcases the major features introduced in v0.7.0:
|
|
1. Link Preview/Peek - Advanced link analysis with 3-layer scoring
|
|
2. Adaptive Crawling - Intelligent crawling with confidence tracking
|
|
3. Virtual Scroll - Capture content from modern infinite scroll pages
|
|
4. C4A Script - Domain-specific language for web automation
|
|
5. URL Seeder - Smart URL discovery and filtering
|
|
6. LLM Context Builder - 3D context for AI assistants
|
|
|
|
Let's explore each feature with practical examples!
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
import re
|
|
from typing import List, Dict
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
from rich.panel import Panel
|
|
from rich.syntax import Syntax
|
|
from rich.layout import Layout
|
|
from rich.live import Live
|
|
from rich import box
|
|
|
|
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, AdaptiveCrawler, AdaptiveConfig, BrowserConfig, CacheMode
|
|
from crawl4ai import AsyncUrlSeeder, SeedingConfig
|
|
from crawl4ai.async_configs import LinkPreviewConfig, VirtualScrollConfig
|
|
from crawl4ai import c4a_compile, CompilationResult
|
|
|
|
# Initialize Rich console for beautiful output
# Shared, module-level Console instance used by every demo function below
# for styled terminal rendering (tables, panels, progress spinners).
console = Console()
|
|
|
|
|
|
def print_banner(title: str, subtitle: str = ""):
    """Render a section banner on the shared console.

    Prints a cyan ruled line, the title centered in yellow, an optional
    centered subtitle in dim white, and a closing ruled line.
    """
    rule = '=' * 80
    console.print(f"\n[bold cyan]{rule}[/bold cyan]")
    console.print(f"[bold yellow]{title.center(80)}[/bold yellow]")
    if subtitle:
        console.print(f"[dim white]{subtitle.center(80)}[/dim white]")
    console.print(f"[bold cyan]{rule}[/bold cyan]\n")
|
|
|
|
|
|
def create_score_bar(score: float, max_score: float = 10.0) -> str:
    """Create a visual progress bar for scores.

    Returns a rich-markup string: a 20-cell bar (filled proportionally to
    ``score / max_score``), colored by how full it is, followed by the
    numeric "score/max_score" label.

    Fixes two issues in the original:
    - Color thresholds compared the raw score against 7/4 regardless of
      ``max_score``, so contextual scores on a 0-1 scale always rendered
      red even at 0.95. Thresholds now apply to the fill ratio (0.7/0.4),
      which is identical behavior for the default max_score=10.0.
    - The fill ratio is clamped to [0, 1] so out-of-range scores cannot
      produce a bar longer or shorter than 20 cells.
    """
    # Clamp to [0, 1]; guard against a zero max_score as well.
    ratio = max(0.0, min(score / max_score, 1.0)) if max_score else 0.0
    filled = int(ratio * 20)
    bar = "█" * filled + "░" * (20 - filled)
    color = "green" if ratio >= 0.7 else "yellow" if ratio >= 0.4 else "red"
    return f"[{color}]{bar}[/] {score:.2f}/{max_score}"
|
|
|
|
|
|
async def link_preview_demo(auto_mode: bool = False):
    """
    🔗 Link Preview/Peek Demo
    Showcases the 3-layer scoring system for intelligent link analysis.

    Runs three sub-demos:
      1. Score links on the Python asyncio docs against a query and render
         the top results in a table.
      2. "Research assistant" flow against scikit-learn.org with two
         different queries (the first intentionally finds little).
      3. A static walkthrough of how intrinsic, contextual, and total
         scores combine.

    Args:
        auto_mode: When True, skip the interactive "Press Enter" pauses
            and sleep briefly instead (for unattended runs).

    NOTE(review): performs live network crawls via AsyncWebCrawler, so
    results depend on the remote sites being reachable.
    """
    print_banner(
        "🔗 LINK PREVIEW & INTELLIGENT SCORING",
        "Advanced link analysis with intrinsic, contextual, and total scoring"
    )

    # Explain the feature
    console.print(Panel(
        "[bold]What is Link Preview?[/bold]\n\n"
        "Link Preview analyzes links on a page with a sophisticated 3-layer scoring system:\n\n"
        "• [cyan]Intrinsic Score[/cyan]: Quality based on link text, position, and attributes (0-10)\n"
        "• [magenta]Contextual Score[/magenta]: Relevance to your query using semantic analysis (0-1)\n"
        "• [green]Total Score[/green]: Combined score for intelligent prioritization\n\n"
        "This helps you find the most relevant and high-quality links automatically!",
        title="Feature Overview",
        border_style="blue"
    ))

    # Brief pause so the viewer can read the overview panel
    await asyncio.sleep(2)

    # Demo 1: Basic link analysis with visual scoring
    console.print("\n[bold yellow]Demo 1: Analyzing Python Documentation Links[/bold yellow]\n")

    query = "async await coroutines tutorial"
    console.print(f"[cyan]🔍 Query:[/cyan] [bold]{query}[/bold]")
    console.print("[dim]Looking for links related to asynchronous programming...[/dim]\n")

    config = CrawlerRunConfig(
        link_preview_config=LinkPreviewConfig(
            include_internal=True,
            include_external=False,
            max_links=10,
            concurrency=5,
            query=query,  # Our search context
            verbose=False  # We'll handle the display
        ),
        score_links=True,
        only_text=True
    )

    # Create a progress display
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console
    ) as progress:
        task = progress.add_task("[cyan]Crawling and analyzing links...", total=None)

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun("https://docs.python.org/3/library/asyncio.html", config=config)

        progress.remove_task(task)

    if result.success:
        # Extract links with scores
        links = result.links.get("internal", [])
        # Keep only links that were actually analyzed (have head data and a score)
        scored_links = [l for l in links if l.get("head_data") and l.get("total_score")]

        # Sort by total score
        scored_links.sort(key=lambda x: x.get("total_score", 0), reverse=True)

        # Create a beautiful table for results
        table = Table(
            title="🎯 Top Scored Links",
            box=box.ROUNDED,
            show_lines=True,
            title_style="bold magenta"
        )

        table.add_column("Rank", style="cyan", width=6)
        table.add_column("Link Text", style="white", width=40)
        table.add_column("Intrinsic Score", width=25)
        table.add_column("Contextual Score", width=25)
        table.add_column("Total Score", style="bold", width=15)

        for i, link in enumerate(scored_links[:5], 1):
            intrinsic = link.get('intrinsic_score', 0)
            contextual = link.get('contextual_score', 0)
            total = link.get('total_score', 0)

            # Get link text and title (truncate long anchor text for the table)
            text = link.get('text', '')[:35] + "..." if len(link.get('text', '')) > 35 else link.get('text', '')
            title = link.get('head_data', {}).get('title', 'No title')[:40]

            table.add_row(
                f"#{i}",
                text or title,
                create_score_bar(intrinsic, 10.0),
                create_score_bar(contextual, 1.0),
                f"[bold green]{total:.3f}[/bold green]"
            )

        console.print(table)

        # Show what makes a high-scoring link
        if scored_links:
            best_link = scored_links[0]
            console.print(f"\n[bold green]🏆 Best Match:[/bold green]")
            console.print(f"URL: [link]{best_link['href']}[/link]")
            console.print(f"Title: {best_link.get('head_data', {}).get('title', 'N/A')}")

            desc = best_link.get('head_data', {}).get('meta', {}).get('description', '')
            if desc:
                console.print(f"Description: [dim]{desc[:100]}...[/dim]")

    # Pause between demos unless running unattended
    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 2...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 2: Research Assistant Mode
    console.print("\n[bold yellow]Demo 2: Research Assistant - Finding Machine Learning Resources[/bold yellow]\n")

    # First query - will find no results
    query1 = "deep learning neural networks beginners tutorial"
    console.print(f"[cyan]🔍 Query 1:[/cyan] [bold]{query1}[/bold]")
    console.print("[dim]Note: scikit-learn focuses on traditional ML, not deep learning[/dim]\n")

    # Configure for research mode
    research_config = CrawlerRunConfig(
        link_preview_config=LinkPreviewConfig(
            include_internal=True,
            include_external=True,
            query=query1,
            max_links=20,
            score_threshold=0.3,  # Only high-relevance links
            concurrency=10
        ),
        score_links=True
    )

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console
    ) as progress:
        task = progress.add_task("[cyan]Discovering learning resources...", total=None)

        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun("https://scikit-learn.org/stable/", config=research_config)

        progress.remove_task(task)

    if result.success:
        all_links = result.links.get("internal", []) + result.links.get("external", [])
        # Filter for links with actual scores
        relevant_links = [l for l in all_links if l.get("total_score") is not None and l.get("total_score") > 0.3]
        relevant_links.sort(key=lambda x: x.get("total_score", 0), reverse=True)

        console.print(f"[bold green]📚 Found {len(relevant_links)} highly relevant resources![/bold green]\n")

        # Group by score ranges
        excellent = [l for l in relevant_links if l.get("total_score", 0) > 0.7]
        good = [l for l in relevant_links if 0.5 <= l.get("total_score", 0) <= 0.7]
        fair = [l for l in relevant_links if 0.3 <= l.get("total_score", 0) < 0.5]

        if excellent:
            console.print("[bold green]⭐⭐⭐ Excellent Matches:[/bold green]")
            for link in excellent[:3]:
                title = link.get('head_data', {}).get('title', link.get('text', 'No title'))
                console.print(f" • {title[:60]}... [dim]({link.get('total_score', 0):.2f})[/dim]")

        if good:
            console.print("\n[yellow]⭐⭐ Good Matches:[/yellow]")
            for link in good[:3]:
                title = link.get('head_data', {}).get('title', link.get('text', 'No title'))
                console.print(f" • {title[:60]}... [dim]({link.get('total_score', 0):.2f})[/dim]")

    # Second query - will find results
    console.print("\n[bold cyan]Let's try a more relevant query for scikit-learn:[/bold cyan]\n")

    query2 = "machine learning classification tutorial examples"
    console.print(f"[cyan]🔍 Query 2:[/cyan] [bold]{query2}[/bold]")
    console.print("[dim]This should find relevant content about traditional ML[/dim]\n")

    research_config2 = CrawlerRunConfig(
        link_preview_config=LinkPreviewConfig(
            include_internal=True,
            include_external=True,
            query=query2,
            max_links=15,
            score_threshold=0.2,  # Slightly lower threshold
            concurrency=10
        ),
        score_links=True
    )

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console
    ) as progress:
        task = progress.add_task("[cyan]Finding ML tutorials...", total=None)

        async with AsyncWebCrawler() as crawler:
            result2 = await crawler.arun("https://scikit-learn.org/stable/", config=research_config2)

        progress.remove_task(task)

    if result2.success:
        all_links2 = result2.links.get("internal", []) + result2.links.get("external", [])
        relevant_links2 = [l for l in all_links2 if l.get("total_score") is not None and l.get("total_score") > 0.2]
        relevant_links2.sort(key=lambda x: x.get("total_score", 0), reverse=True)

        console.print(f"[bold green]📚 Now found {len(relevant_links2)} relevant resources![/bold green]\n")

        if relevant_links2:
            console.print("[bold]Top relevant links for ML tutorials:[/bold]")
            for i, link in enumerate(relevant_links2[:5], 1):
                title = link.get('head_data', {}).get('title', link.get('text', 'No title'))
                score = link.get('total_score', 0)
                console.print(f"{i}. [{score:.3f}] {title[:70]}...")

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 3...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 3: Live scoring visualization (no crawling; hard-coded examples)
    console.print("\n[bold yellow]Demo 3: Understanding the 3-Layer Scoring System[/bold yellow]\n")

    demo_query = "async programming tutorial"
    console.print(f"[cyan]🔍 Query:[/cyan] [bold]{demo_query}[/bold]")
    console.print("[dim]Let's see how different link types score against this query[/dim]\n")

    # Create a sample link analysis
    sample_links = [
        {
            "text": "Complete Guide to Async Programming",
            "intrinsic": 9.2,
            "contextual": 0.95,
            "factors": ["Strong keywords", "Title position", "Descriptive text"]
        },
        {
            "text": "API Reference",
            "intrinsic": 6.5,
            "contextual": 0.15,
            "factors": ["Common link text", "Navigation menu", "Low relevance"]
        },
        {
            "text": "Click here",
            "intrinsic": 2.1,
            "contextual": 0.05,
            "factors": ["Poor link text", "No context", "Generic anchor"]
        }
    ]

    for link in sample_links:
        # Illustrative blend: 40% normalized intrinsic + 60% contextual
        total = (link["intrinsic"] / 10 * 0.4) + (link["contextual"] * 0.6)

        panel_content = (
            f"[bold]Link Text:[/bold] {link['text']}\n\n"
            f"[cyan]Intrinsic Score:[/cyan] {create_score_bar(link['intrinsic'], 10.0)}\n"
            f"[magenta]Contextual Score:[/magenta] {create_score_bar(link['contextual'], 1.0)}\n"
            f"[green]Total Score:[/green] {total:.3f}\n\n"
            f"[dim]Factors: {', '.join(link['factors'])}[/dim]"
        )

        console.print(Panel(
            panel_content,
            title=f"Link Analysis",
            border_style="blue" if total > 0.7 else "yellow" if total > 0.3 else "red"
        ))
        await asyncio.sleep(1)

    # Summary
    console.print("\n[bold green]✨ Link Preview Benefits:[/bold green]")
    console.print("• Automatically finds the most relevant links for your research")
    console.print("• Saves time by prioritizing high-quality content")
    console.print("• Provides semantic understanding beyond simple keyword matching")
    console.print("• Enables intelligent crawling decisions\n")
|
|
|
|
|
|
async def adaptive_crawling_demo(auto_mode: bool = False):
    """
    🎯 Adaptive Crawling Demo
    Shows intelligent crawling that knows when to stop.

    Runs three sub-demos:
      1. Statistical strategy with a confidence meter and crawl statistics.
      2. Early stopping once a confidence threshold is reached.
      3. Exporting a knowledge base and re-importing it to resume research.

    Args:
        auto_mode: When True, skip the interactive "Press Enter" pauses and
            sleep briefly instead (for unattended runs).

    Fix over the original: per-page timing divided by len(result.crawled_urls)
    unguarded, raising ZeroDivisionError when the crawl yields no pages; the
    divisors are now clamped with max(..., 1).

    NOTE(review): performs live network crawls via AsyncWebCrawler, so
    results depend on the remote sites being reachable.
    """
    print_banner(
        "🎯 ADAPTIVE CRAWLING",
        "Intelligent crawling that knows when it has enough information"
    )

    # Explain the feature
    console.print(Panel(
        "[bold]What is Adaptive Crawling?[/bold]\n\n"
        "Adaptive Crawling intelligently determines when sufficient information has been gathered:\n\n"
        "• [cyan]Confidence Tracking[/cyan]: Monitors how well we understand the topic (0-100%)\n"
        "• [magenta]Smart Exploration[/magenta]: Follows most promising links based on relevance\n"
        "• [green]Early Stopping[/green]: Stops when confidence threshold is reached\n"
        "• [yellow]Two Strategies[/yellow]: Statistical (fast) vs Embedding (semantic)\n\n"
        "Perfect for research tasks where you need 'just enough' information!",
        title="Feature Overview",
        border_style="blue"
    ))

    await asyncio.sleep(2)

    # Demo 1: Basic adaptive crawling with confidence visualization
    console.print("\n[bold yellow]Demo 1: Statistical Strategy - Fast Topic Understanding[/bold yellow]\n")

    query = "Python async web scraping best practices"
    console.print(f"[cyan]🔍 Research Query:[/cyan] [bold]{query}[/bold]")
    console.print(f"[cyan]🎯 Goal:[/cyan] Gather enough information to understand the topic")
    console.print(f"[cyan]📊 Strategy:[/cyan] Statistical (keyword-based, fast)\n")

    # Configure adaptive crawler
    config = AdaptiveConfig(
        strategy="statistical",
        max_pages=3,  # Limit to 3 pages for demo
        confidence_threshold=0.7,  # Stop at 70% confidence
        top_k_links=2,  # Follow top 2 links per page
        min_gain_threshold=0.05  # Need 5% information gain to continue
    )

    async with AsyncWebCrawler(verbose=False) as crawler:
        adaptive = AdaptiveCrawler(crawler, config)

        # Create progress tracking
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console
        ) as progress:

            # Track crawling progress
            crawl_task = progress.add_task("[cyan]Starting adaptive crawl...", total=None)

            # Start crawling
            start_time = time.time()
            result = await adaptive.digest(
                start_url="https://docs.python.org/3/library/asyncio.html",
                query=query
            )
            elapsed = time.time() - start_time

            progress.remove_task(crawl_task)

        # Display results with visual confidence meter
        console.print(f"\n[bold green]✅ Crawling Complete in {elapsed:.1f} seconds![/bold green]\n")

        # Create confidence visualization (20-cell bar, 5% per cell)
        confidence = adaptive.confidence
        conf_percentage = int(confidence * 100)
        conf_bar = "█" * (conf_percentage // 5) + "░" * (20 - conf_percentage // 5)

        console.print(f"[bold]Confidence Level:[/bold] [{('green' if confidence >= 0.7 else 'yellow' if confidence >= 0.5 else 'red')}]{conf_bar}[/] {conf_percentage}%")

        # Show crawl statistics
        stats_table = Table(
            title="📊 Crawl Statistics",
            box=box.ROUNDED,
            show_lines=True
        )

        stats_table.add_column("Metric", style="cyan", width=25)
        stats_table.add_column("Value", style="white", width=20)

        stats_table.add_row("Pages Crawled", str(len(result.crawled_urls)))
        stats_table.add_row("Knowledge Base Size", f"{len(adaptive.state.knowledge_base)} documents")
        # Calculate total content from CrawlResult objects
        total_content = 0
        for doc in adaptive.state.knowledge_base:
            if hasattr(doc, 'markdown') and doc.markdown and hasattr(doc.markdown, 'raw_markdown'):
                total_content += len(doc.markdown.raw_markdown)
        stats_table.add_row("Total Content", f"{total_content:,} chars")
        # max(..., 1) guards against ZeroDivisionError when nothing was crawled
        stats_table.add_row("Time per Page", f"{elapsed / max(len(result.crawled_urls), 1):.2f}s")

        console.print(stats_table)

        # Show top relevant pages
        console.print("\n[bold]🏆 Most Relevant Pages Found:[/bold]")
        relevant_pages = adaptive.get_relevant_content(top_k=3)
        for i, page in enumerate(relevant_pages, 1):
            console.print(f"\n{i}. [bold]{page['url']}[/bold]")
            console.print(f" Relevance: {page['score']:.2%}")

            # Show key information extracted
            content = page['content'] or ""
            if content:
                # Find most relevant sentence
                sentences = [s.strip() for s in content.split('.') if s.strip()]
                if sentences:
                    console.print(f" [dim]Key insight: {sentences[0]}...[/dim]")

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 2...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 2: Early Stopping Demonstration
    console.print("\n[bold yellow]Demo 2: Early Stopping - Stop When We Know Enough[/bold yellow]\n")

    query2 = "Python requests library tutorial"
    console.print(f"[cyan]🔍 Research Query:[/cyan] [bold]{query2}[/bold]")
    console.print(f"[cyan]🎯 Goal:[/cyan] Stop as soon as we reach 60% confidence")
    console.print("[dim]Watch how adaptive crawling stops early when it has enough info[/dim]\n")

    # Configure for early stopping
    early_stop_config = AdaptiveConfig(
        strategy="statistical",
        max_pages=10,  # Allow up to 10, but will stop early
        confidence_threshold=0.6,  # Lower threshold for demo
        top_k_links=2
    )

    async with AsyncWebCrawler(verbose=False) as crawler:
        adaptive_early = AdaptiveCrawler(crawler, early_stop_config)

        # Track progress
        console.print("[cyan]Starting crawl with early stopping enabled...[/cyan]")
        start_time = time.time()

        result = await adaptive_early.digest(
            start_url="https://docs.python-requests.org/en/latest/",
            query=query2
        )

        elapsed = time.time() - start_time

        # Show results
        console.print(f"\n[bold green]✅ Stopped early at {int(adaptive_early.confidence * 100)}% confidence![/bold green]")
        console.print(f"• Crawled only {len(result.crawled_urls)} pages (max was 10)")
        console.print(f"• Saved time: ~{elapsed:.1f}s total")
        # max(..., 1) guards against ZeroDivisionError when nothing was crawled
        console.print(f"• Efficiency: {elapsed / max(len(result.crawled_urls), 1):.1f}s per page\n")

        # Show why it stopped
        if adaptive_early.confidence >= 0.6:
            console.print("[green]✓ Reached confidence threshold - no need to crawl more![/green]")
        else:
            console.print("[yellow]⚠ Hit max pages limit before reaching threshold[/yellow]")

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 3...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 3: Knowledge Base Export/Import
    console.print("\n[bold yellow]Demo 3: Knowledge Base Export & Import[/bold yellow]\n")

    query3 = "Python decorators tutorial"
    console.print(f"[cyan]🔍 Research Query:[/cyan] [bold]{query3}[/bold]")
    console.print("[dim]Build knowledge base, export it, then import for continued research[/dim]\n")

    # First crawl - build knowledge base
    export_config = AdaptiveConfig(
        strategy="statistical",
        max_pages=2,  # Small for demo
        confidence_threshold=0.5
    )

    async with AsyncWebCrawler(verbose=False) as crawler:
        # Phase 1: Initial research
        console.print("[bold]Phase 1: Initial Research[/bold]")
        adaptive1 = AdaptiveCrawler(crawler, export_config)

        result1 = await adaptive1.digest(
            start_url="https://realpython.com/",
            query=query3
        )

        console.print(f"✓ Built knowledge base with {len(adaptive1.state.knowledge_base)} documents")
        console.print(f"✓ Confidence: {int(adaptive1.confidence * 100)}%\n")

        # Export knowledge base
        console.print("[bold]💾 Exporting Knowledge Base:[/bold]")
        kb_export = adaptive1.export_knowledge_base()

        export_stats = {
            "documents": len(kb_export['documents']),
            "urls": len(kb_export['visited_urls']),
            "size": len(json.dumps(kb_export)),
            "confidence": kb_export['confidence']
        }

        # ints are comma-formatted; the confidence float is shown as a percentage
        for key, value in export_stats.items():
            console.print(f"• {key.capitalize()}: {value:,}" if isinstance(value, int) else f"• {key.capitalize()}: {value:.2%}")

        # Phase 2: Import and continue
        console.print("\n[bold]Phase 2: Import & Continue Research[/bold]")
        adaptive2 = AdaptiveCrawler(crawler, export_config)

        # Import the knowledge base
        adaptive2.import_knowledge_base(kb_export)
        console.print(f"✓ Imported {len(adaptive2.state.knowledge_base)} documents")
        console.print(f"✓ Starting confidence: {int(adaptive2.confidence * 100)}%")

        # Continue research from a different starting point
        console.print("\n[cyan]Continuing research from a different angle...[/cyan]")
        result2 = await adaptive2.digest(
            start_url="https://docs.python.org/3/glossary.html#term-decorator",
            query=query3
        )

        console.print(f"\n[bold green]✅ Research Complete![/bold green]")
        console.print(f"• Total documents: {len(adaptive2.state.knowledge_base)}")
        console.print(f"• Final confidence: {int(adaptive2.confidence * 100)}%")
        console.print(f"• Knowledge preserved across sessions!")

    # Summary
    console.print("\n[bold green]✨ Adaptive Crawling Benefits:[/bold green]")
    console.print("• Automatically stops when enough information is gathered")
    console.print("• Follows most promising links based on relevance")
    console.print("• Saves time and resources with intelligent exploration")
    console.print("• Export/import knowledge bases for continued research")
    console.print("• Choose strategy based on needs: speed vs semantic understanding\n")
|
|
|
|
|
|
async def virtual_scroll_demo(auto_mode: bool = False):
    """
    📜 Virtual Scroll Demo
    Shows how to capture content from modern infinite scroll pages.

    Spins up a local HTTP server over the ./assets test pages, then runs
    three sub-demos: (1) Twitter-like feed crawled with and without virtual
    scroll, (2) Instagram-style grid, (3) a syntax-highlighted code
    walkthrough. The server is always stopped in the finally block.

    Args:
        auto_mode: When True, run headless, shorten waits, and skip the
            interactive "Press Enter" pauses.

    Fix over the original: the "Nx more content" line applied the ``:.1f``
    format spec to a conditional that could yield the string 'N/A' (when
    zero tweets were captured without virtual scroll), raising ValueError;
    the ratio is now formatted separately.
    """
    import os
    import http.server
    import socketserver
    import threading
    from pathlib import Path

    print_banner(
        "📜 VIRTUAL SCROLL SUPPORT",
        "Capture all content from pages with DOM recycling"
    )

    # Explain the feature
    console.print(Panel(
        "[bold]What is Virtual Scroll?[/bold]\n\n"
        "Virtual Scroll handles modern web pages that use DOM recycling techniques:\n\n"
        "• [cyan]Twitter/X-like feeds[/cyan]: Content replaced as you scroll\n"
        "• [magenta]Instagram grids[/magenta]: Visual content with virtualization\n"
        "• [green]News feeds[/green]: Mixed content with different behaviors\n"
        "• [yellow]Infinite scroll[/yellow]: Captures everything, not just visible\n\n"
        "Without this, you'd only get the initially visible content!",
        title="Feature Overview",
        border_style="blue"
    ))

    await asyncio.sleep(2)

    # Start test server with HTML examples
    ASSETS_DIR = Path(__file__).parent / "assets"

    class TestServer:
        """Simple HTTP server to serve our test HTML files"""

        def __init__(self, port=8080):
            self.port = port
            self.httpd = None
            self.server_thread = None

        async def start(self):
            """Start the test server"""
            Handler = http.server.SimpleHTTPRequestHandler

            # Save current directory and change to assets directory
            # (SimpleHTTPRequestHandler serves from the process cwd)
            self.original_cwd = os.getcwd()
            os.chdir(ASSETS_DIR)

            # Try to find an available port (walk upward from the default)
            for _ in range(10):
                try:
                    self.httpd = socketserver.TCPServer(("", self.port), Handler)
                    break
                except OSError:
                    self.port += 1

            if self.httpd is None:
                raise RuntimeError("Could not find available port")

            # Daemon thread so a crashed demo never blocks interpreter exit
            self.server_thread = threading.Thread(target=self.httpd.serve_forever)
            self.server_thread.daemon = True
            self.server_thread.start()

            # Give server time to start
            await asyncio.sleep(0.5)

            console.print(f"[green]Test server started on http://localhost:{self.port}[/green]")
            return self.port

        def stop(self):
            """Stop the test server"""
            if self.httpd:
                self.httpd.shutdown()
            # Restore original directory
            if hasattr(self, 'original_cwd'):
                os.chdir(self.original_cwd)

    server = TestServer()
    port = await server.start()

    try:
        # Demo 1: Twitter-like virtual scroll (content REPLACED)
        console.print("\n[bold yellow]Demo 1: Twitter-like Virtual Scroll - Content Replaced[/bold yellow]\n")
        console.print("[cyan]This simulates Twitter/X where only visible tweets exist in DOM[/cyan]\n")

        url = f"http://localhost:{port}/virtual_scroll_twitter_like.html"

        # First, crawl WITHOUT virtual scroll
        console.print("[red]WITHOUT Virtual Scroll:[/red]")

        config_normal = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
        # Visible browser in interactive mode so the scrolling can be watched
        browser_config = BrowserConfig(
            headless=False if not auto_mode else True,
            viewport={"width": 1280, "height": 800}
        )

        async with AsyncWebCrawler(config=browser_config) as crawler:
            result_normal = await crawler.arun(url=url, config=config_normal)

        # Count tweets (unique data-tweet-id attributes in the raw HTML)
        tweets_normal = len(set(re.findall(r'data-tweet-id="(\d+)"', result_normal.html)))
        console.print(f"• Captured only {tweets_normal} tweets (initial visible)")
        console.print(f"• HTML size: {len(result_normal.html):,} bytes\n")

        # Then, crawl WITH virtual scroll
        console.print("[green]WITH Virtual Scroll:[/green]")

        virtual_config = VirtualScrollConfig(
            container_selector="#timeline",
            scroll_count=50,
            scroll_by="container_height",
            wait_after_scroll=0.2
        )

        config_virtual = CrawlerRunConfig(
            virtual_scroll_config=virtual_config,
            cache_mode=CacheMode.BYPASS
        )

        async with AsyncWebCrawler(config=browser_config) as crawler:
            result_virtual = await crawler.arun(url=url, config=config_virtual)

        tweets_virtual = len(set(re.findall(r'data-tweet-id="(\d+)"', result_virtual.html)))
        console.print(f"• Captured {tweets_virtual} tweets (all content)")
        console.print(f"• HTML size: {len(result_virtual.html):,} bytes")
        # Format the ratio outside the f-string: applying :.1f to the
        # 'N/A' string branch would raise ValueError.
        gain = f"{tweets_virtual / tweets_normal:.1f}" if tweets_normal > 0 else "N/A"
        console.print(f"• [bold]{gain}x more content![/bold]\n")

        if not auto_mode:
            console.print("\n[dim]Press Enter to continue to Demo 2...[/dim]")
            input()
        else:
            await asyncio.sleep(1)

        # Demo 2: Instagram Grid Example
        console.print("\n[bold yellow]Demo 2: Instagram Grid - Visual Grid Layout[/bold yellow]\n")
        console.print("[cyan]This shows how virtual scroll works with grid layouts[/cyan]\n")

        url2 = f"http://localhost:{port}/virtual_scroll_instagram_grid.html"

        # Configure for grid layout
        grid_config = VirtualScrollConfig(
            container_selector=".feed-container",
            scroll_count=100,  # Many scrolls for 999 posts
            scroll_by="container_height",
            wait_after_scroll=0.1 if auto_mode else 0.3
        )

        config = CrawlerRunConfig(
            virtual_scroll_config=grid_config,
            cache_mode=CacheMode.BYPASS,
            screenshot=True  # Take a screenshot
        )

        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url=url2, config=config)

        # Count posts in grid
        posts = re.findall(r'data-post-id="(\d+)"', result.html)
        unique_posts = sorted(set(int(id) for id in posts))

        console.print(f"[green]✅ Results:[/green]")
        console.print(f"• Posts captured: {len(unique_posts)} unique posts")
        if unique_posts:
            console.print(f"• Post IDs range: {min(unique_posts)} to {max(unique_posts)}")
            console.print(f"• Expected: 0 to 998 (999 posts total)")

            if len(unique_posts) >= 900:
                console.print(f"• [bold green]SUCCESS! Captured {len(unique_posts)/999*100:.1f}% of all posts[/bold green]")

        if not auto_mode:
            console.print("\n[dim]Press Enter to continue to Demo 3...[/dim]")
            input()
        else:
            await asyncio.sleep(1)

        # Demo 3: Show the actual code
        console.print("\n[bold yellow]Demo 3: The Code - How It Works[/bold yellow]\n")

        # Show the actual implementation
        code = '''# Example: Crawling Twitter-like feed with virtual scroll
url = "http://localhost:8080/virtual_scroll_twitter_like.html"

# Configure virtual scroll
virtual_config = VirtualScrollConfig(
    container_selector="#timeline",  # The scrollable container
    scroll_count=50,                 # Max number of scrolls
    scroll_by="container_height",    # Scroll by container height
    wait_after_scroll=0.3            # Wait 300ms after each scroll
)

config = CrawlerRunConfig(
    virtual_scroll_config=virtual_config,
    cache_mode=CacheMode.BYPASS
)

# Use headless=False to watch it work!
browser_config = BrowserConfig(
    headless=False,
    viewport={"width": 1280, "height": 800}
)

async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun(url=url, config=config)

    # Extract all tweets
    tweets = re.findall(r\'data-tweet-id="(\\d+)"\', result.html)
    unique_tweets = set(tweets)

    print(f"Captured {len(unique_tweets)} unique tweets!")
    print(f"Without virtual scroll: only ~10 tweets")
    print(f"With virtual scroll: all 500 tweets!")'''

        syntax = Syntax(code, "python", theme="monokai", line_numbers=True)
        console.print(Panel(syntax, title="Implementation", border_style="green"))

        # Summary
        console.print("\n[bold green]✨ Virtual Scroll Benefits:[/bold green]")
        console.print("• Captures ALL content, not just initially visible")
        console.print("• Handles Twitter, Instagram, LinkedIn, and more")
        console.print("• Smart scrolling with configurable parameters")
        console.print("• Essential for modern web scraping")
        console.print("• Works with any virtualized content\n")

    finally:
        # Stop the test server even if a demo step raised
        server.stop()
        console.print("[dim]Test server stopped[/dim]")
|
|
|
|
|
|
async def url_seeder_demo(auto_mode: bool = False) -> None:
    """
    🌱 URL Seeder Demo

    Shows intelligent URL discovery and filtering.

    Runs four live demos against realpython.com (requires network access):
      1. Discover every URL listed in the site's sitemap.
      2. Filter the discovered URLs by a glob-style pattern.
      3. Rank pattern-matched URLs by BM25 relevance to a text query.
      4. Full pipeline: discover -> filter/rank -> crawl the top result.

    Args:
        auto_mode: When True, skip the interactive "Press Enter" pauses
            and sleep briefly between demos instead.
    """
    print_banner(
        "🌱 URL SEEDER - INTELLIGENT URL DISCOVERY",
        "Pre-discover and filter URLs before crawling"
    )

    # Explain the feature
    console.print(Panel(
        "[bold]What is URL Seeder?[/bold]\n\n"
        "URL Seeder enables intelligent crawling at scale by pre-discovering URLs:\n\n"
        "• [cyan]Discovery[/cyan]: Find all URLs from sitemaps or by crawling\n"
        "• [magenta]Filtering[/magenta]: Filter by patterns, dates, or content\n"
        "• [green]Ranking[/green]: Score URLs by relevance (BM25 or semantic)\n"
        "• [yellow]Metadata[/yellow]: Extract head data without full crawl\n\n"
        "Perfect for targeted crawling of large websites!",
        title="Feature Overview",
        border_style="blue"
    ))

    # Short pause so the overview panel can be read before output scrolls on.
    await asyncio.sleep(2)

    # Demo 1: Basic URL discovery
    console.print("\n[bold yellow]Demo 1: Discover URLs from Sitemap[/bold yellow]\n")

    # Shared target site for all four demos below.
    target_site = "realpython.com"
    console.print(f"[cyan]🔍 Target:[/cyan] [bold]{target_site}[/bold]")
    console.print("[dim]Let's discover what content is available[/dim]\n")

    async with AsyncUrlSeeder() as seeder:
        # First, see total URLs available
        console.print("[cyan]Discovering ALL URLs from sitemap...[/cyan]")

        # seeder.urls() returns a list of dicts; each entry has at least a
        # 'url' key (and 'head_data'/'relevance_score' when requested later).
        all_urls = await seeder.urls(
            target_site,
            SeedingConfig(source="sitemap")
        )

        console.print(f"[green]✅ Found {len(all_urls)} total URLs![/green]\n")

        # Show URL categories by bucketing on URL path substrings.
        categories = {}
        for url_info in all_urls[:100]:  # Sample first 100
            url = url_info['url']
            if '/tutorials/' in url:
                categories['tutorials'] = categories.get('tutorials', 0) + 1
            elif '/python-' in url:
                categories['python-topics'] = categories.get('python-topics', 0) + 1
            elif '/courses/' in url:
                categories['courses'] = categories.get('courses', 0) + 1
            else:
                categories['other'] = categories.get('other', 0) + 1

        console.print("[bold]URL Categories (sample of first 100):[/bold]")
        # Largest buckets first.
        for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
            console.print(f"• {cat}: {count} URLs")

    # Pause between demos: wait for the user, or sleep in auto mode.
    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 2...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 2: Pattern filtering
    console.print("\n[bold yellow]Demo 2: Filter URLs by Pattern[/bold yellow]\n")

    # Glob-style URL pattern understood by SeedingConfig.
    pattern = "*python-basics*"
    console.print(f"[cyan]🎯 Pattern:[/cyan] [bold]{pattern}[/bold]")
    console.print("[dim]Finding Python basics tutorials[/dim]\n")

    async with AsyncUrlSeeder() as seeder:
        filtered_urls = await seeder.urls(
            target_site,
            SeedingConfig(
                source="sitemap",
                pattern=pattern,
                max_urls=10
            )
        )

        console.print(f"[green]✅ Found {len(filtered_urls)} Python basics URLs:[/green]\n")

        for i, url_info in enumerate(filtered_urls[:5], 1):
            console.print(f"{i}. {url_info['url']}")

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 3...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 3: Smart search with BM25 ranking
    console.print("\n[bold yellow]Demo 3: Smart Search with BM25 Ranking[/bold yellow]\n")

    # Free-text query scored against page <head> metadata via BM25.
    query = "web scraping beautifulsoup tutorial"
    console.print(f"[cyan]🔍 Query:[/cyan] [bold]{query}[/bold]")
    console.print("[dim]Using BM25 to find most relevant content[/dim]\n")

    async with AsyncUrlSeeder() as seeder:
        # Search with relevance scoring
        results = await seeder.urls(
            target_site,
            SeedingConfig(
                source="sitemap",
                pattern="*beautiful-soup*",  # Find Beautiful Soup pages
                extract_head=True,  # Get metadata
                query=query,
                scoring_method="bm25",
                # No threshold - show all results ranked by BM25
                max_urls=10
            )
        )

        console.print(f"[green]✅ Top {len(results)} most relevant results:[/green]\n")

        # Create a table for results
        table = Table(
            title="🎯 Relevance-Ranked Results",
            box=box.ROUNDED,
            show_lines=True
        )

        table.add_column("Rank", style="cyan", width=6)
        table.add_column("Score", style="yellow", width=8)
        table.add_column("Title", style="white", width=50)
        table.add_column("URL", style="dim", width=40)

        for i, result in enumerate(results[:5], 1):
            score = result.get('relevance_score', 0)
            title = result.get('head_data', {}).get('title', 'No title')[:50]
            # NOTE(review): [-2] assumes the URL ends with a trailing slash
            # (".../slug/"); a slash-less URL would yield the wrong segment.
            url = result['url'].split('/')[-2]  # Just the slug

            table.add_row(
                f"#{i}",
                f"{score:.3f}",
                title,
                f".../{url}/"
            )

        console.print(table)

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 4...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 4: Complete pipeline - Discover → Filter → Crawl
    console.print("\n[bold yellow]Demo 4: Complete Pipeline - Discover → Filter → Crawl[/bold yellow]\n")

    console.print("[cyan]Let's build a complete crawling pipeline:[/cyan]")
    console.print("1. Discover URLs about Python decorators")
    console.print("2. Filter and rank by relevance")
    console.print("3. Crawl top results\n")

    async with AsyncUrlSeeder() as seeder:
        # Step 1: Discover and filter
        console.print("[bold]Step 1: Discovering decorator tutorials...[/bold]")

        decorator_urls = await seeder.urls(
            target_site,
            SeedingConfig(
                source="sitemap",
                pattern="*decorator*",
                extract_head=True,
                query="python decorators tutorial examples",
                scoring_method="bm25",
                max_urls=5
            )
        )

        console.print(f"Found {len(decorator_urls)} relevant URLs\n")

        # Step 2: Show what we'll crawl
        console.print("[bold]Step 2: URLs to crawl (ranked by relevance):[/bold]")
        urls_to_crawl = []
        for i, url_info in enumerate(decorator_urls[:3], 1):
            urls_to_crawl.append(url_info['url'])
            title = url_info.get('head_data', {}).get('title', 'No title')
            console.print(f"{i}. {title[:60]}...")
            console.print(f" [dim]{url_info['url']}[/dim]")

        # Step 3: Crawl them
        console.print("\n[bold]Step 3: Crawling selected URLs...[/bold]")

        async with AsyncWebCrawler() as crawler:
            config = CrawlerRunConfig(
                only_text=True,
                cache_mode=CacheMode.BYPASS
            )

            # Crawl just the first URL for demo
            if urls_to_crawl:
                console.print(f"\n[dim]Crawling first URL: {urls_to_crawl[0]}[/dim]")
                result = await crawler.arun(urls_to_crawl[0], config=config)

                if result.success:
                    console.print(f"\n[green]✅ Successfully crawled the page![/green]")
                    console.print("\n[bold]Sample content:[/bold]")
                    # Flatten newlines so the 300-char preview stays on one line.
                    content = result.markdown.raw_markdown[:300].replace('\n', ' ')
                    console.print(f"[dim]{content}...[/dim]")
                else:
                    console.print(f"[red]Failed to crawl: {result.error_message}[/red]")

    # Show code example (display-only snippet, not executed here).
    console.print("\n[bold yellow]Code Example:[/bold yellow]\n")

    code = '''# Complete URL Seeder pipeline
async with AsyncUrlSeeder() as seeder:
    # 1. Discover and filter URLs
    urls = await seeder.urls(
        "example.com",
        SeedingConfig(
            source="sitemap",            # or "crawl"
            pattern="*tutorial*",        # URL pattern
            extract_head=True,           # Get metadata
            query="python web scraping", # Search query
            scoring_method="bm25",       # Ranking method
            score_threshold=0.2,         # Quality filter
            max_urls=10                  # Max URLs
        )
    )

    # 2. Extract just the URLs
    urls_to_crawl = [u["url"] for u in urls[:5]]

    # 3. Crawl them efficiently
    async with AsyncWebCrawler() as crawler:
        results = await crawler.arun_many(urls_to_crawl)

        async for result in results:
            if result.success:
                print(f"Crawled: {result.url}")
                # Process content...'''

    syntax = Syntax(code, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, title="Implementation", border_style="green"))

    # Summary
    console.print("\n[bold green]✨ URL Seeder Benefits:[/bold green]")
    console.print("• Pre-discover URLs before crawling - save time!")
    console.print("• Filter by patterns, dates, or content relevance")
    console.print("• Rank URLs by BM25 or semantic similarity")
    console.print("• Extract metadata without full crawl")
    console.print("• Perfect for large-scale targeted crawling\n")
|
|
|
|
|
async def c4a_script_demo(auto_mode: bool = False) -> None:
    """
    🎭 C4A Script Demo

    Shows the power of our domain-specific language for web automation.

    Runs five demos: basic transpilation to JavaScript, error reporting
    with suggestions, a larger e-commerce automation script, live
    integration with ``CrawlerRunConfig`` (direct ``c4a_script`` vs.
    pre-compiled ``js_code``), and a news-site configuration example.

    Args:
        auto_mode: When True, skip the interactive "Press Enter" pauses
            and sleep briefly between demos instead.
    """
    print_banner(
        "🎭 C4A SCRIPT - AUTOMATION MADE SIMPLE",
        "Domain-specific language for complex web interactions"
    )

    # Explain the feature
    console.print(Panel(
        "[bold]What is C4A Script?[/bold]\n\n"
        "C4A Script is a simple yet powerful language for web automation:\n\n"
        "• [cyan]English-like syntax[/cyan]: IF, CLICK, TYPE, WAIT - intuitive commands\n"
        "• [magenta]Smart transpiler[/magenta]: Converts to optimized JavaScript\n"
        "• [green]Error handling[/green]: Helpful error messages with suggestions\n"
        "• [yellow]Reusable procedures[/yellow]: Build complex workflows easily\n\n"
        "Perfect for automating logins, handling popups, pagination, and more!",
        title="Feature Overview",
        border_style="blue"
    ))

    # Let the overview panel sit on screen briefly.
    await asyncio.sleep(2)

    # Demo 1: Basic transpilation demonstration
    console.print("\n[bold yellow]Demo 1: Understanding C4A Script Transpilation[/bold yellow]\n")

    simple_script = """# Handle cookie banner and scroll
WAIT `body` 2
IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
SCROLL DOWN 500
WAIT 1"""

    console.print("[cyan]C4A Script:[/cyan]")
    syntax = Syntax(simple_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="cyan"))

    # Compile it. c4a_compile is already imported at module level, so no
    # function-local re-import is needed here.
    console.print("\n[cyan]Transpiling to JavaScript...[/cyan]")
    result = c4a_compile(simple_script)

    if result.success:
        console.print("[green]✅ Compilation successful![/green]\n")
        console.print("[cyan]Generated JavaScript:[/cyan]")

        # result.js_code is a list of JS statements; join for display.
        js_display = "\n".join(result.js_code)
        js_syntax = Syntax(js_display, "javascript", theme="monokai", line_numbers=True)
        console.print(Panel(js_syntax, border_style="green"))

    # Pause between demos: wait for the user, or sleep in auto mode.
    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 2...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 2: Error handling showcase
    console.print("\n[bold yellow]Demo 2: Smart Error Detection & Suggestions[/bold yellow]\n")

    # Script with intentional errors (missing backticks, missing THEN).
    error_script = """WAIT body 2
CLICK button.submit
IF (EXISTS .modal) CLICK .close"""

    console.print("[cyan]C4A Script with errors:[/cyan]")
    syntax = Syntax(error_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="red"))

    console.print("\n[cyan]Compiling...[/cyan]")
    result = c4a_compile(error_script)

    if not result.success:
        console.print("[red]❌ Compilation failed (as expected)[/red]\n")

        # Show the first error with location, message and a caret marker.
        error = result.first_error
        console.print(f"[bold red]Error at line {error.line}, column {error.column}:[/bold red]")
        console.print(f"[yellow]{error.message}[/yellow]")
        console.print(f"\nProblematic code: [red]{error.source_line}[/red]")
        # 16 ≈ width of the "Problematic code: " prefix so the caret lands
        # under the offending column. NOTE(review): may be off by a couple
        # of columns depending on whether error.column is 0- or 1-based.
        console.print(" " * (16 + error.column) + "[red]^[/red]")

        if error.suggestions:
            console.print("\n[green]💡 Suggestions:[/green]")
            for suggestion in error.suggestions:
                console.print(f" • {suggestion.message}")

    # Show the fixed version
    fixed_script = """WAIT `body` 2
CLICK `button.submit`
IF (EXISTS `.modal`) THEN CLICK `.close`"""

    console.print("\n[cyan]Fixed C4A Script:[/cyan]")
    syntax = Syntax(fixed_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="green"))

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 3...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 3: Real-world example - E-commerce automation
    console.print("\n[bold yellow]Demo 3: Real-World E-commerce Automation[/bold yellow]\n")

    console.print("[cyan]Scenario:[/cyan] Automate product search with smart handling\n")

    ecommerce_script = """# E-commerce Product Search Automation
# Define reusable procedures
PROC handle_popups
  # Close cookie banner if present
  IF (EXISTS `.cookie-notice`) THEN CLICK `.cookie-accept`

  # Close newsletter popup if it appears
  IF (EXISTS `#newsletter-modal`) THEN CLICK `.modal-close`
ENDPROC

PROC search_product
  # Click search box and type query
  CLICK `.search-input`
  TYPE "wireless headphones"
  PRESS Enter

  # Wait for results
  WAIT `.product-grid` 10
ENDPROC

# Main automation flow
SET max_products = 50

# Step 1: Navigate and handle popups
GO https://shop.example.com
WAIT `body` 3
handle_popups

# Step 2: Perform search
search_product

# Step 3: Load more products (infinite scroll)
REPEAT (SCROLL DOWN 1000, `document.querySelectorAll('.product-card').length < 50`)

# Step 4: Apply filters
IF (EXISTS `.filter-price`) THEN CLICK `input[data-filter="under-100"]`
WAIT 2

# Step 5: Extract product count
EVAL `console.log('Found ' + document.querySelectorAll('.product-card').length + ' products')`"""

    syntax = Syntax(ecommerce_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, title="E-commerce Automation Script", border_style="cyan"))

    # Compile and show results
    console.print("\n[cyan]Compiling automation script...[/cyan]")
    result = c4a_compile(ecommerce_script)

    if result.success:
        console.print(f"[green]✅ Successfully compiled to {len(result.js_code)} JavaScript statements![/green]")
        console.print("\n[bold]Script Analysis:[/bold]")
        console.print(f"• Procedures defined: {len(result.metadata.get('procedures', []))}")
        console.print(f"• Variables used: {len(result.metadata.get('variables', []))}")
        console.print(f"• Total commands: {result.metadata.get('total_commands', 0)}")

    if not auto_mode:
        console.print("\n[dim]Press Enter to continue to Demo 4...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 4: Integration with Crawl4AI - LIVE DEMO
    console.print("\n[bold yellow]Demo 4: Live Integration with Crawl4AI[/bold yellow]\n")

    console.print("[cyan]Let's see C4A Script in action with real web crawling![/cyan]\n")

    # Create a simple C4A script for demo
    live_script = """# Handle common website patterns
WAIT `body` 2
# Close cookie banner if exists
IF (EXISTS `.cookie-banner, .cookie-notice, #cookie-consent`) THEN CLICK `.accept, .agree, button[aria-label*="accept"]`
# Scroll to load content
SCROLL DOWN 500
WAIT 1"""

    console.print("[bold]Our C4A Script:[/bold]")
    syntax = Syntax(live_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="cyan"))

    # Method 1: Direct C4A Script usage
    console.print("\n[bold cyan]Method 1: Direct C4A Script Integration[/bold cyan]\n")

    try:
        # Import necessary components
        from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

        # Define extraction schema
        schema = {
            "name": "page_content",
            "selector": "body",
            "fields": {
                "title": {"selector": "h1, title", "type": "text"},
                "paragraphs": {"selector": "p", "type": "list", "fields": {"text": {"type": "text"}}},
                "links": {"selector": "a[href]", "type": "list", "fields": {"text": {"type": "text"}, "href": {"type": "attribute", "attribute": "href"}}}
            }
        }

        # Create config with C4A script; transpilation happens at crawl time.
        config = CrawlerRunConfig(
            c4a_script=live_script,
            extraction_strategy=JsonCssExtractionStrategy(schema),
            only_text=True,
            cache_mode=CacheMode.BYPASS
        )

        console.print("[green]✅ Config created with C4A script![/green]")
        console.print("[dim]The C4A script will be automatically transpiled when crawling[/dim]\n")

        # Show the actual code
        code_example1 = f'''# Live code that's actually running:
config = CrawlerRunConfig(
    c4a_script="""{live_script}""",
    extraction_strategy=JsonCssExtractionStrategy(schema),
    only_text=True,
    cache_mode=CacheMode.BYPASS
)

# This would run the crawler:
# async with AsyncWebCrawler() as crawler:
#     result = await crawler.arun("https://example.com", config=config)
#     print(f"Extracted {{len(result.extracted_content)}} items")'''

        syntax = Syntax(code_example1, "python", theme="monokai", line_numbers=True)
        console.print(Panel(syntax, title="Method 1: Direct Integration (Live Code)", border_style="green"))

    except Exception as e:
        # Demo should keep going even if the live config step fails.
        console.print(f"[red]Error in demo: {e}[/red]")

    if not auto_mode:
        console.print("\n[dim]Press Enter to see Method 2...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Method 2: Pre-compilation approach
    console.print("\n[bold cyan]Method 2: Pre-compile and Reuse[/bold cyan]\n")

    # Advanced script with procedures
    advanced_script = """# E-commerce automation with procedures
PROC handle_popups
  IF (EXISTS `.popup-overlay`) THEN CLICK `.popup-close`
  IF (EXISTS `#newsletter-modal`) THEN CLICK `.modal-dismiss`
ENDPROC

PROC load_all_products
  # Keep scrolling until no more products load
  REPEAT (SCROLL DOWN 1000, `document.querySelectorAll('.product').length < window.lastProductCount`)
  EVAL `window.lastProductCount = document.querySelectorAll('.product').length`
ENDPROC

# Main flow
WAIT `.products-container` 5
handle_popups
EVAL `window.lastProductCount = 0`
load_all_products"""

    console.print("[bold]Advanced C4A Script with Procedures:[/bold]")
    syntax = Syntax(advanced_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="cyan"))

    # Actually compile it
    console.print("\n[cyan]Compiling the script...[/cyan]")
    compilation_result = c4a_compile(advanced_script)

    if compilation_result.success:
        console.print(f"[green]✅ Successfully compiled to {len(compilation_result.js_code)} JavaScript statements![/green]\n")

        # Show first few JS statements
        console.print("[bold]Generated JavaScript (first 5 statements):[/bold]")
        js_preview = "\n".join(compilation_result.js_code[:5])
        if len(compilation_result.js_code) > 5:
            js_preview += f"\n... and {len(compilation_result.js_code) - 5} more statements"

        js_syntax = Syntax(js_preview, "javascript", theme="monokai", line_numbers=True)
        console.print(Panel(js_syntax, border_style="green"))

        # Create actual config with compiled code (built here to prove the
        # compiled statements are accepted by CrawlerRunConfig).
        config_with_js = CrawlerRunConfig(
            js_code=compilation_result.js_code,
            wait_for="css:.products-container",
            cache_mode=CacheMode.BYPASS
        )

        console.print("\n[green]✅ Config created with pre-compiled JavaScript![/green]")

        # Show the actual implementation
        code_example2 = f'''# Live code showing pre-compilation:
# Step 1: Compile once
result = c4a_compile(advanced_script)
if result.success:
    js_code = result.js_code  # {len(compilation_result.js_code)} statements generated

    # Step 2: Use compiled code multiple times
    config = CrawlerRunConfig(
        js_code=js_code,
        wait_for="css:.products-container",
        cache_mode=CacheMode.BYPASS
    )

    # Step 3: Run crawler with compiled code
    # async with AsyncWebCrawler() as crawler:
    #     # Can reuse js_code for multiple URLs
    #     for url in ["shop1.com", "shop2.com"]:
    #         result = await crawler.arun(url, config=config)
else:
    print(f"Compilation error: {{result.first_error.message}}")'''

        syntax = Syntax(code_example2, "python", theme="monokai", line_numbers=True)
        console.print(Panel(syntax, title="Method 2: Pre-compilation (Live Code)", border_style="green"))

    else:
        console.print(f"[red]Compilation failed: {compilation_result.first_error.message}[/red]")

    if not auto_mode:
        console.print("\n[dim]Press Enter to see a real-world example...[/dim]")
        input()
    else:
        await asyncio.sleep(1)

    # Demo 5: Real-world example with actual crawling
    console.print("\n[bold yellow]Demo 5: Real-World Example - News Site Automation[/bold yellow]\n")

    news_script = """# News site content extraction
# Wait for main content
WAIT `article, .article-content, main` 5

# Handle common annoyances
IF (EXISTS `.cookie-notice`) THEN CLICK `button[class*="accept"]`
IF (EXISTS `.newsletter-popup`) THEN CLICK `.close, .dismiss`

# Expand "Read More" sections
IF (EXISTS `.read-more-button`) THEN CLICK `.read-more-button`

# Load comments if available
IF (EXISTS `.load-comments`) THEN CLICK `.load-comments`
WAIT 2"""

    console.print("[bold]News Site Automation Script:[/bold]")
    syntax = Syntax(news_script, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, border_style="cyan"))

    # Create and show actual working config (constructed to validate it;
    # the actual crawl is left to the usage example below).
    console.print("\n[cyan]Creating crawler configuration...[/cyan]")

    news_config = CrawlerRunConfig(
        c4a_script=news_script,
        wait_for="css:article",
        only_text=True,
        cache_mode=CacheMode.BYPASS
    )

    console.print("[green]✅ Configuration ready for crawling![/green]\n")

    # Show how to actually use it
    usage_example = '''# Complete working example:
async def crawl_news_site():
    """Crawl a news site with C4A automation"""

    async with AsyncWebCrawler(verbose=False) as crawler:
        result = await crawler.arun(
            url="https://example-news.com/article",
            config=CrawlerRunConfig(
                c4a_script=news_script,
                wait_for="css:article",
                only_text=True
            )
        )

        if result.success:
            print(f"✓ Crawled: {result.url}")
            print(f"✓ Content length: {len(result.markdown.raw_markdown)} chars")
            print(f"✓ Links found: {len(result.links.get('internal', []))} internal")

            # The C4A script ensured we:
            # - Handled cookie banners
            # - Expanded collapsed content
            # - Loaded dynamic comments
            # All automatically!

        return result

# Run it:
# result = await crawl_news_site()'''

    syntax = Syntax(usage_example, "python", theme="monokai", line_numbers=True)
    console.print(Panel(syntax, title="Complete Working Example", border_style="green"))

    # Summary
    console.print("\n[bold green]✨ What We Demonstrated:[/bold green]")
    console.print("• C4A Script transpiles to optimized JavaScript automatically")
    console.print("• Direct integration via `c4a_script` parameter - easiest approach")
    console.print("• Pre-compilation via `c4a_compile()` - best for reuse")
    console.print("• Real configs that you can copy and use immediately")
    console.print("• Actual code running, not just examples!\n")
|
|
|
|
|
|
async def interactive_menu() -> None:
    """Interactive menu to select demos.

    Loops until the user picks "0" (Exit). Choice "7" runs the stable
    demos back-to-back in auto mode; every other choice runs one demo
    interactively and returns to the menu afterwards.
    """
    from rich.prompt import Prompt

    # BUG FIX: entry "6" used to be a plain (non-async) lambda taking a
    # positional `auto` argument, so selecting it crashed with a TypeError
    # (unexpected keyword `auto_mode`, then awaiting None). Use a proper
    # async placeholder with the same signature as the real demos.
    async def _llm_context_placeholder(auto_mode=False):
        # Stand-in until the LLM Context Builder demo ships.
        console.print("[yellow]LLM Context demo coming soon![/yellow]")

    # Menu key -> (display name, awaitable demo taking auto_mode).
    # None marks entries with special handling below.
    demos = {
        "1": ("Link Preview & Scoring", link_preview_demo),
        "2": ("Adaptive Crawling", adaptive_crawling_demo),
        "3": ("Virtual Scroll", virtual_scroll_demo),
        "4": ("URL Seeder", url_seeder_demo),
        "5": ("C4A Script", c4a_script_demo),
        "6": ("LLM Context Builder", _llm_context_placeholder),
        "7": ("Run All Demos", None),  # Special case
        "0": ("Exit", None)
    }

    while True:
        # Clear screen for better presentation
        console.clear()

        print_banner(
            "🚀 CRAWL4AI v0.7.0 SHOWCASE",
            "Interactive Demo Menu"
        )

        console.print("\n[bold cyan]Select a demo to run:[/bold cyan]\n")

        for key, (name, _) in demos.items():
            if key == "0":
                # Exit entry rendered dimmed and set apart.
                console.print(f"\n[dim]{key}. {name}[/dim]")
            else:
                console.print(f"{key}. {name}")

        # Prompt.ask re-asks until the answer is one of the menu keys.
        choice = Prompt.ask("\n[bold]Enter your choice[/bold]", choices=list(demos.keys()))

        if choice == "0":
            console.print("\n[yellow]Thanks for exploring Crawl4AI v0.7.0![/yellow]")
            break
        elif choice == "7":
            # Run all demos
            console.clear()
            for key in ["1", "3", "4", "5"]:  # Link Preview, Virtual Scroll, URL Seeder, C4A Script
                name, demo_func = demos[key]
                if demo_func:
                    await demo_func(auto_mode=True)
                    console.print("\n[dim]Press Enter to continue...[/dim]")
                    input()
        else:
            name, demo_func = demos[choice]
            if demo_func:
                console.clear()
                await demo_func(auto_mode=False)
                console.print("\n[dim]Press Enter to return to menu...[/dim]")
                input()
|
|
|
|
|
|
async def main():
    """Entry point: dispatch the showcase based on command-line flags.

    ``--interactive``/``-i`` opens the selection menu, ``--auto`` runs
    every demo without pausing, and no flag walks through each demo with
    a "Press Enter" prompt between them.
    """
    import sys

    argv = sys.argv
    wants_menu = "--interactive" in argv or "-i" in argv
    wants_auto = "--auto" in argv

    if wants_menu:
        await interactive_menu()
        return

    if wants_auto:
        console.print("[yellow]Running in AUTO MODE - skipping user prompts[/yellow]\n")

        # Unattended run: two-second pause between demos, none after the
        # last one. Adaptive crawling is intentionally left out for now.
        unattended = [
            link_preview_demo,
            virtual_scroll_demo,
            url_seeder_demo,
            c4a_script_demo,
        ]
        for position, demo in enumerate(unattended):
            await demo(auto_mode=True)
            if position < len(unattended) - 1:
                await asyncio.sleep(2)
        return

    # Default: every demo in sequence, pausing for Enter between them.
    # Each entry pairs a demo with the prompt shown after it (None = last).
    prompted_sequence = [
        (link_preview_demo, "\n[dim]Press Enter to continue to Virtual Scroll demo...[/dim]"),
        (virtual_scroll_demo, "\n[dim]Press Enter to continue to URL Seeder demo...[/dim]"),
        (url_seeder_demo, "\n[dim]Press Enter to continue to C4A Script demo...[/dim]"),
        (c4a_script_demo, None),
    ]

    try:
        for demo, prompt_text in prompted_sequence:
            await demo(auto_mode=False)
            if prompt_text is not None:
                console.print(prompt_text)
                input()

        # TODO: Add other demos here (e.g. llm_context_demo) once ready.

        console.print("\n[bold green]✨ All demos completed![/bold green]")
        console.print("\nTo explore individual demos, run: [cyan]python crawl4ai_v0_7_0_showcase.py --interactive[/cyan]")

    except KeyboardInterrupt:
        console.print("\n[yellow]Demo interrupted by user[/yellow]")
    except Exception as e:
        console.print(f"\n[red]Error: {str(e)}[/red]")
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # --help/-h prints usage and exits without launching any demo.
    help_requested = "--help" in sys.argv or "-h" in sys.argv

    if help_requested:
        usage_lines = (
            "\n[bold]Crawl4AI v0.7.0 Feature Showcase[/bold]\n",
            "Usage: python crawl4ai_v0_7_0_showcase.py [options]\n",
            "Options:",
            " --interactive, -i Interactive menu to select demos",
            " --auto Run all demos without user prompts",
            " --help, -h Show this help message\n",
            "Default: Run all demos with prompts between each\n",
        )
        for usage_line in usage_lines:
            console.print(usage_line)
    else:
        asyncio.run(main())