# crawl4ai/docs/examples/c4a_script/demo_c4a_crawl4ai.py

"""
Demonstration of C4A-Script integration with Crawl4AI
Shows various use cases and features
"""

import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai import c4a_compile, CompilationResult

async def example_basic_usage():
    """Example 1: attach a small C4A-Script to a ``CrawlerRunConfig``.

    The script waits for the page body, dismisses a cookie banner if one
    exists, scrolls down and clicks a "load more" button if one exists.
    Only the config is built here; no crawl is started.
    """
    rule = "="*60
    print("\n" + rule)
    print("Example 1: Basic C4A-Script Usage")
    print(rule)

    # The automation steps, written in C4A-Script
    c4a_script = """
    # Wait for page to load
    WAIT `body` 2

    # Handle cookie banner if present
    IF (EXISTS `.cookie-banner`) THEN CLICK `.accept-btn`

    # Scroll down to load more content
    SCROLL DOWN 500
    WAIT 1

    # Click load more button if exists
    IF (EXISTS `.load-more`) THEN CLICK `.load-more`
    """

    # Hand the script to the run config; the generated JavaScript is read
    # back from ``config.js_code`` below.
    # NOTE(review): compilation presumably happens inside the constructor.
    config = CrawlerRunConfig(
        wait_for="css:.content",
        verbose=False,
        c4a_script=c4a_script,
        url="https://example.com",
    )

    print("✅ C4A Script compiled successfully!")
    print(f"Generated {len(config.js_code)} JavaScript commands")

    # In production, you would run:
    # async with AsyncWebCrawler() as crawler:
    #     result = await crawler.arun(config=config)


async def example_form_filling():
    """Example 2: a C4A-Script that fills in and submits a contact form.

    The script stores two values with ``SET``, types them into the email
    and message fields, submits, and waits for a success message.
    Only the config is built here; no crawl is started.
    """
    rule = "="*60
    print("\n" + rule)
    print("Example 2: Form Filling with C4A-Script")
    print(rule)

    # Script that drives the form
    form_script = """
    # Set form values
    SET email = "test@example.com"
    SET message = "This is a test message"

    # Fill the form
    CLICK `#email-input`
    TYPE $email

    CLICK `#message-textarea`
    TYPE $message

    # Submit the form
    CLICK `button[type="submit"]`

    # Wait for success message
    WAIT `.success-message` 10
    """

    # Built for demonstration only; the config is not used further here.
    config = CrawlerRunConfig(
        c4a_script=form_script,
        url="https://example.com/contact",
    )

    status_lines = (
        "✅ Form filling script ready",
        "Script will:",
        "  - Fill email field",
        "  - Fill message textarea",
        "  - Submit form",
        "  - Wait for confirmation",
    )
    for line in status_lines:
        print(line)


async def example_dynamic_loading():
    """Example 3: a C4A-Script for pages that load content on demand.

    The script waits for the product list, clicks "Load More" for as long
    as the button is present, then logs the number of loaded products.
    Only the config is built here; no crawl is started.
    """
    rule = "="*60
    print("\n" + rule)
    print("Example 3: Dynamic Content Loading")
    print(rule)

    # Pagination / infinite-scroll handling
    pagination_script = """
    # Initial wait
    WAIT `.product-list` 5

    # Load all products by clicking "Load More" repeatedly
    REPEAT (CLICK `.load-more`, `document.querySelector('.load-more') !== null`)

    # Alternative: Scroll to load (infinite scroll)
    # REPEAT (SCROLL DOWN 1000, `document.querySelectorAll('.product').length < 100`)

    # Extract count
    EVAL `console.log('Products loaded: ' + document.querySelectorAll('.product').length)`
    """

    # A screenshot is requested so the final page state would be captured.
    config = CrawlerRunConfig(
        screenshot=True,
        c4a_script=pagination_script,
        url="https://example.com/products",
    )

    for line in (
        "✅ Dynamic loading script ready",
        "Script will load all products by repeatedly clicking 'Load More'",
    ):
        print(line)


async def example_multi_step_workflow():
    """Example 4: a multi-step workflow built from reusable procedures.

    The script defines ``login`` and ``search_product`` procedures, calls
    them in order, then branches on whether results were found. It is
    checked with ``c4a_compile`` first; a config is only built when
    compilation succeeds, otherwise the first error is printed.
    """
    rule = "="*60
    print("\n" + rule)
    print("Example 4: Multi-Step Workflow with Procedures")
    print(rule)

    # Workflow script with two procedures and a main section
    workflow_script = """
    # Define login procedure
    PROC login
      CLICK `#username`
      TYPE "demo_user"
      CLICK `#password`
      TYPE "demo_pass"
      CLICK `#login-btn`
      WAIT `.dashboard` 10
    ENDPROC

    # Define search procedure
    PROC search_product
      CLICK `.search-box`
      TYPE "laptop"
      PRESS Enter
      WAIT `.search-results` 5
    ENDPROC

    # Main workflow
    GO https://example.com
    login
    search_product

    # Process results
    IF (EXISTS `.no-results`) THEN EVAL `console.log('No products found')`
    ELSE REPEAT (CLICK `.add-to-cart`, 3)
    """

    # Compile up front so problems are reported before a config is built
    result = c4a_compile(workflow_script)

    # Failure path first: report the first error and stop
    if not result.success:
        print("❌ Compilation error:")
        error = result.first_error
        print(f"  Line {error.line}: {error.message}")
        return

    for line in (
        "✅ Complex workflow compiled successfully!",
        "Workflow includes:",
        "  - Login procedure",
        "  - Product search",
        "  - Conditional cart additions",
    ):
        print(line)

    # Built for demonstration only; the config is not used further here.
    config = CrawlerRunConfig(
        c4a_script=workflow_script,
        url="https://example.com",
    )


async def example_error_handling():
    """Example 5: show how an invalid C4A-Script is reported.

    First builds a ``CrawlerRunConfig`` from a script with intentional
    syntax problems (selectors without backticks, ``IF`` without ``THEN``)
    and prints the ``ValueError`` that is caught. If no error is raised,
    that is reported too, so the demo never silently skips its main point.
    Then builds a config from the corrected script.
    """
    print("\n" + "="*60)
    print("Example 5: Error Handling")
    print("="*60)

    # Script with intentional error
    bad_script = """
    WAIT body 2
    CLICK button
    IF (EXISTS .modal) CLICK .close
    """

    try:
        # The result is deliberately discarded; only the raised error matters.
        CrawlerRunConfig(
            url="https://example.com",
            c4a_script=bad_script
        )
    except ValueError as e:
        print("✅ Error caught as expected:")
        print(f"  {e}")
    else:
        # Without this branch the demo would print nothing here and then
        # go on to claim success for the fixed script.
        print("❌ Expected a compilation error, but the script was accepted")

    # Fixed version
    good_script = """
    WAIT `body` 2
    CLICK `button`
    IF (EXISTS `.modal`) THEN CLICK `.close`
    """

    # Built for demonstration only; the config is not used further here.
    config = CrawlerRunConfig(
        url="https://example.com",
        c4a_script=good_script
    )

    print("\n✅ Fixed script compiled successfully!")


async def example_combining_with_extraction():
    """Example 6: prepare a page with C4A-Script, then extract with CSS rules.

    The script expands collapsed sections, loads comments and closes any
    popup. The resulting config pairs it with a ``JsonCssExtractionStrategy``
    that describes the article and its comments.
    Only the config is built here; no crawl is started.
    """
    print("\n" + "="*60)
    print("Example 6: C4A-Script + Extraction Strategies")
    print("="*60)

    from crawl4ai import JsonCssExtractionStrategy

    # Script to prepare page for extraction
    prep_script = """
    # Expand all collapsed sections
    REPEAT (CLICK `.expand-btn`, `document.querySelectorAll('.expand-btn:not(.expanded)').length > 0`)

    # Load all comments
    IF (EXISTS `.load-comments`) THEN CLICK `.load-comments`
    WAIT `.comments-section` 5

    # Close any popups
    IF (EXISTS `.popup-close`) THEN CLICK `.popup-close`
    """

    # Define extraction schema.
    # The strategy's documented format uses "baseSelector" for the container
    # element and a *list* of field definitions, each carrying its own
    # "name"; a dict keyed by field name is not that format.
    schema = {
        "name": "article",
        "baseSelector": "article.main",
        "fields": [
            {"name": "title", "selector": "h1", "type": "text"},
            {"name": "content", "selector": ".content", "type": "text"},
            {
                "name": "comments",
                "selector": ".comment",
                "type": "list",
                "fields": [
                    {"name": "author", "selector": ".author", "type": "text"},
                    {"name": "text", "selector": ".text", "type": "text"},
                ],
            },
        ],
    }

    # Built for demonstration only; the config is not used further here.
    config = CrawlerRunConfig(
        url="https://example.com/article",
        c4a_script=prep_script,
        extraction_strategy=JsonCssExtractionStrategy(schema),
        wait_for="css:.comments-section"
    )

    print("✅ Combined C4A + Extraction ready")
    print("Workflow:")
    print("  1. Expand collapsed sections")
    print("  2. Load comments")
    print("  3. Extract structured data")


async def main():
    """Run every example in sequence and print a closing summary."""
    print("\n🚀 C4A-Script + Crawl4AI Integration Demo\n")

    # Examples are awaited one after another, in presentation order
    examples = (
        example_basic_usage,
        example_form_filling,
        example_dynamic_loading,
        example_multi_step_workflow,
        example_error_handling,
        example_combining_with_extraction,
    )
    for run_example in examples:
        await run_example()

    rule = "="*60
    print("\n" + rule)
    print("✅ All examples completed successfully!")
    print(rule)

    for line in (
        "\nTo run actual crawls, uncomment the AsyncWebCrawler sections",
        "or create your own scripts using these examples as templates.",
    ):
        print(line)


# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    asyncio.run(main())