Merge branch 'next'

2024-11-29 20:54:28 +08:00
parent b0419edda6 1def53b7fe
commit 569bdb6073
13 changed files with 430 additions and 194 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,64 @@
 # Changelog

+## [0.3.746] November 29, 2024
+
+### Major Features
+1. Enhanced Docker Support (Nov 29, 2024)
+   - Improved GPU support in Docker images.
+   - Dockerfile refactored for better platform-specific installations.
+   - Introduced new Docker commands for different platforms:
+     - `basic-amd64`, `all-amd64`, `gpu-amd64` for AMD64.
+     - `basic-arm64`, `all-arm64`, `gpu-arm64` for ARM64.
+
+### Infrastructure & Documentation
+- Enhanced README.md to improve user guidance and installation instructions.
+- Added installation instructions for Playwright setup in README.
+- Created and updated examples in `docs/examples/quickstart_async.py` to be more useful and user-friendly.
+- Updated `requirements.txt` with a new `pydantic` dependency.
+- Bumped version number in `crawl4ai/__version__.py` to 0.3.746.
+
+### Breaking Changes
+- Streamlined application structure:
+  - Removed static pages and related code from `main.py` which might affect existing deployments relying on static content.
+
+### Development Updates
+- Developed `post_install` method in `crawl4ai/install.py` to streamline post-installation setup tasks.
+- Refined migration processes in `crawl4ai/migrations.py` with enhanced logging for better error visibility.
+- Updated `docker-compose.yml` to support local and hub services for different architectures, enhancing build and deploy capabilities.
+- Refactored example test cases in `docs/examples/docker_example.py` to facilitate comprehensive testing.
+
+### README.md
+Updated README with new docker commands and setup instructions.
+Enhanced installation instructions and guidance.
+
+### crawl4ai/install.py
+Added post-install script functionality.
+Introduced `post_install` method for automation of post-installation tasks.
+
+### crawl4ai/migrations.py
+Improved migration logging.
+Refined migration processes and added better logging.
+
+### docker-compose.yml
+Refactored docker-compose for better service management.
+Updated to define services for different platforms and versions.
+
+### requirements.txt
+Updated dependencies.
+Added `pydantic` to requirements file.
+
+### crawl4ai/__version__.py
+Updated version number.
+Bumped version number to 0.3.746.
+
+### docs/examples/quickstart_async.py
+Enhanced example scripts.
+Uncommented additional example calls in the async quickstart guide so users can run them directly.
+
+### main.py
+Refactored code to improve maintainability.
+Streamlined app structure by removing static pages code.
+
 ## [0.3.743] November 27, 2024

 Enhance features and documentation
--- a/19
+++ b/19
@@ -1,6 +1,9 @@
 # syntax=docker/dockerfile:1.4

-# Build arguments
+ARG TARGETPLATFORM
+ARG BUILDPLATFORM
+
+# Other build arguments
 ARG PYTHON_VERSION=3.10

 # Base stage with system dependencies
@@ -63,13 +66,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

 # GPU support if enabled and architecture is supported
-RUN if [ "$ENABLE_GPU" = "true" ] && [ "$(dpkg --print-architecture)" != "arm64" ] ; then \
+RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
    apt-get update && apt-get install -y --no-install-recommends \
    nvidia-cuda-toolkit \
    && rm -rf /var/lib/apt/lists/* ; \
-    else \
-        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported architecture or GPU disabled)"; \
-    fi
+else \
+    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
+fi

 # Create and set working directory
 WORKDIR /app
@@ -120,7 +123,11 @@ RUN pip install --no-cache-dir \
 RUN mkdocs build

 # Install Playwright and browsers
-RUN playwright install
+RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+    playwright install chromium; \
+    elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+    playwright install chromium; \
+    fi

 # Expose port
 EXPOSE 8000 11235 9222 8080
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ Crawl4AI is the #1 trending GitHub repository, actively maintained by a vibrant
 1. Install Crawl4AI:
 ```bash
 pip install crawl4ai
+crawl4ai-setup # Setup the browser
 ```

 2. Run a simple web crawl:
@@ -140,11 +141,12 @@ For basic web crawling and scraping tasks:

 ```bash
 pip install crawl4ai
+crawl4ai-setup # Setup the browser
 ```

 By default, this will install the asynchronous version of Crawl4AI, using Playwright for web crawling.

-👉 **Note**: When you install Crawl4AI, the setup script should automatically install and set up Playwright. However, if you encounter any Playwright-related errors, you can manually install it using one of these methods:
+👉 **Note**: After you install Crawl4AI, running `crawl4ai-setup` should automatically install and set up Playwright. However, if you encounter any Playwright-related errors, you can manually install it using one of these methods:

 1. Through the command line:

@@ -218,48 +220,173 @@ Crawl4AI is available as Docker images for easy deployment. You can either pull

 ---

-### Option 1: Docker Hub (Recommended)
+<details>
+<summary>🐳 <strong>Option 1: Docker Hub (Recommended)</strong></summary>

+Choose the appropriate image based on your platform and needs:
+
+### For AMD64 (Regular Linux/Windows):
 ```bash
-# Pull and run from Docker Hub (choose one):
-docker pull unclecode/crawl4ai:basic    # Basic crawling features
-docker pull unclecode/crawl4ai:all      # Full installation (ML, LLM support)
-docker pull unclecode/crawl4ai:gpu      # GPU-enabled version
+# Basic version (recommended)
+docker pull unclecode/crawl4ai:basic-amd64
+docker run -p 11235:11235 unclecode/crawl4ai:basic-amd64

-# Run the container
-docker run -p 11235:11235 unclecode/crawl4ai:basic  # Replace 'basic' with your chosen version
+# Full ML/LLM support
+docker pull unclecode/crawl4ai:all-amd64
+docker run -p 11235:11235 unclecode/crawl4ai:all-amd64

-# In case you want to set platform to arm64
-docker run --platform linux/arm64 -p 11235:11235 unclecode/crawl4ai:basic
-
-# In case to allocate more shared memory for the container
-docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic
+# With GPU support
+docker pull unclecode/crawl4ai:gpu-amd64
+docker run -p 11235:11235 unclecode/crawl4ai:gpu-amd64
 ```

---
+### For ARM64 (M1/M2 Macs, ARM servers):
+```bash
+# Basic version (recommended)
+docker pull unclecode/crawl4ai:basic-arm64
+docker run -p 11235:11235 unclecode/crawl4ai:basic-arm64

-### Option 2: Build from Repository
+# Full ML/LLM support
+docker pull unclecode/crawl4ai:all-arm64
+docker run -p 11235:11235 unclecode/crawl4ai:all-arm64
+
+# With GPU support
+docker pull unclecode/crawl4ai:gpu-arm64
+docker run -p 11235:11235 unclecode/crawl4ai:gpu-arm64
+```
+
+Need more memory? Add `--shm-size`:
+```bash
+docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic-amd64
+```
+
+Test the installation:
+```bash
+curl http://localhost:11235/health
+```
+
+### For Raspberry Pi (32-bit) (coming soon):
+```bash
+# Pull and run basic version (recommended for Raspberry Pi)
+docker pull unclecode/crawl4ai:basic-armv7
+docker run -p 11235:11235 unclecode/crawl4ai:basic-armv7
+
+# With increased shared memory if needed
+docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic-armv7
+```
+
+Note: Due to hardware constraints, only the basic version is recommended for Raspberry Pi.
+
+</details>
+
+<details>
+<summary>🐳 <strong>Option 2: Build from Repository</strong></summary>
+
+Build the image locally based on your platform:

 ```bash
 # Clone the repository
 git clone https://github.com/unclecode/crawl4ai.git
 cd crawl4ai

-# Build the image
-docker build -t crawl4ai:local \
-  --build-arg INSTALL_TYPE=basic \  # Options: basic, all
+# For AMD64 (Regular Linux/Windows)
+docker build --platform linux/amd64 \
+  --tag crawl4ai:local \
+  --build-arg INSTALL_TYPE=basic \
  .

-# In case you want to set platform to arm64
-docker build -t crawl4ai:local \
-  --build-arg INSTALL_TYPE=basic \  # Options: basic, all
-  --platform linux/arm64 \
+# For ARM64 (M1/M2 Macs, ARM servers)
+docker build --platform linux/arm64 \
+  --tag crawl4ai:local \
+  --build-arg INSTALL_TYPE=basic \
  .
-
-# Run your local build
-docker run -p 11235:11235 crawl4ai:local
 ```

+Build options:
+- INSTALL_TYPE=basic (default): Basic crawling features
+- INSTALL_TYPE=all: Full ML/LLM support
+- ENABLE_GPU=true: Add GPU support
+
+Example with all options:
+```bash
+docker build --platform linux/amd64 \
+  --tag crawl4ai:local \
+  --build-arg INSTALL_TYPE=all \
+  --build-arg ENABLE_GPU=true \
+  .
+```
+
+Run your local build:
+```bash
+# Regular run
+docker run -p 11235:11235 crawl4ai:local
+
+# With increased shared memory
+docker run --shm-size=2gb -p 11235:11235 crawl4ai:local
+```
+
+Test the installation:
+```bash
+curl http://localhost:11235/health
+```
+
+</details>
+
+<details>
+<summary>🐳 <strong>Option 3: Using Docker Compose</strong></summary>
+
+Docker Compose provides a more structured way to run Crawl4AI, especially when dealing with environment variables and multiple configurations.
+
+```bash
+# Clone the repository
+git clone https://github.com/unclecode/crawl4ai.git
+cd crawl4ai
+```
+
+### For AMD64 (Regular Linux/Windows):
+```bash
+# Build and run locally
+docker-compose --profile local-amd64 up
+
+# Run from Docker Hub
+VERSION=basic docker-compose --profile hub-amd64 up   # Basic version
+VERSION=all docker-compose --profile hub-amd64 up     # Full ML/LLM support
+VERSION=gpu docker-compose --profile hub-amd64 up     # GPU support
+```
+
+### For ARM64 (M1/M2 Macs, ARM servers):
+```bash
+# Build and run locally
+docker-compose --profile local-arm64 up
+
+# Run from Docker Hub
+VERSION=basic docker-compose --profile hub-arm64 up   # Basic version
+VERSION=all docker-compose --profile hub-arm64 up     # Full ML/LLM support
+VERSION=gpu docker-compose --profile hub-arm64 up     # GPU support
+```
+
+Environment variables (optional):
+```bash
+# Create a .env file
+CRAWL4AI_API_TOKEN=your_token
+OPENAI_API_KEY=your_openai_key
+CLAUDE_API_KEY=your_claude_key
+```
+
+The compose file includes:
+- Memory management (4GB limit, 1GB reserved)
+- Shared memory volume for browser support
+- Health checks
+- Auto-restart policy
+- All necessary port mappings
+
+Test the installation:
+```bash
+curl http://localhost:11235/health
+```
+
+</details>
+
 ---

 ### Quick Test
@@ -276,11 +403,11 @@ response = requests.post(
 )
 task_id = response.json()["task_id"]

-# Get results
+# Continue polling until the task is complete (status="completed")
 result = requests.get(f"http://localhost:11235/task/{task_id}")
 ```

-For advanced configuration, environment variables, and usage examples, see our [Docker Deployment Guide](https://crawl4ai.com/mkdocs/basic/docker-deployment/).
+For more examples, see our [Docker Examples](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/docker_example.py). For advanced configuration, environment variables, and usage examples, see our [Docker Deployment Guide](https://crawl4ai.com/mkdocs/basic/docker-deployment/).

 </details>

--- a/crawl4ai/init.py
+++ b/crawl4ai/init.py
@@ -4,7 +4,6 @@ from .async_webcrawler import AsyncWebCrawler, CacheMode

 from .models import CrawlResult
 from .__version__ import __version__
-# __version__ = "0.3.73"

 __all__ = [
    "AsyncWebCrawler",
--- a/crawl4ai/version.py
+++ b/crawl4ai/version.py
@@ -1,2 +1,2 @@
 # crawl4ai/_version.py
-__version__ = "0.3.745"
+__version__ = "0.3.746"
--- a/crawl4ai/install.py
+++ b/crawl4ai/install.py
@@ -0,0 +1,44 @@
+import subprocess
+import sys
+import asyncio
+from .async_logger import AsyncLogger, LogLevel
+
+# Initialize logger
+logger = AsyncLogger(log_level=LogLevel.DEBUG, verbose=True)
+
+def post_install():
+    """Run all post-installation tasks: install Playwright browsers, then initialize the database."""
+    logger.info("Running post-installation setup...", tag="INIT")
+    install_playwright()
+    run_migration()
+    logger.success("Post-installation setup completed!", tag="COMPLETE")  # logged unconditionally: both helpers catch their own errors
+    
+def install_playwright():  # Best-effort: runs `python -m playwright install`; failures are logged, never raised.
+    logger.info("Installing Playwright browsers...", tag="INIT")
+    try:
+        subprocess.check_call([sys.executable, "-m", "playwright", "install"])  # invoke via the current interpreter
+        logger.success("Playwright installation completed successfully.", tag="COMPLETE")
+    except subprocess.CalledProcessError as e:  # the installer exited with a non-zero status
+        logger.error(f"Error during Playwright installation: {e}", tag="ERROR")
+        logger.warning(
+            "Please run 'python -m playwright install' manually after the installation."
+        )
+    except Exception as e:  # any other failure, e.g. the process could not be started
+        logger.error(f"Unexpected error during Playwright installation: {e}", tag="ERROR")
+        logger.warning(
+            "Please run 'python -m playwright install' manually after the installation."
+        )
+
+def run_migration():
+    """Initialize the database via `async_db_manager.initialize()`; on any failure, log a warning and return."""
+    try:
+        logger.info("Starting database initialization...", tag="INIT")
+        from crawl4ai.async_database import async_db_manager  # imported inside the try so ImportError is handled below
+
+        asyncio.run(async_db_manager.initialize())  # drive the async initializer to completion from this sync function
+        logger.success("Database initialization completed successfully.", tag="COMPLETE")
+    except ImportError:
+        logger.warning("Database module not found. Will initialize on first use.")
+    except Exception as e:  # deliberately broad: setup should not abort on a database error
+        logger.warning(f"Database initialization failed: {e}")
+        logger.warning("Database will be initialized on first use")
--- a/crawl4ai/migrations.py
+++ b/crawl4ai/migrations.py
@@ -9,9 +9,13 @@ import aiofiles
 import shutil
 import time
 from datetime import datetime
+from .async_logger import AsyncLogger, LogLevel

-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
+# Initialize logger
+logger = AsyncLogger(log_level=LogLevel.DEBUG, verbose=True)
+
+# logging.basicConfig(level=logging.INFO)
+# logger = logging.getLogger(__name__)

 class DatabaseMigration:
    def __init__(self, db_path: str):
@@ -55,7 +59,8 @@ class DatabaseMigration:

    async def migrate_database(self):
        """Migrate existing database to file-based storage"""
-        logger.info("Starting database migration...")
+        # logger.info("Starting database migration...")
+        logger.info("Starting database migration...", tag="INIT")
        
        try:
            async with aiosqlite.connect(self.db_path) as db:
@@ -91,19 +96,25 @@ class DatabaseMigration:
                    
                    migrated_count += 1
                    if migrated_count % 100 == 0:
-                        logger.info(f"Migrated {migrated_count} records...")
+                        logger.info(f"Migrated {migrated_count} records...", tag="INIT")
+                        

                await db.commit()
-                logger.info(f"Migration completed. {migrated_count} records processed.")
+                logger.success(f"Migration completed. {migrated_count} records processed.", tag="COMPLETE")

        except Exception as e:
-            logger.error(f"Migration failed: {e}")
-            raise
+            # logger.error(f"Migration failed: {e}")
+            logger.error(
+                message="Migration failed: {error}",
+                tag="ERROR",
+                params={"error": str(e)}
+            )
+            raise e

 async def backup_database(db_path: str) -> str:
    """Create backup of existing database"""
    if not os.path.exists(db_path):
-        logger.info("No existing database found. Skipping backup.")
+        logger.info("No existing database found. Skipping backup.", tag="INIT")
        return None
        
    # Create backup with timestamp
@@ -116,11 +127,16 @@ async def backup_database(db_path: str) -> str:
        
        # Create backup
        shutil.copy2(db_path, backup_path)
-        logger.info(f"Database backup created at: {backup_path}")
+        logger.info(f"Database backup created at: {backup_path}", tag="COMPLETE")
        return backup_path
    except Exception as e:
-        logger.error(f"Backup failed: {e}")
-        raise
+        # logger.error(f"Backup failed: {e}")
+        logger.error(
+                message="Migration failed: {error}",
+                tag="ERROR",
+                params={"error": str(e)}
+            )
+        raise e
    
 async def run_migration(db_path: Optional[str] = None):
    """Run database migration"""
@@ -128,7 +144,7 @@ async def run_migration(db_path: Optional[str] = None):
        db_path = os.path.join(Path.home(), ".crawl4ai", "crawl4ai.db")
    
    if not os.path.exists(db_path):
-        logger.info("No existing database found. Skipping migration.")
+        logger.info("No existing database found. Skipping migration.", tag="INIT")
        return
        
    # Create backup first
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,6 @@
 services:
-  crawl4ai:
+  # Local build services for different platforms
+  crawl4ai-amd64:
    build:
      context: .
      dockerfile: Dockerfile
@@ -7,35 +8,39 @@ services:
        PYTHON_VERSION: "3.10"
        INSTALL_TYPE: ${INSTALL_TYPE:-basic}
        ENABLE_GPU: false
-    profiles: ["local"]
-    ports:
-      - "11235:11235"
-      - "8000:8000"
-      - "9222:9222"
-      - "8080:8080"
-    environment:
-      - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-}
-      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
-      - CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
-    volumes:
-      - /dev/shm:/dev/shm
-    deploy:
-      resources:
-        limits:
-          memory: 4G
-        reservations:
-          memory: 1G
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11235/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 40s
+      platforms:
+        - linux/amd64
+    profiles: ["local-amd64"]
+    extends: &base-config
+      file: docker-compose.yml
+      service: base-config

-  crawl4ai-hub:
-    image: unclecode/crawl4ai:basic
-    profiles: ["hub"]
+  crawl4ai-arm64:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        PYTHON_VERSION: "3.10"
+        INSTALL_TYPE: ${INSTALL_TYPE:-basic}
+        ENABLE_GPU: false
+      platforms:
+        - linux/arm64
+    profiles: ["local-arm64"]
+    extends: *base-config
+
+  # Hub services for different platforms and versions
+  crawl4ai-hub-amd64:
+    image: unclecode/crawl4ai:${VERSION:-basic}-amd64
+    profiles: ["hub-amd64"]
+    extends: *base-config
+
+  crawl4ai-hub-arm64:
+    image: unclecode/crawl4ai:${VERSION:-basic}-arm64
+    profiles: ["hub-arm64"]
+    extends: *base-config
+
+  # Base configuration to be extended
+  base-config:
    ports:
      - "11235:11235"
      - "8000:8000"
--- a/docs/examples/docker_example.py
+++ b/docs/examples/docker_example.py
@@ -78,20 +78,20 @@ def test_docker_deployment(version="basic"):
            time.sleep(5)
    
    # Test cases based on version
-    # test_basic_crawl(tester)
-    # test_basic_crawl(tester)
-    # test_basic_crawl_sync(tester)
    test_basic_crawl_direct(tester)
+    test_basic_crawl(tester)
+    test_basic_crawl(tester)
+    test_basic_crawl_sync(tester)
    
-    # if version in ["full", "transformer"]:
-    #     test_cosine_extraction(tester)
+    if version in ["full", "transformer"]:
+        test_cosine_extraction(tester)

-    # test_js_execution(tester)
-    # test_css_selector(tester)
-    # test_structured_extraction(tester)
-    # test_llm_extraction(tester)
-    # test_llm_with_ollama(tester)
-    # test_screenshot(tester)
+    test_js_execution(tester)
+    test_css_selector(tester)
+    test_structured_extraction(tester)
+    test_llm_extraction(tester)
+    test_llm_with_ollama(tester)
+    test_screenshot(tester)
    

 def test_basic_crawl(tester: Crawl4AiTester):
--- a/docs/examples/quickstart_async.py
+++ b/docs/examples/quickstart_async.py
@@ -32,7 +32,7 @@ print("Website: https://crawl4ai.com")
 async def simple_crawl():
    print("\n--- Basic Usage ---")
    async with AsyncWebCrawler(verbose=True) as crawler:
-        result = await crawler.arun(url="https://www.nbcnews.com/business")
+        result = await crawler.arun(url="https://www.nbcnews.com/business", cache_mode= CacheMode.BYPASS)
        print(result.markdown[:500])  # Print first 500 characters

 async def simple_example_with_running_js_code():
@@ -76,8 +76,9 @@ async def use_proxy():
    async with AsyncWebCrawler(verbose=True, proxy="http://your-proxy-url:port") as crawler:
        result = await crawler.arun(
            url="https://www.nbcnews.com/business",
-            bypass_cache=True
+            cache_mode= CacheMode.BYPASS
        )
+        if result.success:
            print(result.markdown[:500])  # Print first 500 characters

 async def capture_and_save_screenshot(url: str, output_path: str):
@@ -85,7 +86,7 @@ async def capture_and_save_screenshot(url: str, output_path: str):
        result = await crawler.arun(
            url=url,
            screenshot=True,
-            bypass_cache=True
+            cache_mode= CacheMode.BYPASS
        )
        
        if result.success and result.screenshot:
@@ -141,41 +142,68 @@ async def extract_structured_data_using_llm(provider: str, api_token: str = None
 async def extract_structured_data_using_css_extractor():
    print("\n--- Using JsonCssExtractionStrategy for Fast Structured Output ---")
    schema = {
-        "name": "Coinbase Crypto Prices",
-        "baseSelector": ".cds-tableRow-t45thuk",
+    "name": "KidoCode Courses",
+    "baseSelector": "section.charge-methodology .w-tab-content > div",
    "fields": [
        {
-                "name": "crypto",
-                "selector": "td:nth-child(1) h2",
+            "name": "section_title",
+            "selector": "h3.heading-50",
            "type": "text",
        },
        {
-                "name": "symbol",
-                "selector": "td:nth-child(1) p",
+            "name": "section_description",
+            "selector": ".charge-content",
            "type": "text",
        },
        {
-                "name": "price",
-                "selector": "td:nth-child(2)",
+            "name": "course_name",
+            "selector": ".text-block-93",
            "type": "text",
+        },
+        {
+            "name": "course_description",
+            "selector": ".course-content-text",
+            "type": "text",
+        },
+        {
+            "name": "course_icon",
+            "selector": ".image-92",
+            "type": "attribute",
+            "attribute": "src"
        }
-        ],
+    ]
+}
+
+    async with AsyncWebCrawler(
+        headless=True,
+        verbose=True
+    ) as crawler:
+        
+        # Create the JavaScript that handles clicking multiple times
+        js_click_tabs = """
+        (async () => {
+            const tabs = document.querySelectorAll("section.charge-methodology .tabs-menu-3 > div");
+            
+            for(let tab of tabs) {
+                // scroll to the tab
+                tab.scrollIntoView();
+                tab.click();
+                // Wait for content to load and animations to complete
+                await new Promise(r => setTimeout(r, 500));
            }
+        })();
+        """     

-    extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True)
-
-    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
-            url="https://www.coinbase.com/explore",
-            extraction_strategy=extraction_strategy,
-            cache_mode=CacheMode.BYPASS,
+            url="https://www.kidocode.com/degrees/technology",
+            extraction_strategy=JsonCssExtractionStrategy(schema, verbose=True),
+            js_code=[js_click_tabs],
+            cache_mode=CacheMode.BYPASS
        )

-        assert result.success, "Failed to crawl the page"
-
-        news_teasers = json.loads(result.extracted_content)
-        print(f"Successfully extracted {len(news_teasers)} news teasers")
-        print(json.dumps(news_teasers[0], indent=2))
+        companies = json.loads(result.extracted_content)
+        print(f"Successfully extracted {len(companies)} companies")
+        print(json.dumps(companies[0], indent=2))

 # Advanced Session-Based Crawling with Dynamic Content 🔄
 async def crawl_dynamic_content_pages_method_1():
@@ -363,21 +391,21 @@ async def crawl_custom_browser_type():
    # Use Firefox
    start = time.time()
    async with AsyncWebCrawler(browser_type="firefox", verbose=True, headless = True) as crawler:
-        result = await crawler.arun(url="https://www.example.com", bypass_cache=True)
+        result = await crawler.arun(url="https://www.example.com", cache_mode= CacheMode.BYPASS)
        print(result.markdown[:500])
        print("Time taken: ", time.time() - start)

    # Use WebKit
    start = time.time()
    async with AsyncWebCrawler(browser_type="webkit", verbose=True, headless = True) as crawler:
-        result = await crawler.arun(url="https://www.example.com", bypass_cache=True)
+        result = await crawler.arun(url="https://www.example.com", cache_mode= CacheMode.BYPASS)
        print(result.markdown[:500])
        print("Time taken: ", time.time() - start)

    # Use Chromium (default)
    start = time.time()
    async with AsyncWebCrawler(verbose=True, headless = True) as crawler:
-        result = await crawler.arun(url="https://www.example.com", bypass_cache=True)
+        result = await crawler.arun(url="https://www.example.com", cache_mode= CacheMode.BYPASS)
        print(result.markdown[:500])
        print("Time taken: ", time.time() - start)

@@ -537,7 +565,7 @@ async def main():
    await simple_crawl()
    await simple_example_with_running_js_code()
    await simple_example_with_css_selector()
-    await use_proxy()
+    # await use_proxy()
    await capture_and_save_screenshot("https://www.example.com", os.path.join(__location__, "tmp/example_screenshot.jpg"))
    await extract_structured_data_using_css_extractor()

@@ -548,14 +576,14 @@ async def main():
    await extract_structured_data_using_llm("openai/gpt-4o", os.getenv("OPENAI_API_KEY"))

    # You always can pass custom headers to the extraction strategy
-    custom_headers = {
-        "Authorization": "Bearer your-custom-token",
-        "X-Custom-Header": "Some-Value"
-    }
-    await extract_structured_data_using_llm(extra_headers=custom_headers)
+    # custom_headers = {
+    #     "Authorization": "Bearer your-custom-token",
+    #     "X-Custom-Header": "Some-Value"
+    # }
+    # await extract_structured_data_using_llm(extra_headers=custom_headers)
    
-    # await crawl_dynamic_content_pages_method_1()
-    # await crawl_dynamic_content_pages_method_2()
+    await crawl_dynamic_content_pages_method_1()
+    await crawl_dynamic_content_pages_method_2()
    await crawl_dynamic_content_pages_method_3()
    
    await crawl_custom_browser_type()
--- a/main.py
+++ b/main.py
@@ -340,9 +340,6 @@ app.add_middleware(
    allow_headers=["*"],  # Allows all headers
 )

-# Mount the pages directory as a static directory
-app.mount("/pages", StaticFiles(directory=__location__ + "/pages"), name="pages")
-
 # API token security
 security = HTTPBearer()
 CRAWL4AI_API_TOKEN = os.getenv("CRAWL4AI_API_TOKEN") or "test_api_code"
@@ -364,7 +361,6 @@ if os.path.exists(__location__ + "/site"):
    app.mount("/mkdocs", StaticFiles(directory="site", html=True), name="mkdocs")

 site_templates = Jinja2Templates(directory=__location__ + "/site")
-templates = Jinja2Templates(directory=__location__ + "/pages")

 crawler_service = CrawlerService()

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,16 +1,16 @@
 aiosqlite~=0.20
-html2text~=2024.2
 lxml~=5.3
-litellm~=1.48
+litellm>=1.53.1
 numpy>=1.26.0,<3
 pillow~=10.4
-playwright>=1.47,<1.48
+playwright>=1.49.0
 python-dotenv~=1.0
 requests~=2.26
 beautifulsoup4~=4.12
-tf-playwright-stealth~=1.0
+tf-playwright-stealth>=1.1.0
 xxhash~=3.4
 rank-bm25~=0.2
-aiofiles~=24.0
+aiofiles>=24.1.0
 colorama~=0.4
 snowballstemmer~=2.2
+pydantic>=2.10
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,8 @@
 from setuptools import setup, find_packages
-from setuptools.command.install import install
 import os
 from pathlib import Path
 import shutil
-import subprocess
-import sys
-import asyncio
+

 # Create the .crawl4ai folder in the user's home directory if it doesn't exist
 # If the folder already exists, remove the cache folder
@@ -49,46 +46,6 @@ transformer_requirements = ["transformers", "tokenizers"]
 cosine_similarity_requirements = ["torch", "transformers", "nltk"]
 sync_requirements = ["selenium"]

-
-def install_playwright():
-    print("Installing Playwright browsers...")
-    try:
-        subprocess.check_call([sys.executable, "-m", "playwright", "install"])
-        print("Playwright installation completed successfully.")
-    except subprocess.CalledProcessError as e:
-        print(f"Error during Playwright installation: {e}")
-        print(
-            "Please run 'python -m playwright install' manually after the installation."
-        )
-    except Exception as e:
-        print(f"Unexpected error during Playwright installation: {e}")
-        print(
-            "Please run 'python -m playwright install' manually after the installation."
-        )
-
-
-def run_migration():
-    """Initialize database during installation"""
-    try:
-        print("Starting database initialization...")
-        from crawl4ai.async_database import async_db_manager
-
-        asyncio.run(async_db_manager.initialize())
-        print("Database initialization completed successfully.")
-    except ImportError:
-        print("Warning: Database module not found. Will initialize on first use.")
-    except Exception as e:
-        print(f"Warning: Database initialization failed: {e}")
-        print("Database will be initialized on first use")
-
-
-class PostInstallCommand(install):
-    def run(self):
-        install.run(self)
-        install_playwright()
-        # run_migration()
-
-
 setup(
    name="Crawl4AI",
    version=version,
@@ -116,7 +73,8 @@ setup(
    entry_points={
        "console_scripts": [
            "crawl4ai-download-models=crawl4ai.model_loader:main",
-            "crawl4ai-migrate=crawl4ai.migrations:main",  # Added migration command
+            "crawl4ai-migrate=crawl4ai.migrations:main",  
+            'crawl4ai-setup=crawl4ai.install:post_install', 
        ],
    },
    classifiers=[
@@ -130,7 +88,4 @@ setup(
        "Programming Language :: Python :: 3.10",
    ],
    python_requires=">=3.7",
-    cmdclass={
-        "install": PostInstallCommand,
-    },
 )