Compare commits
1 Commits
fix/cdp
...
release/v0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca100c6518 |
Submodule .yoyo/snapshot deleted from 5e783b71e7
@@ -1,23 +1,22 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import hashlib
|
import time
|
||||||
|
from typing import List, Optional
|
||||||
import os
|
import os
|
||||||
import shlex
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import psutil
|
||||||
import signal
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import shlex
|
||||||
import tempfile
|
|
||||||
import time
|
|
||||||
import warnings
|
|
||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
import psutil
|
|
||||||
from playwright.async_api import BrowserContext
|
from playwright.async_api import BrowserContext
|
||||||
|
import hashlib
|
||||||
from .async_configs import BrowserConfig, CrawlerRunConfig
|
|
||||||
from .config import DOWNLOAD_PAGE_TIMEOUT
|
|
||||||
from .js_snippet import load_js_script
|
from .js_snippet import load_js_script
|
||||||
|
from .config import DOWNLOAD_PAGE_TIMEOUT
|
||||||
|
from .async_configs import BrowserConfig, CrawlerRunConfig
|
||||||
from .utils import get_chromium_path
|
from .utils import get_chromium_path
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
BROWSER_DISABLE_OPTIONS = [
|
BROWSER_DISABLE_OPTIONS = [
|
||||||
"--disable-background-networking",
|
"--disable-background-networking",
|
||||||
@@ -66,7 +65,7 @@ class ManagedBrowser:
|
|||||||
_cleanup(): Terminates the browser process and removes the temporary directory.
|
_cleanup(): Terminates the browser process and removes the temporary directory.
|
||||||
create_profile(): Static method to create a user profile by launching a browser for user interaction.
|
create_profile(): Static method to create a user profile by launching a browser for user interaction.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def build_browser_flags(config: BrowserConfig) -> List[str]:
|
def build_browser_flags(config: BrowserConfig) -> List[str]:
|
||||||
"""Common CLI flags for launching Chromium"""
|
"""Common CLI flags for launching Chromium"""
|
||||||
@@ -93,25 +92,21 @@ class ManagedBrowser:
|
|||||||
if config.light_mode:
|
if config.light_mode:
|
||||||
flags.extend(BROWSER_DISABLE_OPTIONS)
|
flags.extend(BROWSER_DISABLE_OPTIONS)
|
||||||
if config.text_mode:
|
if config.text_mode:
|
||||||
flags.extend(
|
flags.extend([
|
||||||
[
|
"--blink-settings=imagesEnabled=false",
|
||||||
"--blink-settings=imagesEnabled=false",
|
"--disable-remote-fonts",
|
||||||
"--disable-remote-fonts",
|
"--disable-images",
|
||||||
"--disable-images",
|
"--disable-javascript",
|
||||||
"--disable-javascript",
|
"--disable-software-rasterizer",
|
||||||
"--disable-software-rasterizer",
|
"--disable-dev-shm-usage",
|
||||||
"--disable-dev-shm-usage",
|
])
|
||||||
]
|
|
||||||
)
|
|
||||||
# proxy support
|
# proxy support
|
||||||
if config.proxy:
|
if config.proxy:
|
||||||
flags.append(f"--proxy-server={config.proxy}")
|
flags.append(f"--proxy-server={config.proxy}")
|
||||||
elif config.proxy_config:
|
elif config.proxy_config:
|
||||||
creds = ""
|
creds = ""
|
||||||
if config.proxy_config.username and config.proxy_config.password:
|
if config.proxy_config.username and config.proxy_config.password:
|
||||||
creds = (
|
creds = f"{config.proxy_config.username}:{config.proxy_config.password}@"
|
||||||
f"{config.proxy_config.username}:{config.proxy_config.password}@"
|
|
||||||
)
|
|
||||||
flags.append(f"--proxy-server={creds}{config.proxy_config.server}")
|
flags.append(f"--proxy-server={creds}{config.proxy_config.server}")
|
||||||
# dedupe
|
# dedupe
|
||||||
return list(dict.fromkeys(flags))
|
return list(dict.fromkeys(flags))
|
||||||
@@ -132,7 +127,7 @@ class ManagedBrowser:
|
|||||||
logger=None,
|
logger=None,
|
||||||
host: str = "localhost",
|
host: str = "localhost",
|
||||||
debugging_port: int = 9222,
|
debugging_port: int = 9222,
|
||||||
cdp_url: Optional[str] = None,
|
cdp_url: Optional[str] = None,
|
||||||
browser_config: Optional[BrowserConfig] = None,
|
browser_config: Optional[BrowserConfig] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -168,7 +163,7 @@ class ManagedBrowser:
|
|||||||
Starts the browser process or returns CDP endpoint URL.
|
Starts the browser process or returns CDP endpoint URL.
|
||||||
If cdp_url is provided, returns it directly.
|
If cdp_url is provided, returns it directly.
|
||||||
If user_data_dir is not provided for local browser, creates a temporary directory.
|
If user_data_dir is not provided for local browser, creates a temporary directory.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: CDP endpoint URL
|
str: CDP endpoint URL
|
||||||
"""
|
"""
|
||||||
@@ -184,9 +179,10 @@ class ManagedBrowser:
|
|||||||
# Get browser path and args based on OS and browser type
|
# Get browser path and args based on OS and browser type
|
||||||
# browser_path = self._get_browser_path()
|
# browser_path = self._get_browser_path()
|
||||||
args = await self._get_browser_args()
|
args = await self._get_browser_args()
|
||||||
|
|
||||||
if self.browser_config.extra_args:
|
if self.browser_config.extra_args:
|
||||||
args.extend(self.browser_config.extra_args)
|
args.extend(self.browser_config.extra_args)
|
||||||
|
|
||||||
|
|
||||||
# ── make sure no old Chromium instance is owning the same port/profile ──
|
# ── make sure no old Chromium instance is owning the same port/profile ──
|
||||||
try:
|
try:
|
||||||
@@ -204,9 +200,7 @@ class ManagedBrowser:
|
|||||||
else: # macOS / Linux
|
else: # macOS / Linux
|
||||||
# kill any process listening on the same debugging port
|
# kill any process listening on the same debugging port
|
||||||
pids = (
|
pids = (
|
||||||
subprocess.check_output(
|
subprocess.check_output(shlex.split(f"lsof -t -i:{self.debugging_port}"))
|
||||||
shlex.split(f"lsof -t -i:{self.debugging_port}")
|
|
||||||
)
|
|
||||||
.decode()
|
.decode()
|
||||||
.strip()
|
.strip()
|
||||||
.splitlines()
|
.splitlines()
|
||||||
@@ -225,7 +219,8 @@ class ManagedBrowser:
|
|||||||
os.remove(fp)
|
os.remove(fp)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
# non-fatal — we'll try to start anyway, but log what happened
|
# non-fatal — we'll try to start anyway, but log what happened
|
||||||
self.logger.warning(f"pre-launch cleanup failed: {_e}", tag="BROWSER")
|
self.logger.warning(f"pre-launch cleanup failed: {_e}", tag="BROWSER")
|
||||||
|
|
||||||
|
|
||||||
# Start browser process
|
# Start browser process
|
||||||
try:
|
try:
|
||||||
@@ -233,26 +228,26 @@ class ManagedBrowser:
|
|||||||
# On Unix, we'll use preexec_fn=os.setpgrp to start the process in a new process group
|
# On Unix, we'll use preexec_fn=os.setpgrp to start the process in a new process group
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
self.browser_process = subprocess.Popen(
|
self.browser_process = subprocess.Popen(
|
||||||
args,
|
args,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
creationflags=subprocess.DETACHED_PROCESS
|
creationflags=subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
|
||||||
| subprocess.CREATE_NEW_PROCESS_GROUP,
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.browser_process = subprocess.Popen(
|
self.browser_process = subprocess.Popen(
|
||||||
args,
|
args,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
preexec_fn=os.setpgrp, # Start in a new process group
|
preexec_fn=os.setpgrp # Start in a new process group
|
||||||
)
|
)
|
||||||
|
|
||||||
# If verbose is True print args used to run the process
|
# If verbose is True print args used to run the process
|
||||||
if self.logger and self.browser_config.verbose:
|
if self.logger and self.browser_config.verbose:
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"Starting browser with args: {' '.join(args)}", tag="BROWSER"
|
f"Starting browser with args: {' '.join(args)}",
|
||||||
)
|
tag="BROWSER"
|
||||||
|
)
|
||||||
|
|
||||||
# We'll monitor for a short time to make sure it starts properly, but won't keep monitoring
|
# We'll monitor for a short time to make sure it starts properly, but won't keep monitoring
|
||||||
await asyncio.sleep(0.5) # Give browser time to start
|
await asyncio.sleep(0.5) # Give browser time to start
|
||||||
await self._initial_startup_check()
|
await self._initial_startup_check()
|
||||||
@@ -269,7 +264,7 @@ class ManagedBrowser:
|
|||||||
"""
|
"""
|
||||||
if not self.browser_process:
|
if not self.browser_process:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check that process started without immediate termination
|
# Check that process started without immediate termination
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
if self.browser_process.poll() is not None:
|
if self.browser_process.poll() is not None:
|
||||||
@@ -279,7 +274,7 @@ class ManagedBrowser:
|
|||||||
stdout, stderr = self.browser_process.communicate(timeout=0.5)
|
stdout, stderr = self.browser_process.communicate(timeout=0.5)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
message="Browser process terminated during startup | Code: {code} | STDOUT: {stdout} | STDERR: {stderr}",
|
message="Browser process terminated during startup | Code: {code} | STDOUT: {stdout} | STDERR: {stderr}",
|
||||||
tag="ERROR",
|
tag="ERROR",
|
||||||
@@ -289,7 +284,7 @@ class ManagedBrowser:
|
|||||||
"stderr": stderr.decode() if stderr else "",
|
"stderr": stderr.decode() if stderr else "",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _monitor_browser_process(self):
|
async def _monitor_browser_process(self):
|
||||||
"""
|
"""
|
||||||
Monitor the browser process for unexpected termination.
|
Monitor the browser process for unexpected termination.
|
||||||
@@ -412,14 +407,7 @@ class ManagedBrowser:
|
|||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
# On Windows we might need taskkill for detached processes
|
# On Windows we might need taskkill for detached processes
|
||||||
try:
|
try:
|
||||||
subprocess.run(
|
subprocess.run(["taskkill", "/F", "/PID", str(self.browser_process.pid)])
|
||||||
[
|
|
||||||
"taskkill",
|
|
||||||
"/F",
|
|
||||||
"/PID",
|
|
||||||
str(self.browser_process.pid),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
self.browser_process.kill()
|
self.browser_process.kill()
|
||||||
else:
|
else:
|
||||||
@@ -429,7 +417,7 @@ class ManagedBrowser:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
message="Error terminating browser: {error}",
|
message="Error terminating browser: {error}",
|
||||||
tag="ERROR",
|
tag="ERROR",
|
||||||
params={"error": str(e)},
|
params={"error": str(e)},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -442,77 +430,75 @@ class ManagedBrowser:
|
|||||||
tag="ERROR",
|
tag="ERROR",
|
||||||
params={"error": str(e)},
|
params={"error": str(e)},
|
||||||
)
|
)
|
||||||
|
|
||||||
# These methods have been moved to BrowserProfiler class
|
# These methods have been moved to BrowserProfiler class
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def create_profile(browser_config=None, profile_name=None, logger=None):
|
async def create_profile(browser_config=None, profile_name=None, logger=None):
|
||||||
"""
|
"""
|
||||||
This method has been moved to the BrowserProfiler class.
|
This method has been moved to the BrowserProfiler class.
|
||||||
|
|
||||||
Creates a browser profile by launching a browser for interactive user setup
|
Creates a browser profile by launching a browser for interactive user setup
|
||||||
and waits until the user closes it. The profile is stored in a directory that
|
and waits until the user closes it. The profile is stored in a directory that
|
||||||
can be used later with BrowserConfig.user_data_dir.
|
can be used later with BrowserConfig.user_data_dir.
|
||||||
|
|
||||||
Please use BrowserProfiler.create_profile() instead.
|
Please use BrowserProfiler.create_profile() instead.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
from crawl4ai.browser_profiler import BrowserProfiler
|
from crawl4ai.browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
profiler = BrowserProfiler()
|
profiler = BrowserProfiler()
|
||||||
profile_path = await profiler.create_profile(profile_name="my-login-profile")
|
profile_path = await profiler.create_profile(profile_name="my-login-profile")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from .browser_profiler import BrowserProfiler
|
from .browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
# Create a BrowserProfiler instance and delegate to it
|
# Create a BrowserProfiler instance and delegate to it
|
||||||
profiler = BrowserProfiler(logger=logger)
|
profiler = BrowserProfiler(logger=logger)
|
||||||
return await profiler.create_profile(
|
return await profiler.create_profile(profile_name=profile_name, browser_config=browser_config)
|
||||||
profile_name=profile_name, browser_config=browser_config
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def list_profiles():
|
def list_profiles():
|
||||||
"""
|
"""
|
||||||
This method has been moved to the BrowserProfiler class.
|
This method has been moved to the BrowserProfiler class.
|
||||||
|
|
||||||
Lists all available browser profiles in the Crawl4AI profiles directory.
|
Lists all available browser profiles in the Crawl4AI profiles directory.
|
||||||
|
|
||||||
Please use BrowserProfiler.list_profiles() instead.
|
Please use BrowserProfiler.list_profiles() instead.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
from crawl4ai.browser_profiler import BrowserProfiler
|
from crawl4ai.browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
profiler = BrowserProfiler()
|
profiler = BrowserProfiler()
|
||||||
profiles = profiler.list_profiles()
|
profiles = profiler.list_profiles()
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from .browser_profiler import BrowserProfiler
|
from .browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
# Create a BrowserProfiler instance and delegate to it
|
# Create a BrowserProfiler instance and delegate to it
|
||||||
profiler = BrowserProfiler()
|
profiler = BrowserProfiler()
|
||||||
return profiler.list_profiles()
|
return profiler.list_profiles()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def delete_profile(profile_name_or_path):
|
def delete_profile(profile_name_or_path):
|
||||||
"""
|
"""
|
||||||
This method has been moved to the BrowserProfiler class.
|
This method has been moved to the BrowserProfiler class.
|
||||||
|
|
||||||
Delete a browser profile by name or path.
|
Delete a browser profile by name or path.
|
||||||
|
|
||||||
Please use BrowserProfiler.delete_profile() instead.
|
Please use BrowserProfiler.delete_profile() instead.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
from crawl4ai.browser_profiler import BrowserProfiler
|
from crawl4ai.browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
profiler = BrowserProfiler()
|
profiler = BrowserProfiler()
|
||||||
success = profiler.delete_profile("my-profile")
|
success = profiler.delete_profile("my-profile")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
from .browser_profiler import BrowserProfiler
|
from .browser_profiler import BrowserProfiler
|
||||||
|
|
||||||
# Create a BrowserProfiler instance and delegate to it
|
# Create a BrowserProfiler instance and delegate to it
|
||||||
profiler = BrowserProfiler()
|
profiler = BrowserProfiler()
|
||||||
return profiler.delete_profile(profile_name_or_path)
|
return profiler.delete_profile(profile_name_or_path)
|
||||||
@@ -565,10 +551,11 @@ async def clone_runtime_state(
|
|||||||
"accuracy": crawlerRunConfig.geolocation.accuracy,
|
"accuracy": crawlerRunConfig.geolocation.accuracy,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return dst
|
return dst
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class BrowserManager:
|
class BrowserManager:
|
||||||
"""
|
"""
|
||||||
Manages the browser instance and context.
|
Manages the browser instance and context.
|
||||||
@@ -585,7 +572,7 @@ class BrowserManager:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_playwright_instance = None
|
_playwright_instance = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def get_playwright(cls, use_undetected: bool = False):
|
async def get_playwright(cls, use_undetected: bool = False):
|
||||||
if use_undetected:
|
if use_undetected:
|
||||||
@@ -593,11 +580,9 @@ class BrowserManager:
|
|||||||
else:
|
else:
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
cls._playwright_instance = await async_playwright().start()
|
cls._playwright_instance = await async_playwright().start()
|
||||||
return cls._playwright_instance
|
return cls._playwright_instance
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, browser_config: BrowserConfig, logger=None, use_undetected: bool = False):
|
||||||
self, browser_config: BrowserConfig, logger=None, use_undetected: bool = False
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Initialize the BrowserManager with a browser configuration.
|
Initialize the BrowserManager with a browser configuration.
|
||||||
|
|
||||||
@@ -623,17 +608,16 @@ class BrowserManager:
|
|||||||
# Keep track of contexts by a "config signature," so each unique config reuses a single context
|
# Keep track of contexts by a "config signature," so each unique config reuses a single context
|
||||||
self.contexts_by_config = {}
|
self.contexts_by_config = {}
|
||||||
self._contexts_lock = asyncio.Lock()
|
self._contexts_lock = asyncio.Lock()
|
||||||
|
|
||||||
# Serialize context.new_page() across concurrent tasks to avoid races
|
# Serialize context.new_page() across concurrent tasks to avoid races
|
||||||
# when using a shared persistent context (context.pages may be empty
|
# when using a shared persistent context (context.pages may be empty
|
||||||
# for all racers). Prevents 'Target page/context closed' errors.
|
# for all racers). Prevents 'Target page/context closed' errors.
|
||||||
self._page_lock = asyncio.Lock()
|
self._page_lock = asyncio.Lock()
|
||||||
|
|
||||||
# Stealth adapter for stealth mode
|
# Stealth adapter for stealth mode
|
||||||
self._stealth_adapter = None
|
self._stealth_adapter = None
|
||||||
if self.config.enable_stealth and not self.use_undetected:
|
if self.config.enable_stealth and not self.use_undetected:
|
||||||
from .browser_adapter import StealthAdapter
|
from .browser_adapter import StealthAdapter
|
||||||
|
|
||||||
self._stealth_adapter = StealthAdapter()
|
self._stealth_adapter = StealthAdapter()
|
||||||
|
|
||||||
# Initialize ManagedBrowser if needed
|
# Initialize ManagedBrowser if needed
|
||||||
@@ -662,7 +646,7 @@ class BrowserManager:
|
|||||||
"""
|
"""
|
||||||
if self.playwright is not None:
|
if self.playwright is not None:
|
||||||
await self.close()
|
await self.close()
|
||||||
|
|
||||||
if self.use_undetected:
|
if self.use_undetected:
|
||||||
from patchright.async_api import async_playwright
|
from patchright.async_api import async_playwright
|
||||||
else:
|
else:
|
||||||
@@ -673,11 +657,7 @@ class BrowserManager:
|
|||||||
|
|
||||||
if self.config.cdp_url or self.config.use_managed_browser:
|
if self.config.cdp_url or self.config.use_managed_browser:
|
||||||
self.config.use_managed_browser = True
|
self.config.use_managed_browser = True
|
||||||
cdp_url = (
|
cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url
|
||||||
await self.managed_browser.start()
|
|
||||||
if not self.config.cdp_url
|
|
||||||
else self.config.cdp_url
|
|
||||||
)
|
|
||||||
self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
|
self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
|
||||||
contexts = self.browser.contexts
|
contexts = self.browser.contexts
|
||||||
if contexts:
|
if contexts:
|
||||||
@@ -698,6 +678,7 @@ class BrowserManager:
|
|||||||
|
|
||||||
self.default_context = self.browser
|
self.default_context = self.browser
|
||||||
|
|
||||||
|
|
||||||
def _build_browser_args(self) -> dict:
|
def _build_browser_args(self) -> dict:
|
||||||
"""Build browser launch arguments from config."""
|
"""Build browser launch arguments from config."""
|
||||||
args = [
|
args = [
|
||||||
@@ -743,7 +724,7 @@ class BrowserManager:
|
|||||||
|
|
||||||
# Deduplicate args
|
# Deduplicate args
|
||||||
args = list(dict.fromkeys(args))
|
args = list(dict.fromkeys(args))
|
||||||
|
|
||||||
browser_args = {"headless": self.config.headless, "args": args}
|
browser_args = {"headless": self.config.headless, "args": args}
|
||||||
|
|
||||||
if self.config.chrome_channel:
|
if self.config.chrome_channel:
|
||||||
@@ -820,9 +801,9 @@ class BrowserManager:
|
|||||||
context.set_default_navigation_timeout(DOWNLOAD_PAGE_TIMEOUT)
|
context.set_default_navigation_timeout(DOWNLOAD_PAGE_TIMEOUT)
|
||||||
if self.config.downloads_path:
|
if self.config.downloads_path:
|
||||||
context._impl_obj._options["accept_downloads"] = True
|
context._impl_obj._options["accept_downloads"] = True
|
||||||
context._impl_obj._options["downloads_path"] = (
|
context._impl_obj._options[
|
||||||
self.config.downloads_path
|
"downloads_path"
|
||||||
)
|
] = self.config.downloads_path
|
||||||
|
|
||||||
# Handle user agent and browser hints
|
# Handle user agent and browser hints
|
||||||
if self.config.user_agent:
|
if self.config.user_agent:
|
||||||
@@ -853,7 +834,7 @@ class BrowserManager:
|
|||||||
or crawlerRunConfig.simulate_user
|
or crawlerRunConfig.simulate_user
|
||||||
or crawlerRunConfig.magic
|
or crawlerRunConfig.magic
|
||||||
):
|
):
|
||||||
await context.add_init_script(load_js_script("navigator_overrider"))
|
await context.add_init_script(load_js_script("navigator_overrider"))
|
||||||
|
|
||||||
async def create_browser_context(self, crawlerRunConfig: CrawlerRunConfig = None):
|
async def create_browser_context(self, crawlerRunConfig: CrawlerRunConfig = None):
|
||||||
"""
|
"""
|
||||||
@@ -864,7 +845,7 @@ class BrowserManager:
|
|||||||
Context: Browser context object with the specified configurations
|
Context: Browser context object with the specified configurations
|
||||||
"""
|
"""
|
||||||
# Base settings
|
# Base settings
|
||||||
user_agent = self.config.headers.get("User-Agent", self.config.user_agent)
|
user_agent = self.config.headers.get("User-Agent", self.config.user_agent)
|
||||||
viewport_settings = {
|
viewport_settings = {
|
||||||
"width": self.config.viewport_width,
|
"width": self.config.viewport_width,
|
||||||
"height": self.config.viewport_height,
|
"height": self.config.viewport_height,
|
||||||
@@ -937,7 +918,7 @@ class BrowserManager:
|
|||||||
"device_scale_factor": 1.0,
|
"device_scale_factor": 1.0,
|
||||||
"java_script_enabled": self.config.java_script_enabled,
|
"java_script_enabled": self.config.java_script_enabled,
|
||||||
}
|
}
|
||||||
|
|
||||||
if crawlerRunConfig:
|
if crawlerRunConfig:
|
||||||
# Check if there is value for crawlerRunConfig.proxy_config set add that to context
|
# Check if there is value for crawlerRunConfig.proxy_config set add that to context
|
||||||
if crawlerRunConfig.proxy_config:
|
if crawlerRunConfig.proxy_config:
|
||||||
@@ -945,12 +926,10 @@ class BrowserManager:
|
|||||||
"server": crawlerRunConfig.proxy_config.server,
|
"server": crawlerRunConfig.proxy_config.server,
|
||||||
}
|
}
|
||||||
if crawlerRunConfig.proxy_config.username:
|
if crawlerRunConfig.proxy_config.username:
|
||||||
proxy_settings.update(
|
proxy_settings.update({
|
||||||
{
|
"username": crawlerRunConfig.proxy_config.username,
|
||||||
"username": crawlerRunConfig.proxy_config.username,
|
"password": crawlerRunConfig.proxy_config.password,
|
||||||
"password": crawlerRunConfig.proxy_config.password,
|
})
|
||||||
}
|
|
||||||
)
|
|
||||||
context_settings["proxy"] = proxy_settings
|
context_settings["proxy"] = proxy_settings
|
||||||
|
|
||||||
if self.config.text_mode:
|
if self.config.text_mode:
|
||||||
@@ -1008,12 +987,12 @@ class BrowserManager:
|
|||||||
"cache_mode",
|
"cache_mode",
|
||||||
"content_filter",
|
"content_filter",
|
||||||
"semaphore_count",
|
"semaphore_count",
|
||||||
"url",
|
"url"
|
||||||
]
|
]
|
||||||
|
|
||||||
# Do NOT exclude locale, timezone_id, or geolocation as these DO affect browser context
|
# Do NOT exclude locale, timezone_id, or geolocation as these DO affect browser context
|
||||||
# and should cause a new context to be created if they change
|
# and should cause a new context to be created if they change
|
||||||
|
|
||||||
for key in ephemeral_keys:
|
for key in ephemeral_keys:
|
||||||
if key in config_dict:
|
if key in config_dict:
|
||||||
del config_dict[key]
|
del config_dict[key]
|
||||||
@@ -1034,7 +1013,7 @@ class BrowserManager:
|
|||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
message="Failed to apply stealth to page: {error}",
|
message="Failed to apply stealth to page: {error}",
|
||||||
tag="STEALTH",
|
tag="STEALTH",
|
||||||
params={"error": str(e)},
|
params={"error": str(e)}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
|
async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
|
||||||
@@ -1060,10 +1039,8 @@ class BrowserManager:
|
|||||||
if self.config.use_managed_browser:
|
if self.config.use_managed_browser:
|
||||||
if self.config.storage_state:
|
if self.config.storage_state:
|
||||||
context = await self.create_browser_context(crawlerRunConfig)
|
context = await self.create_browser_context(crawlerRunConfig)
|
||||||
ctx = self.default_context # default context, one window only
|
ctx = self.default_context # default context, one window only
|
||||||
ctx = await clone_runtime_state(
|
ctx = await clone_runtime_state(context, ctx, crawlerRunConfig, self.config)
|
||||||
context, ctx, crawlerRunConfig, self.config
|
|
||||||
)
|
|
||||||
# Avoid concurrent new_page on shared persistent context
|
# Avoid concurrent new_page on shared persistent context
|
||||||
# See GH-1198: context.pages can be empty under races
|
# See GH-1198: context.pages can be empty under races
|
||||||
async with self._page_lock:
|
async with self._page_lock:
|
||||||
@@ -1075,21 +1052,14 @@ class BrowserManager:
|
|||||||
page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
|
page = next((p for p in pages if p.url == crawlerRunConfig.url), None)
|
||||||
if not page:
|
if not page:
|
||||||
if pages:
|
if pages:
|
||||||
# FIX: Always create a new page for managed browsers to support concurrent crawling
|
page = pages[0]
|
||||||
# Previously: page = pages[0]
|
|
||||||
async with self._page_lock:
|
|
||||||
page = await context.new_page()
|
|
||||||
await self._apply_stealth_to_page(page)
|
|
||||||
else:
|
else:
|
||||||
# Double-check under lock to avoid TOCTOU and ensure only
|
# Double-check under lock to avoid TOCTOU and ensure only
|
||||||
# one task calls new_page when pages=[] concurrently
|
# one task calls new_page when pages=[] concurrently
|
||||||
async with self._page_lock:
|
async with self._page_lock:
|
||||||
pages = context.pages
|
pages = context.pages
|
||||||
if pages:
|
if pages:
|
||||||
# FIX: Always create a new page for managed browsers to support concurrent crawling
|
page = pages[0]
|
||||||
# Previously: page = pages[0]
|
|
||||||
page = await context.new_page()
|
|
||||||
await self._apply_stealth_to_page(page)
|
|
||||||
else:
|
else:
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
await self._apply_stealth_to_page(page)
|
await self._apply_stealth_to_page(page)
|
||||||
@@ -1145,7 +1115,7 @@ class BrowserManager:
|
|||||||
"""Close all browser resources and clean up."""
|
"""Close all browser resources and clean up."""
|
||||||
if self.config.cdp_url:
|
if self.config.cdp_url:
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.config.sleep_on_close:
|
if self.config.sleep_on_close:
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
@@ -1161,7 +1131,7 @@ class BrowserManager:
|
|||||||
self.logger.error(
|
self.logger.error(
|
||||||
message="Error closing context: {error}",
|
message="Error closing context: {error}",
|
||||||
tag="ERROR",
|
tag="ERROR",
|
||||||
params={"error": str(e)},
|
params={"error": str(e)}
|
||||||
)
|
)
|
||||||
self.contexts_by_config.clear()
|
self.contexts_by_config.clear()
|
||||||
|
|
||||||
|
|||||||
@@ -785,54 +785,6 @@ curl http://localhost:11235/crawl/job/crawl_xyz
|
|||||||
|
|
||||||
The response includes `status` field: `"processing"`, `"completed"`, or `"failed"`.
|
The response includes `status` field: `"processing"`, `"completed"`, or `"failed"`.
|
||||||
|
|
||||||
#### LLM Extraction Jobs with Webhooks
|
|
||||||
|
|
||||||
The same webhook system works for LLM extraction jobs via `/llm/job`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Submit LLM extraction job with webhook
|
|
||||||
curl -X POST http://localhost:11235/llm/job \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"url": "https://example.com/article",
|
|
||||||
"q": "Extract the article title, author, and main points",
|
|
||||||
"provider": "openai/gpt-4o-mini",
|
|
||||||
"webhook_config": {
|
|
||||||
"webhook_url": "https://myapp.com/webhooks/llm-complete",
|
|
||||||
"webhook_data_in_payload": true,
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
|
|
||||||
# Response: {"task_id": "llm_1234567890"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Your webhook receives:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "llm_1234567890",
|
|
||||||
"task_type": "llm_extraction",
|
|
||||||
"status": "completed",
|
|
||||||
"timestamp": "2025-10-22T12:30:00.000000+00:00",
|
|
||||||
"urls": ["https://example.com/article"],
|
|
||||||
"data": {
|
|
||||||
"extracted_content": {
|
|
||||||
"title": "Understanding Web Scraping",
|
|
||||||
"author": "John Doe",
|
|
||||||
"main_points": ["Point 1", "Point 2", "Point 3"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Key Differences for LLM Jobs:**
|
|
||||||
- Task type is `"llm_extraction"` instead of `"crawl"`
|
|
||||||
- Extracted data is in `data.extracted_content`
|
|
||||||
- Single URL only (not an array)
|
|
||||||
- Supports schema-based extraction with `schema` parameter
|
|
||||||
|
|
||||||
> 💡 **Pro tip**: See [WEBHOOK_EXAMPLES.md](./WEBHOOK_EXAMPLES.md) for detailed examples including TypeScript client code, Flask webhook handlers, and failure handling.
|
> 💡 **Pro tip**: See [WEBHOOK_EXAMPLES.md](./WEBHOOK_EXAMPLES.md) for detailed examples including TypeScript client code, Flask webhook handlers, and failure handling.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -27,14 +27,6 @@
|
|||||||
- [Hook Response Information](#hook-response-information)
|
- [Hook Response Information](#hook-response-information)
|
||||||
- [Error Handling](#error-handling)
|
- [Error Handling](#error-handling)
|
||||||
- [Hooks Utility: Function-Based Approach (Python)](#hooks-utility-function-based-approach-python)
|
- [Hooks Utility: Function-Based Approach (Python)](#hooks-utility-function-based-approach-python)
|
||||||
- [Job Queue & Webhook API](#job-queue-webhook-api)
|
|
||||||
- [Why Use the Job Queue API?](#why-use-the-job-queue-api)
|
|
||||||
- [Available Endpoints](#available-endpoints)
|
|
||||||
- [Webhook Configuration](#webhook-configuration)
|
|
||||||
- [Usage Examples](#usage-examples)
|
|
||||||
- [Webhook Best Practices](#webhook-best-practices)
|
|
||||||
- [Use Cases](#use-cases)
|
|
||||||
- [Troubleshooting](#troubleshooting)
|
|
||||||
- [Dockerfile Parameters](#dockerfile-parameters)
|
- [Dockerfile Parameters](#dockerfile-parameters)
|
||||||
- [Using the API](#using-the-api)
|
- [Using the API](#using-the-api)
|
||||||
- [Playground Interface](#playground-interface)
|
- [Playground Interface](#playground-interface)
|
||||||
@@ -1118,464 +1110,6 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Job Queue & Webhook API
|
|
||||||
|
|
||||||
The Docker deployment includes a powerful asynchronous job queue system with webhook support for both crawling and LLM extraction tasks. Instead of waiting for long-running operations to complete, submit jobs and receive real-time notifications via webhooks when they finish.
|
|
||||||
|
|
||||||
### Why Use the Job Queue API?
|
|
||||||
|
|
||||||
**Traditional Synchronous API (`/crawl`):**
|
|
||||||
- Client waits for entire crawl to complete
|
|
||||||
- Timeout issues with long-running crawls
|
|
||||||
- Resource blocking during execution
|
|
||||||
- Constant polling required for status updates
|
|
||||||
|
|
||||||
**Asynchronous Job Queue API (`/crawl/job`, `/llm/job`):**
|
|
||||||
- ✅ Submit job and continue immediately
|
|
||||||
- ✅ No timeout concerns for long operations
|
|
||||||
- ✅ Real-time webhook notifications on completion
|
|
||||||
- ✅ Better resource utilization
|
|
||||||
- ✅ Perfect for batch processing
|
|
||||||
- ✅ Ideal for microservice architectures
|
|
||||||
|
|
||||||
### Available Endpoints
|
|
||||||
|
|
||||||
#### 1. Crawl Job Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
POST /crawl/job
|
|
||||||
```
|
|
||||||
|
|
||||||
Submit an asynchronous crawl job with optional webhook notification.
|
|
||||||
|
|
||||||
**Request Body:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"urls": ["https://example.com"],
|
|
||||||
"cache_mode": "bypass",
|
|
||||||
"extraction_strategy": {
|
|
||||||
"type": "JsonCssExtractionStrategy",
|
|
||||||
"schema": {
|
|
||||||
"title": "h1",
|
|
||||||
"content": ".article-body"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"webhook_config": {
|
|
||||||
"webhook_url": "https://your-app.com/webhook/crawl-complete",
|
|
||||||
"webhook_data_in_payload": true,
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token",
|
|
||||||
"X-Custom-Header": "value"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Response:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "crawl_1698765432",
|
|
||||||
"message": "Crawl job submitted"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 2. LLM Extraction Job Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
POST /llm/job
|
|
||||||
```
|
|
||||||
|
|
||||||
Submit an asynchronous LLM extraction job with optional webhook notification.
|
|
||||||
|
|
||||||
**Request Body:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"url": "https://example.com/article",
|
|
||||||
"q": "Extract the article title, author, publication date, and main points",
|
|
||||||
"provider": "openai/gpt-4o-mini",
|
|
||||||
"schema": "{\"title\": \"string\", \"author\": \"string\", \"date\": \"string\", \"points\": [\"string\"]}",
|
|
||||||
"cache": false,
|
|
||||||
"webhook_config": {
|
|
||||||
"webhook_url": "https://your-app.com/webhook/llm-complete",
|
|
||||||
"webhook_data_in_payload": true,
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Response:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "llm_1698765432",
|
|
||||||
"message": "LLM job submitted"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 3. Job Status Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
GET /job/{task_id}
|
|
||||||
```
|
|
||||||
|
|
||||||
Check the status and retrieve results of a submitted job.
|
|
||||||
|
|
||||||
**Response (In Progress):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "crawl_1698765432",
|
|
||||||
"status": "processing",
|
|
||||||
"message": "Job is being processed"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Response (Completed):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "crawl_1698765432",
|
|
||||||
"status": "completed",
|
|
||||||
"result": {
|
|
||||||
"markdown": "# Page Title\n\nContent...",
|
|
||||||
"extracted_content": {...},
|
|
||||||
"links": {...}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Webhook Configuration
|
|
||||||
|
|
||||||
Webhooks provide real-time notifications when your jobs complete, eliminating the need for constant polling.
|
|
||||||
|
|
||||||
#### Webhook Config Parameters
|
|
||||||
|
|
||||||
| Parameter | Type | Required | Description |
|
|
||||||
|-----------|------|----------|-------------|
|
|
||||||
| `webhook_url` | string | Yes | Your HTTP(S) endpoint to receive notifications |
|
|
||||||
| `webhook_data_in_payload` | boolean | No | Include full result data in webhook payload (default: false) |
|
|
||||||
| `webhook_headers` | object | No | Custom headers for authentication/identification |
|
|
||||||
|
|
||||||
#### Webhook Payload Format
|
|
||||||
|
|
||||||
**Success Notification (Crawl Job):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "crawl_1698765432",
|
|
||||||
"task_type": "crawl",
|
|
||||||
"status": "completed",
|
|
||||||
"timestamp": "2025-10-22T12:30:00.000000+00:00",
|
|
||||||
"urls": ["https://example.com"],
|
|
||||||
"data": {
|
|
||||||
"markdown": "# Page content...",
|
|
||||||
"extracted_content": {...},
|
|
||||||
"links": {...}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Success Notification (LLM Job):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "llm_1698765432",
|
|
||||||
"task_type": "llm_extraction",
|
|
||||||
"status": "completed",
|
|
||||||
"timestamp": "2025-10-22T12:30:00.000000+00:00",
|
|
||||||
"urls": ["https://example.com/article"],
|
|
||||||
"data": {
|
|
||||||
"extracted_content": {
|
|
||||||
"title": "Understanding Web Scraping",
|
|
||||||
"author": "John Doe",
|
|
||||||
"date": "2025-10-22",
|
|
||||||
"points": ["Point 1", "Point 2"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Failure Notification:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"task_id": "crawl_1698765432",
|
|
||||||
"task_type": "crawl",
|
|
||||||
"status": "failed",
|
|
||||||
"timestamp": "2025-10-22T12:30:00.000000+00:00",
|
|
||||||
"urls": ["https://example.com"],
|
|
||||||
"error": "Connection timeout after 30 seconds"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Webhook Delivery & Retry
|
|
||||||
|
|
||||||
- **Delivery Method:** HTTP POST to your `webhook_url`
|
|
||||||
- **Content-Type:** `application/json`
|
|
||||||
- **Retry Policy:** Exponential backoff with 5 attempts
|
|
||||||
- Attempt 1: Immediate
|
|
||||||
- Attempt 2: 1 second delay
|
|
||||||
- Attempt 3: 2 seconds delay
|
|
||||||
- Attempt 4: 4 seconds delay
|
|
||||||
- Attempt 5: 8 seconds delay
|
|
||||||
- **Success Status Codes:** 200-299
|
|
||||||
- **Custom Headers:** Your `webhook_headers` are included in every request
|
|
||||||
|
|
||||||
### Usage Examples
|
|
||||||
|
|
||||||
#### Example 1: Python with Webhook Handler (Flask)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from flask import Flask, request, jsonify
|
|
||||||
import requests
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
|
|
||||||
# Webhook handler
|
|
||||||
@app.route('/webhook/crawl-complete', methods=['POST'])
|
|
||||||
def handle_crawl_webhook():
|
|
||||||
payload = request.json
|
|
||||||
|
|
||||||
if payload['status'] == 'completed':
|
|
||||||
print(f"✅ Job {payload['task_id']} completed!")
|
|
||||||
print(f"Task type: {payload['task_type']}")
|
|
||||||
|
|
||||||
# Access the crawl results
|
|
||||||
if 'data' in payload:
|
|
||||||
markdown = payload['data'].get('markdown', '')
|
|
||||||
extracted = payload['data'].get('extracted_content', {})
|
|
||||||
print(f"Extracted {len(markdown)} characters")
|
|
||||||
print(f"Structured data: {extracted}")
|
|
||||||
else:
|
|
||||||
print(f"❌ Job {payload['task_id']} failed: {payload.get('error')}")
|
|
||||||
|
|
||||||
return jsonify({"status": "received"}), 200
|
|
||||||
|
|
||||||
# Submit a crawl job with webhook
|
|
||||||
def submit_crawl_job():
|
|
||||||
response = requests.post(
|
|
||||||
"http://localhost:11235/crawl/job",
|
|
||||||
json={
|
|
||||||
"urls": ["https://example.com"],
|
|
||||||
"extraction_strategy": {
|
|
||||||
"type": "JsonCssExtractionStrategy",
|
|
||||||
"schema": {
|
|
||||||
"name": "Example Schema",
|
|
||||||
"baseSelector": "body",
|
|
||||||
"fields": [
|
|
||||||
{"name": "title", "selector": "h1", "type": "text"},
|
|
||||||
{"name": "description", "selector": "meta[name='description']", "type": "attribute", "attribute": "content"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"webhook_config": {
|
|
||||||
"webhook_url": "https://your-app.com/webhook/crawl-complete",
|
|
||||||
"webhook_data_in_payload": True,
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
task_id = response.json()['task_id']
|
|
||||||
print(f"Job submitted: {task_id}")
|
|
||||||
return task_id
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
app.run(port=5000)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Example 2: LLM Extraction with Webhooks
|
|
||||||
|
|
||||||
```python
|
|
||||||
import requests
|
|
||||||
|
|
||||||
def submit_llm_job_with_webhook():
|
|
||||||
response = requests.post(
|
|
||||||
"http://localhost:11235/llm/job",
|
|
||||||
json={
|
|
||||||
"url": "https://example.com/article",
|
|
||||||
"q": "Extract the article title, author, and main points",
|
|
||||||
"provider": "openai/gpt-4o-mini",
|
|
||||||
"webhook_config": {
|
|
||||||
"webhook_url": "https://your-app.com/webhook/llm-complete",
|
|
||||||
"webhook_data_in_payload": True,
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
task_id = response.json()['task_id']
|
|
||||||
print(f"LLM job submitted: {task_id}")
|
|
||||||
return task_id
|
|
||||||
|
|
||||||
# Webhook handler for LLM jobs
|
|
||||||
@app.route('/webhook/llm-complete', methods=['POST'])
|
|
||||||
def handle_llm_webhook():
|
|
||||||
payload = request.json
|
|
||||||
|
|
||||||
if payload['status'] == 'completed':
|
|
||||||
extracted = payload['data']['extracted_content']
|
|
||||||
print(f"✅ LLM extraction completed!")
|
|
||||||
print(f"Results: {extracted}")
|
|
||||||
else:
|
|
||||||
print(f"❌ LLM extraction failed: {payload.get('error')}")
|
|
||||||
|
|
||||||
return jsonify({"status": "received"}), 200
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Example 3: Without Webhooks (Polling)
|
|
||||||
|
|
||||||
If you don't use webhooks, you can poll for results:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import requests
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Submit job
|
|
||||||
response = requests.post(
|
|
||||||
"http://localhost:11235/crawl/job",
|
|
||||||
json={"urls": ["https://example.com"]}
|
|
||||||
)
|
|
||||||
task_id = response.json()['task_id']
|
|
||||||
|
|
||||||
# Poll for results
|
|
||||||
while True:
|
|
||||||
result = requests.get(f"http://localhost:11235/job/{task_id}")
|
|
||||||
data = result.json()
|
|
||||||
|
|
||||||
if data['status'] == 'completed':
|
|
||||||
print("Job completed!")
|
|
||||||
print(data['result'])
|
|
||||||
break
|
|
||||||
elif data['status'] == 'failed':
|
|
||||||
print(f"Job failed: {data.get('error')}")
|
|
||||||
break
|
|
||||||
|
|
||||||
print("Still processing...")
|
|
||||||
time.sleep(2)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Example 4: Global Webhook Configuration
|
|
||||||
|
|
||||||
Set a default webhook URL in your `config.yml` to avoid repeating it in every request:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# config.yml
|
|
||||||
api:
|
|
||||||
crawler:
|
|
||||||
# ... other settings ...
|
|
||||||
webhook:
|
|
||||||
default_url: "https://your-app.com/webhook/default"
|
|
||||||
default_headers:
|
|
||||||
X-Webhook-Secret: "your-secret-token"
|
|
||||||
```
|
|
||||||
|
|
||||||
Then submit jobs without webhook config:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Uses the global webhook configuration
|
|
||||||
response = requests.post(
|
|
||||||
"http://localhost:11235/crawl/job",
|
|
||||||
json={"urls": ["https://example.com"]}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Webhook Best Practices
|
|
||||||
|
|
||||||
1. **Authentication:** Always use custom headers for webhook authentication
|
|
||||||
```json
|
|
||||||
"webhook_headers": {
|
|
||||||
"X-Webhook-Secret": "your-secret-token"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Idempotency:** Design your webhook handler to be idempotent (safe to receive duplicate notifications)
|
|
||||||
|
|
||||||
3. **Fast Response:** Return HTTP 200 quickly; process data asynchronously if needed
|
|
||||||
```python
|
|
||||||
@app.route('/webhook', methods=['POST'])
|
|
||||||
def webhook():
|
|
||||||
payload = request.json
|
|
||||||
# Queue for background processing
|
|
||||||
queue.enqueue(process_webhook, payload)
|
|
||||||
return jsonify({"status": "received"}), 200
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Error Handling:** Handle both success and failure notifications
|
|
||||||
```python
|
|
||||||
if payload['status'] == 'completed':
|
|
||||||
# Process success
|
|
||||||
elif payload['status'] == 'failed':
|
|
||||||
# Log error, retry, or alert
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Validation:** Verify webhook authenticity using custom headers
|
|
||||||
```python
|
|
||||||
secret = request.headers.get('X-Webhook-Secret')
|
|
||||||
if secret != os.environ['EXPECTED_SECRET']:
|
|
||||||
return jsonify({"error": "Unauthorized"}), 401
|
|
||||||
```
|
|
||||||
|
|
||||||
6. **Logging:** Log webhook deliveries for debugging
|
|
||||||
```python
|
|
||||||
logger.info(f"Webhook received: {payload['task_id']} - {payload['status']}")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Use Cases
|
|
||||||
|
|
||||||
**1. Batch Processing**
|
|
||||||
Submit hundreds of URLs and get notified as each completes:
|
|
||||||
```python
|
|
||||||
urls = ["https://site1.com", "https://site2.com", ...]
|
|
||||||
for url in urls:
|
|
||||||
submit_crawl_job(url, webhook_url="https://app.com/webhook")
|
|
||||||
```
|
|
||||||
|
|
||||||
**2. Microservice Integration**
|
|
||||||
Integrate with event-driven architectures:
|
|
||||||
```python
|
|
||||||
# Service A submits job
|
|
||||||
task_id = submit_crawl_job(url)
|
|
||||||
|
|
||||||
# Service B receives webhook and triggers next step
|
|
||||||
@app.route('/webhook')
|
|
||||||
def webhook():
|
|
||||||
process_result(request.json)
|
|
||||||
trigger_next_service()
|
|
||||||
return "OK", 200
|
|
||||||
```
|
|
||||||
|
|
||||||
**3. Long-Running Extractions**
|
|
||||||
Handle complex LLM extractions without timeouts:
|
|
||||||
```python
|
|
||||||
submit_llm_job(
|
|
||||||
url="https://long-article.com",
|
|
||||||
q="Comprehensive summary with key points and analysis",
|
|
||||||
webhook_url="https://app.com/webhook/llm"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Troubleshooting
|
|
||||||
|
|
||||||
**Webhook not receiving notifications?**
|
|
||||||
- Check your webhook URL is publicly accessible
|
|
||||||
- Verify firewall/security group settings
|
|
||||||
- Use webhook testing tools like webhook.site for debugging
|
|
||||||
- Check server logs for delivery attempts
|
|
||||||
- Ensure your handler returns 200-299 status code
|
|
||||||
|
|
||||||
**Job stuck in processing?**
|
|
||||||
- Check Redis connection: `docker logs <container_name> | grep redis`
|
|
||||||
- Verify worker processes: `docker exec <container_name> ps aux | grep worker`
|
|
||||||
- Check server logs: `docker logs <container_name>`
|
|
||||||
|
|
||||||
**Need to cancel a job?**
|
|
||||||
Jobs are processed asynchronously. If you need to cancel:
|
|
||||||
- Delete the task from Redis (requires Redis CLI access)
|
|
||||||
- Or implement a cancellation endpoint in your webhook handler
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Dockerfile Parameters
|
## Dockerfile Parameters
|
||||||
|
|
||||||
You can customize the image build process using build arguments (`--build-arg`). These are typically used via `docker buildx build` or within the `docker-compose.yml` file.
|
You can customize the image build process using build arguments (`--build-arg`). These are typically used via `docker buildx build` or within the `docker-compose.yml` file.
|
||||||
|
|||||||
@@ -529,19 +529,8 @@ class AdminDashboard {
|
|||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group full-width">
|
<div class="form-group full-width">
|
||||||
<label>Long Description (Markdown - Overview tab)</label>
|
<label>Integration Guide</label>
|
||||||
<textarea id="form-long-description" rows="10" placeholder="Enter detailed description with markdown formatting...">${app?.long_description || ''}</textarea>
|
<textarea id="form-integration" rows="10">${app?.integration_guide || ''}</textarea>
|
||||||
<small>Markdown support: **bold**, *italic*, [links](url), # headers, code blocks, lists</small>
|
|
||||||
</div>
|
|
||||||
<div class="form-group full-width">
|
|
||||||
<label>Integration Guide (Markdown - Integration tab)</label>
|
|
||||||
<textarea id="form-integration" rows="20" placeholder="Enter integration guide with installation, examples, and code snippets using markdown...">${app?.integration_guide || ''}</textarea>
|
|
||||||
<small>Single markdown field with installation, examples, and complete guide. Code blocks get auto copy buttons.</small>
|
|
||||||
</div>
|
|
||||||
<div class="form-group full-width">
|
|
||||||
<label>Documentation (Markdown - Documentation tab)</label>
|
|
||||||
<textarea id="form-documentation" rows="20" placeholder="Enter documentation with API reference, examples, and best practices using markdown...">${app?.documentation || ''}</textarea>
|
|
||||||
<small>Full documentation with API reference, examples, best practices, etc.</small>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
@@ -723,9 +712,7 @@ class AdminDashboard {
|
|||||||
data.contact_email = document.getElementById('form-email').value;
|
data.contact_email = document.getElementById('form-email').value;
|
||||||
data.featured = document.getElementById('form-featured').checked ? 1 : 0;
|
data.featured = document.getElementById('form-featured').checked ? 1 : 0;
|
||||||
data.sponsored = document.getElementById('form-sponsored').checked ? 1 : 0;
|
data.sponsored = document.getElementById('form-sponsored').checked ? 1 : 0;
|
||||||
data.long_description = document.getElementById('form-long-description').value;
|
|
||||||
data.integration_guide = document.getElementById('form-integration').value;
|
data.integration_guide = document.getElementById('form-integration').value;
|
||||||
data.documentation = document.getElementById('form-documentation').value;
|
|
||||||
} else if (type === 'articles') {
|
} else if (type === 'articles') {
|
||||||
data.title = document.getElementById('form-title').value;
|
data.title = document.getElementById('form-title').value;
|
||||||
data.slug = this.generateSlug(data.title);
|
data.slug = this.generateSlug(data.title);
|
||||||
|
|||||||
@@ -278,12 +278,12 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.tab-content {
|
.tab-content {
|
||||||
display: none !important;
|
display: none;
|
||||||
padding: 2rem;
|
padding: 2rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.tab-content.active {
|
.tab-content.active {
|
||||||
display: block !important;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Overview Layout */
|
/* Overview Layout */
|
||||||
@@ -510,31 +510,6 @@
|
|||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Markdown rendered code blocks */
|
|
||||||
.integration-content pre,
|
|
||||||
.docs-content pre {
|
|
||||||
background: var(--bg-dark);
|
|
||||||
border: 1px solid var(--border-color);
|
|
||||||
margin: 1rem 0;
|
|
||||||
padding: 1rem;
|
|
||||||
padding-top: 2.5rem; /* Space for copy button */
|
|
||||||
overflow-x: auto;
|
|
||||||
position: relative;
|
|
||||||
max-height: none; /* Remove any height restrictions */
|
|
||||||
height: auto; /* Allow content to expand */
|
|
||||||
}
|
|
||||||
|
|
||||||
.integration-content pre code,
|
|
||||||
.docs-content pre code {
|
|
||||||
background: transparent;
|
|
||||||
padding: 0;
|
|
||||||
color: var(--text-secondary);
|
|
||||||
font-size: 0.875rem;
|
|
||||||
line-height: 1.5;
|
|
||||||
white-space: pre; /* Preserve whitespace and line breaks */
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Feature Grid */
|
/* Feature Grid */
|
||||||
.feature-grid {
|
.feature-grid {
|
||||||
display: grid;
|
display: grid;
|
||||||
|
|||||||
@@ -73,14 +73,27 @@
|
|||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<button class="tab-btn active" data-tab="overview">Overview</button>
|
<button class="tab-btn active" data-tab="overview">Overview</button>
|
||||||
<button class="tab-btn" data-tab="integration">Integration</button>
|
<button class="tab-btn" data-tab="integration">Integration</button>
|
||||||
<!-- <button class="tab-btn" data-tab="docs">Documentation</button>
|
<button class="tab-btn" data-tab="docs">Documentation</button>
|
||||||
<button class="tab-btn" data-tab="support">Support</button> -->
|
<button class="tab-btn" data-tab="support">Support</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<section id="overview-tab" class="tab-content active">
|
<section id="overview-tab" class="tab-content active">
|
||||||
<div class="overview-columns">
|
<div class="overview-columns">
|
||||||
<div class="overview-main">
|
<div class="overview-main">
|
||||||
|
<h2>Overview</h2>
|
||||||
<div id="app-overview">Overview content goes here.</div>
|
<div id="app-overview">Overview content goes here.</div>
|
||||||
|
|
||||||
|
<h3>Key Features</h3>
|
||||||
|
<ul id="app-features" class="features-list">
|
||||||
|
<li>Feature 1</li>
|
||||||
|
<li>Feature 2</li>
|
||||||
|
<li>Feature 3</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<h3>Use Cases</h3>
|
||||||
|
<div id="app-use-cases" class="use-cases">
|
||||||
|
<p>Describe how this app can help your workflow.</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<aside class="sidebar">
|
<aside class="sidebar">
|
||||||
@@ -129,16 +142,37 @@
|
|||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="integration-tab" class="tab-content">
|
<section id="integration-tab" class="tab-content">
|
||||||
<div class="integration-content" id="app-integration">
|
<div class="integration-content">
|
||||||
|
<h2>Integration Guide</h2>
|
||||||
|
|
||||||
|
<h3>Installation</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<pre><code id="install-code"># Installation instructions will appear here</code></pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Basic Usage</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<pre><code id="usage-code"># Usage example will appear here</code></pre>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h3>Complete Integration Example</h3>
|
||||||
|
<div class="code-block">
|
||||||
|
<button class="copy-btn" id="copy-integration">Copy</button>
|
||||||
|
<pre><code id="integration-code"># Complete integration guide will appear here</code></pre>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- <section id="docs-tab" class="tab-content">
|
<section id="docs-tab" class="tab-content">
|
||||||
<div class="docs-content" id="app-docs">
|
<div class="docs-content">
|
||||||
|
<h2>Documentation</h2>
|
||||||
|
<div id="app-docs" class="doc-sections">
|
||||||
|
<p>Documentation coming soon.</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section> -->
|
</section>
|
||||||
|
|
||||||
<!-- <section id="support-tab" class="tab-content">
|
<section id="support-tab" class="tab-content">
|
||||||
<div class="docs-content">
|
<div class="docs-content">
|
||||||
<h2>Support</h2>
|
<h2>Support</h2>
|
||||||
<div class="support-grid">
|
<div class="support-grid">
|
||||||
@@ -156,7 +190,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</section> -->
|
</section>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</main>
|
</main>
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ class AppDetailPage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Contact
|
// Contact
|
||||||
document.getElementById('app-contact') && (document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available');
|
document.getElementById('app-contact').textContent = this.appData.contact_email || 'Not available';
|
||||||
|
|
||||||
// Sidebar info
|
// Sidebar info
|
||||||
document.getElementById('sidebar-downloads').textContent = this.formatNumber(this.appData.downloads || 0);
|
document.getElementById('sidebar-downloads').textContent = this.formatNumber(this.appData.downloads || 0);
|
||||||
@@ -123,134 +123,146 @@ class AppDetailPage {
|
|||||||
document.getElementById('sidebar-pricing').textContent = this.appData.pricing || 'Free';
|
document.getElementById('sidebar-pricing').textContent = this.appData.pricing || 'Free';
|
||||||
document.getElementById('sidebar-contact').textContent = this.appData.contact_email || 'contact@example.com';
|
document.getElementById('sidebar-contact').textContent = this.appData.contact_email || 'contact@example.com';
|
||||||
|
|
||||||
// Render tab contents from database fields
|
// Integration guide
|
||||||
this.renderTabContents();
|
this.renderIntegrationGuide();
|
||||||
}
|
}
|
||||||
|
|
||||||
renderTabContents() {
|
renderIntegrationGuide() {
|
||||||
// Overview tab - use long_description from database
|
// Installation code
|
||||||
const overviewDiv = document.getElementById('app-overview');
|
const installCode = document.getElementById('install-code');
|
||||||
if (overviewDiv) {
|
if (installCode) {
|
||||||
if (this.appData.long_description) {
|
if (this.appData.type === 'Open Source' && this.appData.github_url) {
|
||||||
overviewDiv.innerHTML = this.renderMarkdown(this.appData.long_description);
|
installCode.textContent = `# Clone from GitHub
|
||||||
} else {
|
git clone ${this.appData.github_url}
|
||||||
overviewDiv.innerHTML = `<p>${this.appData.description || 'No overview available.'}</p>`;
|
|
||||||
|
# Install dependencies
|
||||||
|
pip install -r requirements.txt`;
|
||||||
|
} else if (this.appData.name.toLowerCase().includes('api')) {
|
||||||
|
installCode.textContent = `# Install via pip
|
||||||
|
pip install ${this.appData.slug}
|
||||||
|
|
||||||
|
# Or install from source
|
||||||
|
pip install git+${this.appData.github_url || 'https://github.com/example/repo'}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Integration tab - use integration_guide field from database
|
// Usage code - customize based on category
|
||||||
const integrationDiv = document.getElementById('app-integration');
|
const usageCode = document.getElementById('usage-code');
|
||||||
if (integrationDiv) {
|
if (usageCode) {
|
||||||
if (this.appData.integration_guide) {
|
if (this.appData.category === 'Browser Automation') {
|
||||||
integrationDiv.innerHTML = this.renderMarkdown(this.appData.integration_guide);
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
// Add copy buttons to all code blocks
|
from ${this.appData.slug.replace(/-/g, '_')} import ${this.appData.name.replace(/\s+/g, '')}
|
||||||
this.addCopyButtonsToCodeBlocks(integrationDiv);
|
|
||||||
} else {
|
async def main():
|
||||||
integrationDiv.innerHTML = '<p>Integration guide not yet available. Please check the official website for details.</p>';
|
# Initialize ${this.appData.name}
|
||||||
|
automation = ${this.appData.name.replace(/\s+/g, '')}()
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
browser_config=automation.config,
|
||||||
|
wait_for="css:body"
|
||||||
|
)
|
||||||
|
print(result.markdown)`;
|
||||||
|
} else if (this.appData.category === 'Proxy Services') {
|
||||||
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
|
import ${this.appData.slug.replace(/-/g, '_')}
|
||||||
|
|
||||||
|
# Configure proxy
|
||||||
|
proxy_config = {
|
||||||
|
"server": "${this.appData.website_url || 'https://proxy.example.com'}",
|
||||||
|
"username": "your_username",
|
||||||
|
"password": "your_password"
|
||||||
|
}
|
||||||
|
|
||||||
|
async with AsyncWebCrawler(proxy=proxy_config) as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
bypass_cache=True
|
||||||
|
)
|
||||||
|
print(result.status_code)`;
|
||||||
|
} else if (this.appData.category === 'LLM Integration') {
|
||||||
|
usageCode.textContent = `from crawl4ai import AsyncWebCrawler
|
||||||
|
from crawl4ai.extraction_strategy import LLMExtractionStrategy
|
||||||
|
|
||||||
|
# Configure LLM extraction
|
||||||
|
strategy = LLMExtractionStrategy(
|
||||||
|
provider="${this.appData.name.toLowerCase().includes('gpt') ? 'openai' : 'anthropic'}",
|
||||||
|
api_key="your-api-key",
|
||||||
|
model="${this.appData.name.toLowerCase().includes('gpt') ? 'gpt-4' : 'claude-3'}",
|
||||||
|
instruction="Extract structured data"
|
||||||
|
)
|
||||||
|
|
||||||
|
async with AsyncWebCrawler() as crawler:
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com",
|
||||||
|
extraction_strategy=strategy
|
||||||
|
)
|
||||||
|
print(result.extracted_content)`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Documentation tab - use documentation field from database
|
// Integration example
|
||||||
const docsDiv = document.getElementById('app-docs');
|
const integrationCode = document.getElementById('integration-code');
|
||||||
if (docsDiv) {
|
if (integrationCode) {
|
||||||
if (this.appData.documentation) {
|
integrationCode.textContent = this.appData.integration_guide ||
|
||||||
docsDiv.innerHTML = this.renderMarkdown(this.appData.documentation);
|
`# Complete ${this.appData.name} Integration Example
|
||||||
// Add copy buttons to all code blocks
|
|
||||||
this.addCopyButtonsToCodeBlocks(docsDiv);
|
from crawl4ai import AsyncWebCrawler
|
||||||
} else {
|
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||||
docsDiv.innerHTML = '<p>Documentation coming soon.</p>';
|
import json
|
||||||
}
|
|
||||||
|
async def crawl_with_${this.appData.slug.replace(/-/g, '_')}():
|
||||||
|
"""
|
||||||
|
Complete example showing how to use ${this.appData.name}
|
||||||
|
with Crawl4AI for production web scraping
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Define extraction schema
|
||||||
|
schema = {
|
||||||
|
"name": "ProductList",
|
||||||
|
"baseSelector": "div.product",
|
||||||
|
"fields": [
|
||||||
|
{"name": "title", "selector": "h2", "type": "text"},
|
||||||
|
{"name": "price", "selector": ".price", "type": "text"},
|
||||||
|
{"name": "image", "selector": "img", "type": "attribute", "attribute": "src"},
|
||||||
|
{"name": "link", "selector": "a", "type": "attribute", "attribute": "href"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Initialize crawler with ${this.appData.name}
|
||||||
|
async with AsyncWebCrawler(
|
||||||
|
browser_type="chromium",
|
||||||
|
headless=True,
|
||||||
|
verbose=True
|
||||||
|
) as crawler:
|
||||||
|
|
||||||
|
# Crawl with extraction
|
||||||
|
result = await crawler.arun(
|
||||||
|
url="https://example.com/products",
|
||||||
|
extraction_strategy=JsonCssExtractionStrategy(schema),
|
||||||
|
cache_mode="bypass",
|
||||||
|
wait_for="css:.product",
|
||||||
|
screenshot=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process results
|
||||||
|
if result.success:
|
||||||
|
products = json.loads(result.extracted_content)
|
||||||
|
print(f"Found {len(products)} products")
|
||||||
|
|
||||||
|
for product in products[:5]:
|
||||||
|
print(f"- {product['title']}: {product['price']}")
|
||||||
|
|
||||||
|
return products
|
||||||
|
|
||||||
|
# Run the crawler
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import asyncio
|
||||||
|
asyncio.run(crawl_with_${this.appData.slug.replace(/-/g, '_')}())`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
addCopyButtonsToCodeBlocks(container) {
|
|
||||||
// Find all code blocks and add copy buttons
|
|
||||||
const codeBlocks = container.querySelectorAll('pre code');
|
|
||||||
codeBlocks.forEach(codeBlock => {
|
|
||||||
const pre = codeBlock.parentElement;
|
|
||||||
|
|
||||||
// Skip if already has a copy button
|
|
||||||
if (pre.querySelector('.copy-btn')) return;
|
|
||||||
|
|
||||||
// Create copy button
|
|
||||||
const copyBtn = document.createElement('button');
|
|
||||||
copyBtn.className = 'copy-btn';
|
|
||||||
copyBtn.textContent = 'Copy';
|
|
||||||
copyBtn.onclick = () => {
|
|
||||||
navigator.clipboard.writeText(codeBlock.textContent).then(() => {
|
|
||||||
copyBtn.textContent = '✓ Copied!';
|
|
||||||
setTimeout(() => {
|
|
||||||
copyBtn.textContent = 'Copy';
|
|
||||||
}, 2000);
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
// Add button to pre element
|
|
||||||
pre.style.position = 'relative';
|
|
||||||
pre.insertBefore(copyBtn, codeBlock);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
renderMarkdown(text) {
|
|
||||||
if (!text) return '';
|
|
||||||
|
|
||||||
// Store code blocks temporarily to protect them from processing
|
|
||||||
const codeBlocks = [];
|
|
||||||
let processed = text.replace(/```(\w+)?\n([\s\S]*?)```/g, (match, lang, code) => {
|
|
||||||
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
|
|
||||||
codeBlocks.push(`<pre><code class="language-${lang || ''}">${this.escapeHtml(code)}</code></pre>`);
|
|
||||||
return placeholder;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Store inline code temporarily
|
|
||||||
const inlineCodes = [];
|
|
||||||
processed = processed.replace(/`([^`]+)`/g, (match, code) => {
|
|
||||||
const placeholder = `___INLINE_CODE_${inlineCodes.length}___`;
|
|
||||||
inlineCodes.push(`<code>${this.escapeHtml(code)}</code>`);
|
|
||||||
return placeholder;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Now process the rest of the markdown
|
|
||||||
processed = processed
|
|
||||||
// Headers
|
|
||||||
.replace(/^### (.*$)/gim, '<h3>$1</h3>')
|
|
||||||
.replace(/^## (.*$)/gim, '<h2>$1</h2>')
|
|
||||||
.replace(/^# (.*$)/gim, '<h1>$1</h1>')
|
|
||||||
// Bold
|
|
||||||
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
|
||||||
// Italic
|
|
||||||
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
|
||||||
// Links
|
|
||||||
.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" target="_blank">$1</a>')
|
|
||||||
// Line breaks
|
|
||||||
.replace(/\n\n/g, '</p><p>')
|
|
||||||
.replace(/\n/g, '<br>')
|
|
||||||
// Lists
|
|
||||||
.replace(/^\* (.*)$/gim, '<li>$1</li>')
|
|
||||||
.replace(/^- (.*)$/gim, '<li>$1</li>')
|
|
||||||
// Wrap in paragraphs
|
|
||||||
.replace(/^(?!<[h|p|pre|ul|ol|li])/gim, '<p>')
|
|
||||||
.replace(/(?<![>])$/gim, '</p>');
|
|
||||||
|
|
||||||
// Restore inline code
|
|
||||||
inlineCodes.forEach((code, i) => {
|
|
||||||
processed = processed.replace(`___INLINE_CODE_${i}___`, code);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Restore code blocks
|
|
||||||
codeBlocks.forEach((block, i) => {
|
|
||||||
processed = processed.replace(`___CODE_BLOCK_${i}___`, block);
|
|
||||||
});
|
|
||||||
|
|
||||||
return processed;
|
|
||||||
}
|
|
||||||
|
|
||||||
escapeHtml(text) {
|
|
||||||
const div = document.createElement('div');
|
|
||||||
div.textContent = text;
|
|
||||||
return div.innerHTML;
|
|
||||||
}
|
|
||||||
|
|
||||||
formatNumber(num) {
|
formatNumber(num) {
|
||||||
if (num >= 1000000) {
|
if (num >= 1000000) {
|
||||||
return (num / 1000000).toFixed(1) + 'M';
|
return (num / 1000000).toFixed(1) + 'M';
|
||||||
@@ -263,27 +275,45 @@ class AppDetailPage {
|
|||||||
setupEventListeners() {
|
setupEventListeners() {
|
||||||
// Tab switching
|
// Tab switching
|
||||||
const tabs = document.querySelectorAll('.tab-btn');
|
const tabs = document.querySelectorAll('.tab-btn');
|
||||||
|
|
||||||
tabs.forEach(tab => {
|
tabs.forEach(tab => {
|
||||||
tab.addEventListener('click', () => {
|
tab.addEventListener('click', () => {
|
||||||
// Update active tab button
|
// Update active tab
|
||||||
tabs.forEach(t => t.classList.remove('active'));
|
tabs.forEach(t => t.classList.remove('active'));
|
||||||
tab.classList.add('active');
|
tab.classList.add('active');
|
||||||
|
|
||||||
// Show corresponding content
|
// Show corresponding content
|
||||||
const tabName = tab.dataset.tab;
|
const tabName = tab.dataset.tab;
|
||||||
|
document.querySelectorAll('.tab-content').forEach(content => {
|
||||||
// Hide all tab contents
|
|
||||||
const allTabContents = document.querySelectorAll('.tab-content');
|
|
||||||
allTabContents.forEach(content => {
|
|
||||||
content.classList.remove('active');
|
content.classList.remove('active');
|
||||||
});
|
});
|
||||||
|
document.getElementById(`${tabName}-tab`).classList.add('active');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// Show the selected tab content
|
// Copy integration code
|
||||||
const targetTab = document.getElementById(`${tabName}-tab`);
|
document.getElementById('copy-integration').addEventListener('click', () => {
|
||||||
if (targetTab) {
|
const code = document.getElementById('integration-code').textContent;
|
||||||
targetTab.classList.add('active');
|
navigator.clipboard.writeText(code).then(() => {
|
||||||
}
|
const btn = document.getElementById('copy-integration');
|
||||||
|
const originalText = btn.innerHTML;
|
||||||
|
btn.innerHTML = '<span>✓</span> Copied!';
|
||||||
|
setTimeout(() => {
|
||||||
|
btn.innerHTML = originalText;
|
||||||
|
}, 2000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Copy code buttons
|
||||||
|
document.querySelectorAll('.copy-btn').forEach(btn => {
|
||||||
|
btn.addEventListener('click', (e) => {
|
||||||
|
const codeBlock = e.target.closest('.code-block');
|
||||||
|
const code = codeBlock.querySelector('code').textContent;
|
||||||
|
navigator.clipboard.writeText(code).then(() => {
|
||||||
|
btn.textContent = 'Copied!';
|
||||||
|
setTimeout(() => {
|
||||||
|
btn.textContent = 'Copy';
|
||||||
|
}, 2000);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -471,17 +471,13 @@ async def delete_sponsor(sponsor_id: int):
|
|||||||
|
|
||||||
app.include_router(router)
|
app.include_router(router)
|
||||||
|
|
||||||
# Version info
|
|
||||||
VERSION = "1.1.0"
|
|
||||||
BUILD_DATE = "2025-10-26"
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
"""API info"""
|
"""API info"""
|
||||||
return {
|
return {
|
||||||
"name": "Crawl4AI Marketplace API",
|
"name": "Crawl4AI Marketplace API",
|
||||||
"version": VERSION,
|
"version": "1.0.0",
|
||||||
"build_date": BUILD_DATE,
|
|
||||||
"endpoints": [
|
"endpoints": [
|
||||||
"/marketplace/api/apps",
|
"/marketplace/api/apps",
|
||||||
"/marketplace/api/articles",
|
"/marketplace/api/articles",
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ dependencies = [
|
|||||||
"rank-bm25~=0.2",
|
"rank-bm25~=0.2",
|
||||||
"snowballstemmer~=2.2",
|
"snowballstemmer~=2.2",
|
||||||
"pydantic>=2.10",
|
"pydantic>=2.10",
|
||||||
"pyOpenSSL>=25.3.0",
|
"pyOpenSSL>=24.3.0",
|
||||||
"psutil>=6.1.1",
|
"psutil>=6.1.1",
|
||||||
"PyYAML>=6.0",
|
"PyYAML>=6.0",
|
||||||
"nltk>=3.9.1",
|
"nltk>=3.9.1",
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ rank-bm25~=0.2
|
|||||||
colorama~=0.4
|
colorama~=0.4
|
||||||
snowballstemmer~=2.2
|
snowballstemmer~=2.2
|
||||||
pydantic>=2.10
|
pydantic>=2.10
|
||||||
pyOpenSSL>=25.3.0
|
pyOpenSSL>=24.3.0
|
||||||
psutil>=6.1.1
|
psutil>=6.1.1
|
||||||
PyYAML>=6.0
|
PyYAML>=6.0
|
||||||
nltk>=3.9.1
|
nltk>=3.9.1
|
||||||
|
|||||||
@@ -1,283 +0,0 @@
|
|||||||
"""
|
|
||||||
Compact test suite for CDP concurrency fix.
|
|
||||||
|
|
||||||
This file consolidates all tests related to the CDP concurrency fix for
|
|
||||||
AsyncWebCrawler.arun_many() with managed browsers.
|
|
||||||
|
|
||||||
The bug was that all concurrent tasks were fighting over one shared tab,
|
|
||||||
causing failures. This has been fixed by modifying the get_page() method
|
|
||||||
in browser_manager.py to always create new pages instead of reusing pages[0].
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Add parent directory to path for imports
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
||||||
|
|
||||||
from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig
|
|
||||||
from crawl4ai.async_configs import BrowserConfig
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 1: Basic arun_many functionality
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_basic_arun_many():
|
|
||||||
"""Test that arun_many works correctly with basic configuration."""
|
|
||||||
print("=== TEST 1: Basic arun_many functionality ===")
|
|
||||||
|
|
||||||
# Configuration to bypass cache for testing
|
|
||||||
config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
|
|
||||||
|
|
||||||
# Test URLs - using reliable test URLs
|
|
||||||
test_urls = [
|
|
||||||
"https://httpbin.org/html", # Simple HTML page
|
|
||||||
"https://httpbin.org/json", # Simple JSON response
|
|
||||||
]
|
|
||||||
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
print(f"Testing concurrent crawling of {len(test_urls)} URLs...")
|
|
||||||
|
|
||||||
# This should work correctly
|
|
||||||
result = await crawler.arun_many(urls=test_urls, config=config)
|
|
||||||
|
|
||||||
# Simple verification - if we get here without exception, the basic functionality works
|
|
||||||
print(f"✓ arun_many completed successfully")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 2: CDP Browser with Managed Configuration
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_arun_many_with_managed_cdp_browser():
|
|
||||||
"""Test that arun_many works correctly with managed CDP browsers."""
|
|
||||||
print("\n=== TEST 2: arun_many with managed CDP browser ===")
|
|
||||||
|
|
||||||
# Create a temporary user data directory for the CDP browser
|
|
||||||
user_data_dir = tempfile.mkdtemp(prefix="crawl4ai-cdp-test-")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Configure browser to use managed CDP mode
|
|
||||||
browser_config = BrowserConfig(
|
|
||||||
use_managed_browser=True,
|
|
||||||
browser_type="chromium",
|
|
||||||
headless=True,
|
|
||||||
user_data_dir=user_data_dir,
|
|
||||||
verbose=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Configuration to bypass cache for testing
|
|
||||||
crawler_config = CrawlerRunConfig(
|
|
||||||
cache_mode=CacheMode.BYPASS,
|
|
||||||
page_timeout=60000,
|
|
||||||
wait_until="domcontentloaded",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test URLs - using reliable test URLs
|
|
||||||
test_urls = [
|
|
||||||
"https://httpbin.org/html", # Simple HTML page
|
|
||||||
"https://httpbin.org/json", # Simple JSON response
|
|
||||||
]
|
|
||||||
|
|
||||||
# Create crawler with CDP browser configuration
|
|
||||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
||||||
print(f"Testing concurrent crawling of {len(test_urls)} URLs...")
|
|
||||||
|
|
||||||
# This should work correctly with our fix
|
|
||||||
result = await crawler.arun_many(urls=test_urls, config=crawler_config)
|
|
||||||
|
|
||||||
print(f"✓ arun_many completed successfully with managed CDP browser")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Test failed with error: {str(e)}")
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
# Clean up temporary directory
|
|
||||||
try:
|
|
||||||
shutil.rmtree(user_data_dir, ignore_errors=True)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 3: Concurrency Verification
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_concurrent_crawling():
|
|
||||||
"""Test concurrent crawling to verify the fix works."""
|
|
||||||
print("\n=== TEST 3: Concurrent crawling verification ===")
|
|
||||||
|
|
||||||
# Configuration to bypass cache for testing
|
|
||||||
config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
|
|
||||||
|
|
||||||
# Test URLs - using reliable test URLs
|
|
||||||
test_urls = [
|
|
||||||
"https://httpbin.org/html", # Simple HTML page
|
|
||||||
"https://httpbin.org/json", # Simple JSON response
|
|
||||||
"https://httpbin.org/uuid", # Simple UUID response
|
|
||||||
"https://example.com/", # Standard example page
|
|
||||||
]
|
|
||||||
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
print(f"Testing concurrent crawling of {len(test_urls)} URLs...")
|
|
||||||
|
|
||||||
# This should work correctly with our fix
|
|
||||||
results = await crawler.arun_many(urls=test_urls, config=config)
|
|
||||||
|
|
||||||
# Simple verification - if we get here without exception, the fix works
|
|
||||||
print("✓ arun_many completed successfully with concurrent crawling")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 4: Concurrency Fix Demonstration
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_concurrency_fix():
|
|
||||||
"""Demonstrate that the concurrency fix works."""
|
|
||||||
print("\n=== TEST 4: Concurrency fix demonstration ===")
|
|
||||||
|
|
||||||
# Configuration to bypass cache for testing
|
|
||||||
config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
|
|
||||||
|
|
||||||
# Test URLs - using reliable test URLs
|
|
||||||
test_urls = [
|
|
||||||
"https://httpbin.org/html", # Simple HTML page
|
|
||||||
"https://httpbin.org/json", # Simple JSON response
|
|
||||||
"https://httpbin.org/uuid", # Simple UUID response
|
|
||||||
]
|
|
||||||
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
print(f"Testing concurrent crawling of {len(test_urls)} URLs...")
|
|
||||||
|
|
||||||
# This should work correctly with our fix
|
|
||||||
results = await crawler.arun_many(urls=test_urls, config=config)
|
|
||||||
|
|
||||||
# Simple verification - if we get here without exception, the fix works
|
|
||||||
print("✓ arun_many completed successfully with concurrent crawling")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 5: Before/After Behavior Comparison
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_before_after_behavior():
|
|
||||||
"""Test that demonstrates concurrent crawling works correctly after the fix."""
|
|
||||||
print("\n=== TEST 5: Before/After behavior test ===")
|
|
||||||
|
|
||||||
# Configuration to bypass cache for testing
|
|
||||||
config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
|
|
||||||
|
|
||||||
# Test URLs - using reliable test URLs that would stress the concurrency system
|
|
||||||
test_urls = [
|
|
||||||
"https://httpbin.org/delay/1", # Delayed response to increase chance of contention
|
|
||||||
"https://httpbin.org/delay/2", # Delayed response to increase chance of contention
|
|
||||||
"https://httpbin.org/uuid", # Fast response
|
|
||||||
"https://httpbin.org/json", # Fast response
|
|
||||||
]
|
|
||||||
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
print(
|
|
||||||
f"Testing concurrent crawling of {len(test_urls)} URLs (including delayed responses)..."
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
"This test would have failed before the concurrency fix due to page contention."
|
|
||||||
)
|
|
||||||
|
|
||||||
# This should work correctly with our fix
|
|
||||||
results = await crawler.arun_many(urls=test_urls, config=config)
|
|
||||||
|
|
||||||
# Simple verification - if we get here without exception, the fix works
|
|
||||||
print("✓ arun_many completed successfully with concurrent crawling")
|
|
||||||
print("✓ No page contention issues detected")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# TEST 6: Reference Pattern Test
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def test_reference_pattern():
|
|
||||||
"""Main test function following reference pattern."""
|
|
||||||
print("\n=== TEST 6: Reference pattern test ===")
|
|
||||||
|
|
||||||
# Configure crawler settings
|
|
||||||
crawler_cfg = CrawlerRunConfig(
|
|
||||||
cache_mode=CacheMode.BYPASS,
|
|
||||||
page_timeout=60000,
|
|
||||||
wait_until="domcontentloaded",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Define URLs to crawl
|
|
||||||
URLS = [
|
|
||||||
"https://httpbin.org/html",
|
|
||||||
"https://httpbin.org/json",
|
|
||||||
"https://httpbin.org/uuid",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Crawl all URLs using arun_many
|
|
||||||
async with AsyncWebCrawler() as crawler:
|
|
||||||
print(f"Testing concurrent crawling of {len(URLS)} URLs...")
|
|
||||||
results = await crawler.arun_many(urls=URLS, config=crawler_cfg)
|
|
||||||
|
|
||||||
# Simple verification - if we get here without exception, the fix works
|
|
||||||
print("✓ arun_many completed successfully with concurrent crawling")
|
|
||||||
print("✅ Reference pattern test completed successfully!")
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# MAIN EXECUTION
|
|
||||||
# =============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
"""Run all tests."""
|
|
||||||
print("Running compact CDP concurrency test suite...")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
tests = [
|
|
||||||
test_basic_arun_many,
|
|
||||||
test_arun_many_with_managed_cdp_browser,
|
|
||||||
test_concurrent_crawling,
|
|
||||||
test_concurrency_fix,
|
|
||||||
test_before_after_behavior,
|
|
||||||
test_reference_pattern,
|
|
||||||
]
|
|
||||||
|
|
||||||
passed = 0
|
|
||||||
failed = 0
|
|
||||||
|
|
||||||
for test_func in tests:
|
|
||||||
try:
|
|
||||||
await test_func()
|
|
||||||
passed += 1
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Test failed: {str(e)}")
|
|
||||||
failed += 1
|
|
||||||
|
|
||||||
print("\n" + "=" * 60)
|
|
||||||
print(f"Test Results: {passed} passed, {failed} failed")
|
|
||||||
|
|
||||||
if failed == 0:
|
|
||||||
print("🎉 All tests passed! The CDP concurrency fix is working correctly.")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(f"❌ {failed} test(s) failed!")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
success = asyncio.run(main())
|
|
||||||
sys.exit(0 if success else 1)
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
"""
|
|
||||||
Lightweight test to verify pyOpenSSL security fix (Issue #1545).
|
|
||||||
|
|
||||||
This test verifies the security requirements are met:
|
|
||||||
1. pyOpenSSL >= 25.3.0 is installed
|
|
||||||
2. cryptography >= 45.0.7 is installed (above vulnerable range)
|
|
||||||
3. SSL/TLS functionality works correctly
|
|
||||||
|
|
||||||
This test can run without full crawl4ai dependencies installed.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
|
|
||||||
def test_package_versions():
|
|
||||||
"""Test that package versions meet security requirements."""
|
|
||||||
print("=" * 70)
|
|
||||||
print("TEST: Package Version Security Requirements (Issue #1545)")
|
|
||||||
print("=" * 70)
|
|
||||||
|
|
||||||
all_passed = True
|
|
||||||
|
|
||||||
# Test pyOpenSSL version
|
|
||||||
try:
|
|
||||||
import OpenSSL
|
|
||||||
pyopenssl_version = OpenSSL.__version__
|
|
||||||
print(f"\n✓ pyOpenSSL is installed: {pyopenssl_version}")
|
|
||||||
|
|
||||||
if version.parse(pyopenssl_version) >= version.parse("25.3.0"):
|
|
||||||
print(f" ✓ PASS: pyOpenSSL {pyopenssl_version} >= 25.3.0 (required)")
|
|
||||||
else:
|
|
||||||
print(f" ✗ FAIL: pyOpenSSL {pyopenssl_version} < 25.3.0 (required)")
|
|
||||||
all_passed = False
|
|
||||||
|
|
||||||
except ImportError as e:
|
|
||||||
print(f"\n✗ FAIL: pyOpenSSL not installed - {e}")
|
|
||||||
all_passed = False
|
|
||||||
|
|
||||||
# Test cryptography version
|
|
||||||
try:
|
|
||||||
import cryptography
|
|
||||||
crypto_version = cryptography.__version__
|
|
||||||
print(f"\n✓ cryptography is installed: {crypto_version}")
|
|
||||||
|
|
||||||
# The vulnerable range is >=37.0.0 & <43.0.1
|
|
||||||
# We need >= 45.0.7 to be safe
|
|
||||||
if version.parse(crypto_version) >= version.parse("45.0.7"):
|
|
||||||
print(f" ✓ PASS: cryptography {crypto_version} >= 45.0.7 (secure)")
|
|
||||||
print(f" ✓ NOT in vulnerable range (37.0.0 to 43.0.0)")
|
|
||||||
elif version.parse(crypto_version) >= version.parse("37.0.0") and version.parse(crypto_version) < version.parse("43.0.1"):
|
|
||||||
print(f" ✗ FAIL: cryptography {crypto_version} is VULNERABLE")
|
|
||||||
print(f" ✗ Version is in vulnerable range (>=37.0.0 & <43.0.1)")
|
|
||||||
all_passed = False
|
|
||||||
else:
|
|
||||||
print(f" ⚠ WARNING: cryptography {crypto_version} < 45.0.7")
|
|
||||||
print(f" ⚠ May not meet security requirements")
|
|
||||||
|
|
||||||
except ImportError as e:
|
|
||||||
print(f"\n✗ FAIL: cryptography not installed - {e}")
|
|
||||||
all_passed = False
|
|
||||||
|
|
||||||
return all_passed
|
|
||||||
|
|
||||||
|
|
||||||
def test_ssl_basic_functionality():
|
|
||||||
"""Test that SSL/TLS basic functionality works."""
|
|
||||||
print("\n" + "=" * 70)
|
|
||||||
print("TEST: SSL/TLS Basic Functionality")
|
|
||||||
print("=" * 70)
|
|
||||||
|
|
||||||
try:
|
|
||||||
import OpenSSL.SSL
|
|
||||||
|
|
||||||
# Create a basic SSL context to verify functionality
|
|
||||||
context = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_2_METHOD)
|
|
||||||
print("\n✓ SSL Context created successfully")
|
|
||||||
print(" ✓ PASS: SSL/TLS functionality is working")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\n✗ FAIL: SSL functionality test failed - {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def test_pyopenssl_crypto_integration():
|
|
||||||
"""Test that pyOpenSSL and cryptography integration works."""
|
|
||||||
print("\n" + "=" * 70)
|
|
||||||
print("TEST: pyOpenSSL <-> cryptography Integration")
|
|
||||||
print("=" * 70)
|
|
||||||
|
|
||||||
try:
|
|
||||||
from OpenSSL import crypto
|
|
||||||
|
|
||||||
# Generate a simple key pair to test integration
|
|
||||||
key = crypto.PKey()
|
|
||||||
key.generate_key(crypto.TYPE_RSA, 2048)
|
|
||||||
|
|
||||||
print("\n✓ Generated RSA key pair successfully")
|
|
||||||
print(" ✓ PASS: pyOpenSSL and cryptography are properly integrated")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\n✗ FAIL: Integration test failed - {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Run all security tests."""
|
|
||||||
print("\n")
|
|
||||||
print("╔" + "=" * 68 + "╗")
|
|
||||||
print("║ pyOpenSSL Security Fix Verification - Issue #1545 ║")
|
|
||||||
print("╚" + "=" * 68 + "╝")
|
|
||||||
print("\nVerifying that the pyOpenSSL update resolves the security vulnerability")
|
|
||||||
print("in the cryptography package (CVE: versions >=37.0.0 & <43.0.1)\n")
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# Test 1: Package versions
|
|
||||||
results.append(("Package Versions", test_package_versions()))
|
|
||||||
|
|
||||||
# Test 2: SSL functionality
|
|
||||||
results.append(("SSL Functionality", test_ssl_basic_functionality()))
|
|
||||||
|
|
||||||
# Test 3: Integration
|
|
||||||
results.append(("pyOpenSSL-crypto Integration", test_pyopenssl_crypto_integration()))
|
|
||||||
|
|
||||||
# Summary
|
|
||||||
print("\n" + "=" * 70)
|
|
||||||
print("TEST SUMMARY")
|
|
||||||
print("=" * 70)
|
|
||||||
|
|
||||||
all_passed = True
|
|
||||||
for test_name, passed in results:
|
|
||||||
status = "✓ PASS" if passed else "✗ FAIL"
|
|
||||||
print(f"{status}: {test_name}")
|
|
||||||
all_passed = all_passed and passed
|
|
||||||
|
|
||||||
print("=" * 70)
|
|
||||||
|
|
||||||
if all_passed:
|
|
||||||
print("\n✓✓✓ ALL TESTS PASSED ✓✓✓")
|
|
||||||
print("✓ Security vulnerability is resolved")
|
|
||||||
print("✓ pyOpenSSL >= 25.3.0 is working correctly")
|
|
||||||
print("✓ cryptography >= 45.0.7 (not vulnerable)")
|
|
||||||
print("\nThe dependency update is safe to merge.\n")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print("\n✗✗✗ SOME TESTS FAILED ✗✗✗")
|
|
||||||
print("✗ Security requirements not met")
|
|
||||||
print("\nDo NOT merge until all tests pass.\n")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
try:
|
|
||||||
success = main()
|
|
||||||
sys.exit(0 if success else 1)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\n\nTest interrupted by user")
|
|
||||||
sys.exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"\n✗ Unexpected error: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
sys.exit(1)
|
|
||||||
@@ -1,184 +0,0 @@
|
|||||||
"""
|
|
||||||
Test script to verify pyOpenSSL update doesn't break crawl4ai functionality.
|
|
||||||
|
|
||||||
This test verifies:
|
|
||||||
1. pyOpenSSL and cryptography versions are correct and secure
|
|
||||||
2. Basic crawling functionality still works
|
|
||||||
3. HTTPS/SSL connections work properly
|
|
||||||
4. Stealth mode integration works (uses playwright-stealth internally)
|
|
||||||
|
|
||||||
Issue: #1545 - Security vulnerability in cryptography package
|
|
||||||
Fix: Updated pyOpenSSL from >=24.3.0 to >=25.3.0
|
|
||||||
Expected: cryptography package should be >=45.0.7 (above vulnerable range)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
|
|
||||||
def check_versions():
|
|
||||||
"""Verify pyOpenSSL and cryptography versions meet security requirements."""
|
|
||||||
print("=" * 60)
|
|
||||||
print("STEP 1: Checking Package Versions")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
try:
|
|
||||||
import OpenSSL
|
|
||||||
pyopenssl_version = OpenSSL.__version__
|
|
||||||
print(f"✓ pyOpenSSL version: {pyopenssl_version}")
|
|
||||||
|
|
||||||
# Check pyOpenSSL >= 25.3.0
|
|
||||||
if version.parse(pyopenssl_version) >= version.parse("25.3.0"):
|
|
||||||
print(f" ✓ Version check passed: {pyopenssl_version} >= 25.3.0")
|
|
||||||
else:
|
|
||||||
print(f" ✗ Version check FAILED: {pyopenssl_version} < 25.3.0")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except ImportError as e:
|
|
||||||
print(f"✗ Failed to import pyOpenSSL: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cryptography
|
|
||||||
crypto_version = cryptography.__version__
|
|
||||||
print(f"✓ cryptography version: {crypto_version}")
|
|
||||||
|
|
||||||
# Check cryptography >= 45.0.7 (above vulnerable range)
|
|
||||||
if version.parse(crypto_version) >= version.parse("45.0.7"):
|
|
||||||
print(f" ✓ Security check passed: {crypto_version} >= 45.0.7 (not vulnerable)")
|
|
||||||
else:
|
|
||||||
print(f" ✗ Security check FAILED: {crypto_version} < 45.0.7 (potentially vulnerable)")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except ImportError as e:
|
|
||||||
print(f"✗ Failed to import cryptography: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
print("\n✓ All version checks passed!\n")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
async def test_basic_crawl():
|
|
||||||
"""Test basic crawling functionality with HTTPS site."""
|
|
||||||
print("=" * 60)
|
|
||||||
print("STEP 2: Testing Basic HTTPS Crawling")
|
|
||||||
print("=" * 60)
|
|
||||||
|
|
||||||
try:
|
|
||||||
from crawl4ai import AsyncWebCrawler
|
|
||||||
|
|
||||||
async with AsyncWebCrawler(verbose=True) as crawler:
|
|
||||||
# Test with a simple HTTPS site (requires SSL/TLS)
|
|
||||||
print("Crawling example.com (HTTPS)...")
|
|
||||||
result = await crawler.arun(
|
|
||||||
url="https://www.example.com",
|
|
||||||
bypass_cache=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.success:
|
|
||||||
print(f"✓ Crawl successful!")
|
|
||||||
print(f" - Status code: {result.status_code}")
|
|
||||||
print(f" - Content length: {len(result.html)} bytes")
|
|
||||||
print(f" - SSL/TLS connection: ✓ Working")
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(f"✗ Crawl failed: {result.error_message}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"✗ Test failed with error: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
async def test_stealth_mode():
    """Crawl with an explicit BrowserConfig to exercise the stealth-mode path.

    Depends on the playwright-stealth integration being importable.

    Returns:
        bool: True when the crawl succeeds, False on failure or exception.
    """
    print("\n" + "=" * 60)
    print("STEP 3: Testing Stealth Mode Integration")
    print("=" * 60)

    try:
        from crawl4ai import AsyncWebCrawler, BrowserConfig

        # NOTE(review): only headless/verbose are set here — no stealth flag is
        # visibly enabled; presumably BrowserConfig defaults cover it. Confirm.
        browser_config = BrowserConfig(
            headless=True,
            verbose=False,
        )

        async with AsyncWebCrawler(config=browser_config, verbose=True) as crawler:
            print("Crawling with stealth mode enabled...")
            result = await crawler.arun(
                url="https://www.example.com",
                bypass_cache=True,
            )

            # Early exit on failure keeps the success path unindented.
            if not result.success:
                print(f"✗ Stealth crawl failed: {result.error_message}")
                return False

            print("✓ Stealth crawl successful!")
            print(" - Stealth mode: ✓ Working")
            return True

    except Exception as e:
        print(f"✗ Stealth test failed with error: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
||||||
|
|
||||||
async def main():
    """Run the verification suite in order; return True only if every step passes.

    Steps: (1) dependency version floor, (2) basic HTTPS crawl,
    (3) stealth-mode crawl. Each failing step prints a diagnostic and
    short-circuits the run.
    """
    banner = "=" * 58
    print("\n")
    print("╔" + banner + "╗")
    print("║ pyOpenSSL Security Update Verification Test (Issue #1545) ║")
    print("╚" + banner + "╝")
    print("\n")

    # Step 1: the installed crypto stack must meet the security floor.
    if not check_versions():
        print("\n✗ FAILED: Version requirements not met")
        return False

    # Step 2: a plain HTTPS crawl must succeed.
    if not await test_basic_crawl():
        print("\n✗ FAILED: Basic crawling test failed")
        return False

    # Step 3: the stealth-mode crawl must succeed.
    if not await test_stealth_mode():
        print("\n✗ FAILED: Stealth mode test failed")
        return False

    # Every step passed — print the summary banner.
    rule = "=" * 60
    print("\n" + rule)
    print("FINAL RESULT")
    print(rule)
    print("✓ All tests passed successfully!")
    print("✓ pyOpenSSL update is working correctly")
    print("✓ No breaking changes detected")
    print("✓ Security vulnerability resolved")
    print(rule)
    print("\n")

    return True
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Map the suite's boolean result onto a conventional process exit code;
    # exit outside the try so SystemExit is never mistaken for a failure.
    try:
        exit_code = 0 if asyncio.run(main()) else 1
    except KeyboardInterrupt:
        print("\n\nTest interrupted by user")
        exit_code = 1
    except Exception as e:
        print(f"\n✗ Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)
Reference in New Issue
Block a user